diff --git a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py
index e450a25..c62559b 100755
--- a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py
+++ b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py
@@ -35,133 +35,6 @@ def mean_list(l):
         return 0
     return sum(l)/len(l)
 
-@app.task(name='make_a_report')
-def make_a_report(report_id, query_set):
-    # TODO: to be deprecated
-    try:
-        start_date = timezone.datetime.strptime(query_set["start_date_str"], '%Y-%m-%dT%H:%M:%S%z')
-        end_date = timezone.datetime.strptime(query_set["end_date_str"], '%Y-%m-%dT%H:%M:%S%z')
-        base_query = Q(created_at__range=(start_date, end_date))
-        if query_set["request_id"]:
-            base_query &= Q(request_id=query_set["request_id"])
-        if query_set["redemption_id"]:
-            base_query &= Q(redemption_id=query_set["redemption_id"])
-        base_query &= Q(is_test_request=False)
-        if isinstance(query_set["include_test"], str):
-            query_set["include_test"] = True if query_set["include_test"].lower() in ["true", "yes", "1"] else False
-            if query_set["include_test"]:
-                # base_query = ~base_query
-                base_query.children = base_query.children[:-1]
-
-        elif isinstance(query_set["include_test"], bool):
-            if query_set["include_test"]:
-                base_query = ~base_query
-        if isinstance(query_set["subsidiary"], str):
-            if query_set["subsidiary"] and query_set["subsidiary"].lower().replace(" ", "")!="all":
-                base_query &= Q(redemption_id__startswith=query_set["subsidiary"])
-        if isinstance(query_set["is_reviewed"], str):
-            if query_set["is_reviewed"] == "reviewed":
-                base_query &= Q(is_reviewed=True)
-            elif query_set["is_reviewed"] == "not reviewed":
-                base_query &= Q(is_reviewed=False)
-            # elif query_set["is_reviewed"] == "all":
-            #     pass
-
-        errors = []
-        # Create a placeholder to fill
-        accuracy = {"feedback" :{"imei_number": IterAvg(),
-                                 "purchase_date": IterAvg(),
-                                 "retailername": IterAvg(),
-                                 "sold_to_party": IterAvg(),},
-                    "reviewed" :{"imei_number": IterAvg(),
-                                 "purchase_date": IterAvg(),
-                                 "retailername": IterAvg(),
-                                 "sold_to_party": IterAvg(),}
-                    } # {"imei": {"acc": 0.1, count: 1}, ...}
-        time_cost = {"invoice": IterAvg(),
-                     "imei": IterAvg()}
-        number_images = 0
-        number_bad_images = 0
-        # TODO: Multithreading
-        # Calculate accuracy, processing time, ....Then save.
-        subscription_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at')
-        report: Report = \
-            Report.objects.filter(report_id=report_id).first()
-        # TODO: number of transaction by doc type
-        num_request = 0
-        for request in subscription_requests:
-            if request.status != 200 or not (request.reviewed_result or request.feedback_result):
-                # Failed requests or lack of reviewed_result/feedback_result
-                continue
-            request_att = calculate_and_save_subcription_file(report, request)
-
-            request.feedback_accuracy = {"imei_number" : mean_list(request_att["acc"]["feedback"].get("imei_number", [None])),
-                                         "purchase_date" : mean_list(request_att["acc"]["feedback"].get("purchase_date", [None])),
-                                         "retailername" : mean_list(request_att["acc"]["feedback"].get("retailername", [None])),
-                                         "sold_to_party" : mean_list(request_att["acc"]["feedback"].get("sold_to_party", [None]))}
-            request.reviewed_accuracy = {"imei_number" : mean_list(request_att["acc"]["reviewed"].get("imei_number", [None])),
-                                         "purchase_date" : mean_list(request_att["acc"]["reviewed"].get("purchase_date", [None])),
-                                         "retailername" : mean_list(request_att["acc"]["reviewed"].get("retailername", [None])),
-                                         "sold_to_party" : mean_list(request_att["acc"]["reviewed"].get("sold_to_party", [None]))}
-            request.save()
-            number_images += request_att["total_images"]
-            number_bad_images += request_att["bad_images"]
-            update_temp_accuracy(accuracy["feedback"], request_att["acc"]["feedback"], keys=["imei_number", "purchase_date", "retailername", "sold_to_party"])
-            update_temp_accuracy(accuracy["reviewed"], request_att["acc"]["reviewed"], keys=["imei_number", "purchase_date", "retailername", "sold_to_party"])
-
-            time_cost["imei"].add(request_att["time_cost"].get("imei", []))
-            time_cost["invoice"].add(request_att["time_cost"].get("invoice", []))
-
-            errors += request_att["err"]
-            num_request += 1
-        transaction_att = count_transactions(start_date, end_date, report.subsidiary)
-        # Do saving process
-        report.number_request = num_request
-        report.number_images = number_images
-        report.number_imei = time_cost["imei"].count
-        report.number_invoice = time_cost["invoice"].count
-        report.number_bad_images = number_bad_images
-        # FIXME: refactor this data stream for endurability
-        report.average_OCR_time = {"invoice": time_cost["invoice"](), "imei": time_cost["imei"](),
-                                   "invoice_count": time_cost["invoice"].count, "imei_count": time_cost["imei"].count}
-
-        report.average_OCR_time["avg"] = (report.average_OCR_time["invoice"]*report.average_OCR_time["invoice_count"] + report.average_OCR_time["imei"]*report.average_OCR_time["imei_count"])/(report.average_OCR_time["imei_count"] + report.average_OCR_time["invoice_count"]) if (report.average_OCR_time["imei_count"] + report.average_OCR_time["invoice_count"]) > 0 else None
-
-        report.number_imei_transaction = transaction_att.get("imei", 0)
-        report.number_invoice_transaction = transaction_att.get("invoice", 0)
-
-        acumulated_acc = {"feedback": {},
-                          "reviewed": {}}
-        for acc_type in ["feedback", "reviewed"]:
-            avg_acc = IterAvg()
-            for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
-                acumulated_acc[acc_type][key] = accuracy[acc_type][key]()
-                acumulated_acc[acc_type][key+"_count"] = accuracy[acc_type][key].count
-                avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"])
-            acumulated_acc[acc_type]["avg"] = avg_acc()
-
-        report.feedback_accuracy = acumulated_acc["feedback"]
-        report.reviewed_accuracy = acumulated_acc["reviewed"]
-
-        report.errors = "|".join(errors)
-        report.status = "Ready"
-        report.save()
-        # Saving a xlsx file
-        report_files = ReportFile.objects.filter(report=report)
-        data = extract_report_detail_list(report_files, lower=True)
-        data_workbook = dict2xlsx(data, _type='report_detail')
-        local_workbook = save_workbook_file(report.report_id + ".xlsx", report, data_workbook)
-        s3_key=save_report_to_S3(report.report_id, local_workbook)
-
-    except IndexError as e:
-        print(e)
-        traceback.print_exc()
-        print("NotFound request by report id, %d", report_id)
-    except Exception as e:
-        print("[ERROR]: an error occured while processing report: ", report_id)
-        traceback.print_exc()
-        return 400
-
 @app.task(name='make_a_report_2')
 def make_a_report_2(report_id, query_set):
     report_type = query_set.pop("report_type", "accuracy")
@@ -281,7 +154,7 @@ def create_accuracy_report(report_id, **kwargs):
                        "acumulated": {}}
     for acc_type in ["feedback", "reviewed", "acumulated"]:
         avg_acc = IterAvg()
-        for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
+        for key in ["imei_number", "purchase_date", "invoice_no", "retailername", "sold_to_party"]:
            acumulated_acc[acc_type][key] = accuracy[acc_type][key]()
            acumulated_acc[acc_type][key + "_count"] = accuracy[acc_type][key].count
            avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"])