diff --git a/cope2n-api/fwd_api/api/accuracy_view.py b/cope2n-api/fwd_api/api/accuracy_view.py index 2159ad0..4338205 100644 --- a/cope2n-api/fwd_api/api/accuracy_view.py +++ b/cope2n-api/fwd_api/api/accuracy_view.py @@ -2,24 +2,21 @@ from rest_framework import status, viewsets from rest_framework.decorators import action from rest_framework.response import Response from django.core.paginator import Paginator -from django.http import JsonResponse +from django.http import JsonResponse, FileResponse, HttpResponse from django.utils import timezone from django.db.models import Q import uuid - +import os +from fwd import settings from drf_spectacular.utils import extend_schema, OpenApiParameter, OpenApiTypes # from drf_spectacular.types import OpenApiString import json -from ..exception.exceptions import InvalidException, RequiredFieldException +from ..exception.exceptions import InvalidException, RequiredFieldException, NotFoundException from ..models import SubscriptionRequest, Report, ReportFile -from ..utils.accuracy import shadow_report, MonthReportAccumulate -from ..utils.file import validate_report_list +from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list, extract_report_detail_list, IterAvg +from ..utils.file import download_from_S3 from ..utils.process import string_to_boolean - -def first_of_list(the_list): - if not the_list: - return None - return the_list[0] +from ..celery_worker.client_connector import c_connector class AccuracyViewSet(viewsets.ViewSet): lookup_field = "username" @@ -239,10 +236,10 @@ class AccuracyViewSet(viewsets.ViewSet): end_date_str = request.GET.get('end_date') request_id = request.GET.get('request_id', None) redemption_id = request.GET.get('redemption_id', None) - is_reviewed = string_to_boolean(request.data.get('is_reviewed', "false")) - include_test = string_to_boolean(request.data.get('include_test', "false")) + is_reviewed = string_to_boolean(request.GET.get('is_reviewed', "false")) + include_test = string_to_boolean(request.GET.get('include_test', "false")) subsidiary = request.GET.get("subsidiary", "all") - is_daily_report = string_to_boolean(request.data.get('is_daily_report', "false")) + is_daily_report = string_to_boolean(request.GET.get('is_daily_report', "false")) try: start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z') @@ -269,7 +266,10 @@ class AccuracyViewSet(viewsets.ViewSet): include_reviewed=is_reviewed, start_at=start_date, end_at=end_date, + status="Processing", ) + if is_daily_report: + new_report.created_at = end_date new_report.save() # Background job to calculate accuracy shadow_report(report_id, query_set) @@ -314,33 +314,13 @@ class AccuracyViewSet(viewsets.ViewSet): paginator = Paginator(report_files, page_size) page = paginator.get_page(page_number) - data = [] - for report_file in page: - data.append({ - "Request ID": report_file.correspond_request_id, - "Redemption Number": report_file.correspond_redemption_id, - "Image type": report_file.doc_type, - "IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])), - "IMEI_OCR retrieved": first_of_list(report_file.predict_result.get("imei_number", [None])), - "IMEI1 Accuracy": first_of_list(report_file.feedback_accuracy.get("imei_number", [None])), - "Invoice_Purchase Date_Consumer": report_file.feedback_result.get("purchase_date", None), - "Invoice_Purchase Date_OCR": report_file.predict_result.get("purchase_date", []), - "Invoice_Purchase Date Accuracy": first_of_list(report_file.feedback_accuracy.get("purchase_date", [None])), - "Invoice_Retailer_Consumer": report_file.feedback_result.get("retailername", None), - "Invoice_Retailer_OCR": report_file.predict_result.get("retailername", None), - "Invoice_Retailer Accuracy": first_of_list(report_file.feedback_accuracy.get("retailername", [None])), - "OCR Image Accuracy": report_file.acc, - "OCR Image Speed (seconds)": report_file.time_cost, - "Reviewed?": "No", - "Bad Image Reasons": report_file.bad_image_reason, - "Countermeasures": report_file.counter_measures, - "IMEI_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("imei_number", [None])), - "Purchase Date_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("purchase_date", [None])), - "Retailer_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("retailername", [None])), - }) - + data = extract_report_detail_list(page, in_percent=False) + response = { 'report_detail': data, + 'metadata': {"subsidiary": report.subsidiary, + "start_at": report.start_at, + "end_at": report.end_at}, 'page': { 'number': page.number, 'total_pages': page.paginator.num_pages, @@ -428,7 +408,7 @@ class AccuracyViewSet(viewsets.ViewSet): "IMEI Acc": report.feedback_accuracy.get("imei_number", None) if report.reviewed_accuracy else None, "Avg. Accuracy": report.feedback_accuracy.get("avg", None) if report.reviewed_accuracy else None, "Avg. Client Request Time": report.average_client_time.get("avg", 0) if report.average_client_time else 0, - "Avg. OCR Processing Time": report.average_OCR_time.get("avg", 0) if report.average_client_time else 0, + "Avg. OCR Processing Time": report.average_OCR_time.get("avg", 0) if report.average_OCR_time else 0, "report_id": report.report_id, }) @@ -491,21 +471,21 @@ class AccuracyViewSet(viewsets.ViewSet): end_date_str = request.GET.get('end_date', "") page_number = int(request.GET.get('page', 1)) page_size = int(request.GET.get('page_size', 10)) - - if not start_date_str or not end_date_str: - reports = Report.objects.all() - else: + base_query = Q() + + if start_date_str and end_date_str: try: start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z') end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z') except ValueError: raise InvalidException(excArgs="Date format") - base_query = Q(created_at__range=(start_date, end_date)) - if subsidiary: - base_query &= Q(subsidiary=subsidiary) - base_query &= Q(is_daily_report=True) - reports = Report.objects.filter(base_query).order_by('created_at') + base_query &= Q(created_at__range=(start_date, end_date)) + + if subsidiary: + base_query &= Q(subsidiary=subsidiary) + base_query &= Q(is_daily_report=True) + reports = Report.objects.filter(base_query).order_by('created_at') paginator = Paginator(reports, page_size) page = paginator.get_page(page_number) @@ -525,8 +505,15 @@ class AccuracyViewSet(viewsets.ViewSet): _, _data, total = this_month_report() data += [total] data += _data + # Generate xlsx file + # workbook = dict2xlsx(data, _type="report") + # tmp_file = f"/tmp/{str(uuid.uuid4())}.xlsx" + # os.makedirs(os.path.dirname(tmp_file), exist_ok=True) + # workbook.save(tmp_file) + # c_connector.remove_local_file((tmp_file, "fake_request_id")) response = { + # 'file': load_xlsx_file(), 'overview_data': data, 'page': { 'number': page.number, @@ -538,28 +525,59 @@ class AccuracyViewSet(viewsets.ViewSet): return JsonResponse({'error': 'Invalid request method.'}, status=405) + @extend_schema( + parameters=[], + responses=None, tags=['Accuracy'] + ) + @action(detail=False, url_path=r"get_report_file/(?P[\w\-]+)", methods=["GET"]) + def get_report_file(self, request, report_id): + if request.method == 'GET': + # report_id = request.GET.get('report_id', None) + if not report_id: + raise RequiredFieldException(excArgs="report_id1") + report_num = Report.objects.filter(report_id=report_id).count() + if report_num == 0: + raise NotFoundException(excArgs=f"report: {report_id}") + report = Report.objects.filter(report_id=report_id).first() + # download from s3 to local + tmp_file = "/tmp/" + "report_" + uuid.uuid4().hex + ".xlsx" + os.makedirs("/tmp", exist_ok=True) + if not report.S3_file_name: + raise NotFoundException(excArgs="S3 file name") + download_from_S3(report.S3_file_name, tmp_file) + file = open(tmp_file, 'rb') + response = FileResponse(file, status=200) + + # Set the content type and content disposition headers + response['Content-Type'] = 'application/octet-stream' + response['Content-Disposition'] = 'attachment; filename="{0}"'.format(os.path.basename(tmp_file)) + return response + + return JsonResponse({'error': 'Invalid request method.'}, status=405) class RequestViewSet(viewsets.ViewSet): lookup_field = "username" - @extend_schema(request = { + @extend_schema( + request={ 'multipart/form-data': { 'type': 'object', 'properties': { 'reviewed_result': { 'type': 'string', + 'default': '''{"request_id": "Sample request_id", "imei_number": ["sample_imei1", "sample_imei2"], "retailername": "Sample Retailer", "purchase_date": "01/01/1970", "sold_to_party": "Sample party"}''', }, - } + }, }, - }, responses=None, tags=['Request'] + }, + responses=None, + tags=['Request'] ) @action(detail=False, url_path=r"request/(?P[\w\-]+)", methods=["GET", "POST"]) def get_subscription_request(self, request, request_id=None): if request.method == 'GET': base_query = Q(request_id=request_id) - subscription_request = SubscriptionRequest.objects.filter(base_query).first() - data = [] imeis = [] @@ -611,7 +629,7 @@ class RequestViewSet(viewsets.ViewSet): subscription_request = SubscriptionRequest.objects.filter(base_query).first() - reviewed_result = json.loads(data["reviewed_result"][1:-1]) + reviewed_result = json.loads(data["reviewed_result"]) for field in ['retailername', 'sold_to_party', 'purchase_date', 'imei_number']: if not field in reviewed_result.keys(): raise RequiredFieldException(excArgs=f'reviewed_result.{field}') diff --git a/cope2n-api/fwd_api/celery_worker/client_connector.py b/cope2n-api/fwd_api/celery_worker/client_connector.py index 5e0d59c..c10cbdd 100755 --- a/cope2n-api/fwd_api/celery_worker/client_connector.py +++ b/cope2n-api/fwd_api/celery_worker/client_connector.py @@ -32,6 +32,7 @@ class CeleryConnector: 'upload_file_to_s3': {'queue': "upload_file_to_s3"}, 'upload_feedback_to_s3': {'queue': "upload_feedback_to_s3"}, 'upload_obj_to_s3': {'queue': "upload_obj_to_s3"}, + 'upload_report_to_s3': {'queue': "upload_report_to_s3"}, 'remove_local_file': {'queue': "remove_local_file"}, 'csv_feedback': {'queue': "csv_feedback"}, 'make_a_report': {'queue': "report"}, @@ -50,8 +51,10 @@ class CeleryConnector: return self.send_task('do_pdf', args) def upload_file_to_s3(self, args): return self.send_task('upload_file_to_s3', args) - def upload_feedback_to_s3(self, args): - return self.send_task('upload_feedback_to_s3', args) + def upload_file_to_s3(self, args): + return self.send_task('upload_file_to_s3', args) + def upload_report_to_s3(self, args): + return self.send_task('upload_report_to_s3', args) def upload_obj_to_s3(self, args): return self.send_task('upload_obj_to_s3', args) def remove_local_file(self, args): diff --git a/cope2n-api/fwd_api/celery_worker/internal_task.py b/cope2n-api/fwd_api/celery_worker/internal_task.py index 4443ad6..bf12b3f 100755 --- a/cope2n-api/fwd_api/celery_worker/internal_task.py +++ b/cope2n-api/fwd_api/celery_worker/internal_task.py @@ -9,7 +9,7 @@ from fwd_api.models import SubscriptionRequest, UserProfile from fwd_api.celery_worker.worker import app from ..constant.common import FolderFileType, image_extensions from ..exception.exceptions import FileContentInvalidException -from fwd_api.models import SubscriptionRequestFile, FeedbackRequest +from fwd_api.models import SubscriptionRequestFile, FeedbackRequest, Report from ..utils import file as FileUtils from ..utils import process as ProcessUtil from ..utils import s3 as S3Util @@ -211,6 +211,22 @@ def upload_feedback_to_s3(local_file_path, s3_key, feedback_id): else: logger.info(f"S3 is not available, skipping,...") +@app.task(name='upload_report_to_s3') +def upload_report_to_s3(local_file_path, s3_key, report_id): + if s3_client.s3_client is not None: + try: + s3_client.upload_file(local_file_path, s3_key) + report = Report.objects.filter(report_id=report_id)[0] + report.S3_uploaded = True + report.S3_file_name = s3_key + report.save() + except Exception as e: + logger.error(f"Unable to set S3: {e}") + print(f"Unable to set S3: {e}") + return + else: + logger.info(f"S3 is not available, skipping,...") + @app.task(name='remove_local_file') def remove_local_file(local_file_path, request_id): print(f"[INFO] Removing local file: {local_file_path}, ...") diff --git a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py index c5b2a86..9dd6915 100644 --- a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py +++ b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py @@ -1,24 +1,12 @@ -import time -import uuid -import os -import base64 import traceback -from multiprocessing.pool import ThreadPool -from fwd_api.models import SubscriptionRequest, UserProfile +from fwd_api.models import SubscriptionRequest, Report, ReportFile from fwd_api.celery_worker.worker import app -from ..constant.common import FolderFileType, image_extensions -from ..exception.exceptions import FileContentInvalidException -from fwd_api.models import SubscriptionRequestFile, FeedbackRequest, Report -from ..utils import file as FileUtils -from ..utils import process as ProcessUtil from ..utils import s3 as S3Util -from ..utils.accuracy import update_temp_accuracy, IterAvg, calculate_and_save_subcription_file -from fwd_api.constant.common import ProcessType +from ..utils.accuracy import update_temp_accuracy, IterAvg, calculate_and_save_subcription_file, count_transactions, extract_report_detail_list +from ..utils.file import dict2xlsx, save_workbook_file, save_report_to_S3 from django.utils import timezone from django.db.models import Q -import csv -import json from celery.utils.log import get_task_logger from fwd import settings @@ -117,28 +105,45 @@ def make_a_report(report_id, query_set): errors += request_att["err"] num_request += 1 + transaction_att = count_transactions(start_date, end_date) # Do saving process report.number_request = num_request report.number_images = number_images report.number_imei = time_cost["imei"].count report.number_invoice = time_cost["invoice"].count report.number_bad_images = number_bad_images + # FIXME: refactor this data stream for endurability report.average_OCR_time = {"invoice": time_cost["invoice"](), "imei": time_cost["imei"](), "invoice_count": time_cost["invoice"].count, "imei_count": time_cost["imei"].count} + report.average_OCR_time["avg"] = (report.average_OCR_time["invoice"]*report.average_OCR_time["invoice_count"] + report.average_OCR_time["imei"]*report.average_OCR_time["imei_count"])/(report.average_OCR_time["imei_count"] + report.average_OCR_time["invoice_count"]) + + report.number_imei_transaction = transaction_att.get("imei", 0) + report.number_invoice_transaction = transaction_att.get("invoice", 0) + acumulated_acc = {"feedback": {}, "reviewed": {}} - for acc_type in ["feedback", "reviewed"]: + avg_acc = IterAvg() for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]: acumulated_acc[acc_type][key] = accuracy[acc_type][key]() acumulated_acc[acc_type][key+"_count"] = accuracy[acc_type][key].count + avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"]) + acumulated_acc[acc_type]["avg"] = avg_acc() report.feedback_accuracy = acumulated_acc["feedback"] report.reviewed_accuracy = acumulated_acc["reviewed"] report.errors = "|".join(errors) + report.status = "Ready" report.save() + # Saving a xlsx file + report_files = ReportFile.objects.filter(report=report) + data = extract_report_detail_list(report_files, lower=True) + data_workbook = dict2xlsx(data, _type='report_detail') + local_workbook = save_workbook_file(report.report_id + ".xlsx", report, data_workbook) + s3_key=save_report_to_S3(report.report_id, local_workbook) + except IndexError as e: print(e) traceback.print_exc() diff --git a/cope2n-api/fwd_api/celery_worker/worker.py b/cope2n-api/fwd_api/celery_worker/worker.py index 31a3262..5bb6963 100755 --- a/cope2n-api/fwd_api/celery_worker/worker.py +++ b/cope2n-api/fwd_api/celery_worker/worker.py @@ -38,6 +38,7 @@ app.conf.update({ Queue('upload_file_to_s3'), Queue('upload_feedback_to_s3'), Queue('upload_obj_to_s3'), + Queue('upload_report_to_s3'), Queue('remove_local_file'), Queue('csv_feedback'), Queue('report'), @@ -56,6 +57,7 @@ app.conf.update({ 'upload_file_to_s3': {'queue': "upload_file_to_s3"}, 'upload_feedback_to_s3': {'queue': "upload_feedback_to_s3"}, 'upload_obj_to_s3': {'queue': "upload_obj_to_s3"}, + 'upload_report_to_s3': {'queue': "upload_report_to_s3"}, 'remove_local_file': {'queue': "remove_local_file"}, 'csv_feedback': {'queue': "csv_feedback"}, 'make_a_report': {'queue': "report"}, diff --git a/cope2n-api/fwd_api/migrations/0176_report_s3_file_name.py b/cope2n-api/fwd_api/migrations/0176_report_s3_file_name.py new file mode 100644 index 0000000..aed983a --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0176_report_s3_file_name.py @@ -0,0 +1,18 @@ +# Generated by Django 4.1.3 on 2024-01-31 09:31 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0175_rename_number_ivoice_transaction_report_number_imei_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='report', + name='S3_file_name', + field=models.TextField(default=None, null=True), + ), + ] diff --git a/cope2n-api/fwd_api/migrations/0177_alter_report_subsidiary.py b/cope2n-api/fwd_api/migrations/0177_alter_report_subsidiary.py new file mode 100644 index 0000000..418712b --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0177_alter_report_subsidiary.py @@ -0,0 +1,18 @@ +# Generated by Django 4.1.3 on 2024-02-01 03:27 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0176_report_s3_file_name'), + ] + + operations = [ + migrations.AlterField( + model_name='report', + name='subsidiary', + field=models.CharField(default='', max_length=200, null=True), + ), + ] diff --git a/cope2n-api/fwd_api/models/Report.py b/cope2n-api/fwd_api/models/Report.py index 340b305..92a2755 100644 --- a/cope2n-api/fwd_api/models/Report.py +++ b/cope2n-api/fwd_api/models/Report.py @@ -16,12 +16,13 @@ class Report(models.Model): status = models.CharField(max_length=100) is_daily_report = models.BooleanField(default=False) errors = models.TextField(default="", null=True) - subsidiary = models.TextField(default="", null=True) - include_reviewed = models.TextField(default="", null=True) + subsidiary = models.CharField(default="", null=True, max_length=200) + include_reviewed = models.TextField(default="", null=True, ) include_test = models.CharField(max_length=200, default="") # Data S3_uploaded = models.BooleanField(default=False) + S3_file_name = models.TextField(default=None, null=True) number_request = models.IntegerField(default=0) number_images = models.IntegerField(default=0) number_bad_images = models.IntegerField(default=0) diff --git a/cope2n-api/fwd_api/utils/accuracy.py b/cope2n-api/fwd_api/utils/accuracy.py index 56152f6..06b9187 100644 --- a/cope2n-api/fwd_api/utils/accuracy.py +++ b/cope2n-api/fwd_api/utils/accuracy.py @@ -2,10 +2,12 @@ import re from datetime import datetime import copy +from typing import Any from .ocr_utils.ocr_metrics import eval_ocr_metric from .ocr_utils.sbt_report import post_processing_str from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile, ReportFile from ..celery_worker.client_connector import c_connector +from django.db.models import Q BAD_THRESHOLD = 0.75 @@ -40,6 +42,8 @@ class MonthReportAccumulate: } self.data = [] self.data_format = { + 'subs': "", + 'extraction_date': "", 'num_imei': 0, 'num_invoice': 0, 'total_images': 0, @@ -69,6 +73,7 @@ class MonthReportAccumulate: self.total["images_quality"]["successful"] += report.number_images - report.number_bad_images self.total["images_quality"]["bad"] += report.number_bad_images + if sum([report.reviewed_accuracy[x] for x in report.reviewed_accuracy.keys() if "_count" not in x]) > 0 : self.total["average_accuracy_rate"]["imei"].add_avg(report.reviewed_accuracy.get("imei_number", 0), report.reviewed_accuracy.get("imei_number_count", 0)) self.total["average_accuracy_rate"]["purchase_date"].add_avg(report.reviewed_accuracy.get("purchase_date", 0), report.reviewed_accuracy.get("purchase_date_count", 0)) @@ -78,8 +83,8 @@ class MonthReportAccumulate: self.total["average_accuracy_rate"]["purchase_date"].add_avg(report.feedback_accuracy.get("purchase_date", 0), report.feedback_accuracy.get("purchase_date_count", 0)) self.total["average_accuracy_rate"]["retailer_name"].add_avg(report.feedback_accuracy.get("retailername", 0), report.feedback_accuracy.get("retailername_count", 0)) - self.total["average_processing_time"]["imei"].add_avg(report.average_OCR_time.get("imei", 0), report.average_OCR_time.get("imei_count", 0)) - self.total["average_processing_time"]["invoice"].add_avg(report.average_OCR_time.get("invoice", 0), report.average_OCR_time.get("invoice_count", 0)) + self.total["average_processing_time"]["imei"].add_avg(report.average_OCR_time.get("imei", 0), report.average_OCR_time.get("imei_count", 0)) if report.average_OCR_time else 0 + self.total["average_processing_time"]["invoice"].add_avg(report.average_OCR_time.get("invoice", 0), report.average_OCR_time.get("invoice_count", 0)) if report.average_OCR_time else 0 self.total["usage"]["imei"] += report.number_imei_transaction self.total["usage"]["invoice"] += report.number_invoice_transaction @@ -89,6 +94,7 @@ class MonthReportAccumulate: if self.month is None: self.month = report_month self.total["extraction_date"] = f"Subtotal ({self.month})" + elif self.month != report_month: self.total["images_quality"]["successful_percent"] += self.total["images_quality"]["successful"]/self.total["total_images"] self.total["images_quality"]["bad_percent"] += self.total["images_quality"]["bad"]/self.total["total_images"] @@ -96,11 +102,15 @@ class MonthReportAccumulate: # accumulate fields new_data = copy.deepcopy(self.data_format)[0] new_data["num_imei"] = report.number_imei + new_data["subs"] = report.subsidiary + new_data["extraction_date"] = report.created_at new_data["num_invoice"] = report.number_invoice new_data["total_images"] = report.number_images new_data["images_quality"]["successful"] = report.number_images - report.number_bad_images new_data["images_quality"]["bad"] = report.number_bad_images - + + report.reviewed_accuracy = {} if report.reviewed_accuracy is None else report.reviewed_accuracy + report.feedback_accuracy = {} if report.feedback_accuracy is None else report.feedback_accuracy if sum([ report.reviewed_accuracy[x] for x in report.reviewed_accuracy.keys() if "_count" not in x]): new_data["average_accuracy_rate"]["imei"] = report.reviewed_accuracy.get("imei_number", None) new_data["average_accuracy_rate"]["purchase_date"] = report.reviewed_accuracy.get("purchase_date", None) @@ -109,20 +119,20 @@ class MonthReportAccumulate: new_data["average_accuracy_rate"]["imei"] = report.feedback_accuracy.get("imei_number", None) new_data["average_accuracy_rate"]["purchase_date"] = report.feedback_accuracy.get("purchase_date", None) new_data["average_accuracy_rate"]["retailer_name"] = report.feedback_accuracy.get("retailername", None) - new_data["average_processing_time"]["imei"] = report.average_OCR_time.get("imei", 0) - new_data["average_processing_time"]["invoice"] = report.average_OCR_time.get("invoice", 0) + new_data["average_processing_time"]["imei"] = report.average_OCR_time.get("imei", 0) if report.average_OCR_time else 0 + new_data["average_processing_time"]["invoice"] = report.average_OCR_time.get("invoice", 0) if report.average_OCR_time else 0 new_data["usage"]["imei"] = report.number_imei_transaction new_data["usage"]["invoice"] = report.number_invoice_transaction - new_data["images_quality"]["successful_percent"] += new_data["images_quality"]["successful"]/new_data["total_images"] - new_data["images_quality"]["bad_percent"] += new_data["images_quality"]["bad"]/new_data["total_images"] + new_data["images_quality"]["successful_percent"] += new_data["images_quality"]["successful"]/new_data["total_images"] if new_data["total_images"] else 0 + new_data["images_quality"]["bad_percent"] += new_data["images_quality"]["bad"]/new_data["total_images"] if new_data["total_images"] else 0 self.data.append(new_data) self.accumulate(report) return True def __call__(self): - self.total["images_quality"]["successful_percent"] += self.total["images_quality"]["successful"]/self.total["total_images"] - self.total["images_quality"]["bad_percent"] += self.total["images_quality"]["bad"]/self.total["total_images"] + self.total["images_quality"]["successful_percent"] += self.total["images_quality"]["successful"]/self.total["total_images"] if self.total["total_images"] else 0 + self.total["images_quality"]["bad_percent"] += self.total["images_quality"]["bad"]/self.total["total_images"] if self.total["total_images"] else 0 total = copy.deepcopy(self.total) total["average_accuracy_rate"]["imei"] = total["average_accuracy_rate"]["imei"]() total["average_accuracy_rate"]["purchase_date"] = total["average_accuracy_rate"]["purchase_date"]() @@ -157,6 +167,67 @@ class IterAvg: def __call__(self): return self.avg +def first_of_list(the_list): + if not the_list: + return None + return the_list[0] + +def extract_report_detail_list(report_detail_list, lower=False, in_percent=True): + data = [] + for report_file in report_detail_list: + data.append({ + "Request ID": report_file.correspond_request_id, + "Redemption Number": report_file.correspond_redemption_id, + "Image type": report_file.doc_type, + "IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])), + "IMEI_OCR retrieved": first_of_list(report_file.predict_result.get("imei_number", [None])), + "IMEI1 Accuracy": first_of_list(report_file.feedback_accuracy.get("imei_number", [None])), + "Invoice_Purchase Date_Consumer": report_file.feedback_result.get("purchase_date", None), + "Invoice_Purchase Date_OCR": report_file.predict_result.get("purchase_date", []), + "Invoice_Purchase Date Accuracy": first_of_list(report_file.feedback_accuracy.get("purchase_date", [None])), + "Invoice_Retailer_Consumer": report_file.feedback_result.get("retailername", None), + "Invoice_Retailer_OCR": report_file.predict_result.get("retailername", None), + "Invoice_Retailer Accuracy": first_of_list(report_file.feedback_accuracy.get("retailername", [None])), + "OCR Image Accuracy": report_file.acc, + "OCR Image Speed (seconds)": report_file.time_cost, + "Reviewed?": "No", + "Bad Image Reasons": report_file.bad_image_reason, + "Countermeasures": report_file.counter_measures, + "IMEI_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("imei_number", [None])), + "Purchase Date_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("purchase_date", [None])), + "Retailer_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("retailername", [None])), + }) + if lower: + for i, dat in enumerate(data): + keys = list(dat.keys()) + for old_key in keys: + data[i][old_key.lower().replace(" ", "_")] = data[i].pop(old_key) + if in_percent: + for i, dat in enumerate(data): + keys = [x for x in list(dat.keys()) if "accuracy" in x.lower()] + for key in keys: + if data[i][key]: + data[i][key] = data[i][key]*100 + return data + +def count_transactions(start_date, end_date): + base_query = Q(created_at__range=(start_date, end_date)) + base_query &= Q(is_test_request=False) + transaction_att = {} + + print(f"[DEBUG]: atracting transactions attribute...") + total_transaction_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at') + for request in total_transaction_requests: + if not request.doc_type: + continue + doc_types = request.doc_type.split(",") + for doc_type in doc_types: + if transaction_att.get(doc_type, None) == None: + transaction_att[doc_type] = 1 + else: + transaction_att[doc_type] += 1 + return transaction_att + def convert_datetime_format(date_string: str, is_gt=False) -> str: # pattern_date_string = "2023-02-28" input_format = "%Y-%m-%d" diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py index a4d364c..d79ed96 100644 --- a/cope2n-api/fwd_api/utils/file.py +++ b/cope2n-api/fwd_api/utils/file.py @@ -9,10 +9,11 @@ from django.core.files.uploadedfile import TemporaryUploadedFile from django.utils import timezone from fwd import settings +from ..utils import s3 as S3Util from fwd_api.constant.common import allowed_file_extensions from fwd_api.exception.exceptions import GeneralException, RequiredFieldException, InvalidException, \ ServiceUnavailableException, FileFormatInvalidException, LimitReachedException, InvalidDecompressedSizeException, RequiredColumnException -from fwd_api.models import SubscriptionRequest, OcrTemplate, FeedbackRequest, SubscriptionRequestFile +from fwd_api.models import SubscriptionRequest, OcrTemplate, FeedbackRequest, SubscriptionRequestFile, Report, ReportFile from fwd_api.utils import process as ProcessUtil from fwd_api.utils.crypto import image_authenticator from fwd_api.utils.image import resize @@ -22,6 +23,13 @@ import csv from openpyxl import load_workbook from openpyxl.styles import Font, Border, Side, PatternFill, NamedStyle +s3_client = S3Util.MinioS3Client( + endpoint=settings.S3_ENDPOINT, + access_key=settings.S3_ACCESS_KEY, + secret_key=settings.S3_SECRET_KEY, + bucket_name=settings.S3_BUCKET_NAME +) + def validate_report_list(request): start_date_str = request.GET.get('start_date') end_date_str = request.GET.get('end_date') @@ -182,6 +190,16 @@ def save_feedback_file(file_name: str, rq: FeedbackRequest, uploaded_file: dict) csvfile.write(file_contents) return file_path +def save_workbook_file(file_name: str, rp: Report, workbook): + report_id = str(rp.report_id) + + folder_path = os.path.join(settings.MEDIA_ROOT, "report", report_id) + os.makedirs(folder_path, exist_ok = True) + + file_path = os.path.join(folder_path, file_name) + workbook.save(file_path) + return file_path + def delete_file_with_path(file_path: str) -> bool: try: os.remove(file_path) @@ -266,6 +284,19 @@ def save_feedback_to_S3(file_name, id, local_file_path): print(f"[ERROR]: {e}") raise ServiceUnavailableException() +def save_report_to_S3(id, local_file_path): + try: + s3_key = os.path.join("report", local_file_path.split("/")[-2], local_file_path.split("/")[-1]) + c_connector.upload_report_to_s3((local_file_path, s3_key, id)) + c_connector.remove_local_file((local_file_path, id)) + return s3_key + except Exception as e: + print(f"[ERROR]: {e}") + raise ServiceUnavailableException() + +def download_from_S3(s3_key, local_file_path): + s3_client.download_file(s3_key, local_file_path) + def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path): try: file_path = os.path.join(folder_path, file_name) @@ -363,10 +394,11 @@ def get_value(_dict, keys): else: value = value.get(key, {}) - if value != 0: - return value - else: + if not value: return "-" + elif isinstance(value, list): + value = str(value) + return value def dict2xlsx(input: json, _type='report'): diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index e9bf8c7..44f8c94 100755 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -67,6 +67,7 @@ services: - ADMIN_PASSWORD=${ADMIN_PASSWORD} - STANDARD_USER_NAME=${STANDARD_USER_NAME} - STANDARD_PASSWORD=${STANDARD_PASSWORD} + - S3_ENDPOINT=${S3_ENDPOINT} - S3_ACCESS_KEY=${S3_ACCESS_KEY} - S3_SECRET_KEY=${S3_SECRET_KEY} - S3_BUCKET_NAME=${S3_BUCKET_NAME}