From 2c50f86ed28231c70db79ca2dd81e168664c613f Mon Sep 17 00:00:00 2001 From: dx-tan Date: Mon, 11 Dec 2023 13:15:48 +0000 Subject: [PATCH] Add: Limit image decompressed size --- cope2n-ai-fi/.gitignore | 6 +++--- cope2n-api/fwd/settings.py | 1 + cope2n-api/fwd_api/exception/exceptions.py | 6 ++++++ cope2n-api/fwd_api/utils/FileUtils.py | 13 ++++++++++++- cope2n-api/fwd_api/utils/ProcessUtil.py | 6 ++++-- cope2n-api/requirements.txt | 3 ++- 6 files changed, 28 insertions(+), 7 deletions(-) diff --git a/cope2n-ai-fi/.gitignore b/cope2n-ai-fi/.gitignore index f2c8e28..9df20fd 100755 --- a/cope2n-ai-fi/.gitignore +++ b/cope2n-ai-fi/.gitignore @@ -3,9 +3,9 @@ __pycache__ DataBase/image_temp/ DataBase/json_temp/ DataBase/template.db -sdsvtd/ -sdsvtr/ -sdsvkie/ +# sdsvtd/ +# sdsvtr/ +# sdsvkie/ detectron2/ output/ data/ diff --git a/cope2n-api/fwd/settings.py b/cope2n-api/fwd/settings.py index e3ae101..b3eab12 100755 --- a/cope2n-api/fwd/settings.py +++ b/cope2n-api/fwd/settings.py @@ -202,6 +202,7 @@ CELERY_TASK_TIME_LIMIT = 30 * 60 MAX_UPLOAD_SIZE_OF_A_FILE = 100 * 1024 * 1024 # 100 MB MAX_UPLOAD_FILE_SIZE_OF_A_REQUEST = 100 * 1024 * 1024 # 100 MB MAX_UPLOAD_FILES_IN_A_REQUEST = 5 +MAX_PIXEL_IN_A_FILE = 5000 SIZE_TO_COMPRESS = 2 * 1024 * 1024 MAX_NUMBER_OF_TEMPLATE = 3 MAX_PAGES_OF_PDF_FILE = 50 diff --git a/cope2n-api/fwd_api/exception/exceptions.py b/cope2n-api/fwd_api/exception/exceptions.py index 6a71863..5e0d687 100755 --- a/cope2n-api/fwd_api/exception/exceptions.py +++ b/cope2n-api/fwd_api/exception/exceptions.py @@ -108,6 +108,12 @@ class FileContentInvalidException(InvalidException): default_detail = 'Invalid content file' detail_with_arg = 'One of the files is broken, please select other file and try again' +class InvalidDecompressedSizeException(InvalidException): + status_code = status.HTTP_400_BAD_REQUEST + default_code = 4008 + default_detail = 'Invalid decompressed file' + detail_with_arg = '{}x{} is not valid, maximum size for one side is {}' + class 
TokenExpiredException(GeneralException): status_code = status.HTTP_401_UNAUTHORIZED diff --git a/cope2n-api/fwd_api/utils/FileUtils.py b/cope2n-api/fwd_api/utils/FileUtils.py index 3e2a443..6e3b2ba 100755 --- a/cope2n-api/fwd_api/utils/FileUtils.py +++ b/cope2n-api/fwd_api/utils/FileUtils.py @@ -10,11 +10,12 @@ from django.core.files.uploadedfile import TemporaryUploadedFile from fwd import settings from fwd_api.constant.common import allowed_file_extensions from fwd_api.exception.exceptions import GeneralException, RequiredFieldException, InvalidException, \ - ServiceUnavailableException, FileFormatInvalidException, LimitReachedException + ServiceUnavailableException, FileFormatInvalidException, LimitReachedException, InvalidDecompressedSizeException from fwd_api.models import SubscriptionRequest, OcrTemplate from fwd_api.utils import ProcessUtil from fwd_api.utils.CryptoUtils import image_authenticator from ..celery_worker.client_connector import c_connector +import imagesize def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUEST, min_file_num=1, file_field="files"): total_file_size = 0 @@ -135,6 +136,8 @@ def resize_and_save_file(file_name: str, rq: SubscriptionRequest, file: Temporar # Create a new directory because it does not exist os.makedirs(folder_path) return save_file_with_path(file_name, file, quality, folder_path) + except InvalidDecompressedSizeException as e: + raise e except Exception as e: print(f"[ERROR]: {e}") raise ServiceUnavailableException() @@ -161,6 +164,8 @@ def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, fo save_pdf(file_path, file) else: save_img(file_path, file, quality) + except InvalidDecompressedSizeException as e: + raise e except Exception as e: print(e) raise ServiceUnavailableException() @@ -174,6 +179,12 @@ def save_pdf(file_path: str, file: TemporaryUploadedFile): def save_img(file_path: str, file: TemporaryUploadedFile, quality): + with open(file.temporary_file_path(), 
"rb") as fs: + input_file = io.BytesIO(fs.read()) + width, height = imagesize.get(input_file) + if width > settings.MAX_PIXEL_IN_A_FILE or height > settings.MAX_PIXEL_IN_A_FILE: + raise InvalidDecompressedSizeException(excArgs=(str(width), str(height), str(settings.MAX_PIXEL_IN_A_FILE))) + with open(file.temporary_file_path(), "rb") as fs: input_file = io.BytesIO(fs.read()) image = Image.open(input_file) diff --git a/cope2n-api/fwd_api/utils/ProcessUtil.py b/cope2n-api/fwd_api/utils/ProcessUtil.py index dd502a8..4c292e8 100755 --- a/cope2n-api/fwd_api/utils/ProcessUtil.py +++ b/cope2n-api/fwd_api/utils/ProcessUtil.py @@ -16,7 +16,7 @@ from fwd_api.exception.exceptions import NumberOfBoxLimitReachedException, \ from fwd_api.utils import DateUtil, FileUtils from ..constant.common import ProcessType, TEMPLATE_BOX_TYPE, EntityStatus from ..exception.exceptions import InvalidException, NotFoundException, \ - PermissionDeniedException, RequiredFieldException, InvalidException + PermissionDeniedException, RequiredFieldException, InvalidException, InvalidDecompressedSizeException from ..models import UserProfile, OcrTemplate, OcrTemplateBox, \ Subscription, SubscriptionRequestFile, SubscriptionRequest from ..celery_worker.client_connector import c_connector @@ -418,7 +418,7 @@ def process_image_local_file(file_name: str, file_path: str, request: Subscripti }] def pdf_to_images_urls(doc: fitz.Document, request: SubscriptionRequest, user, dpi: int = 300) -> list: - def resize(image, max_w=1920, max_h=1080): + def resize(image, max_w=2048, max_h=2048): logger.info(f"[DEBUG]: image.size: {image.size}, type(image): {type(image)}") cur_w, cur_h = image.width, image.height image_bytes = image.samples @@ -446,6 +446,8 @@ def pdf_to_images_urls(doc: fitz.Document, request: SubscriptionRequest, user, d pix = page.get_pixmap(dpi=250) # render page to an image # pix = resize(pix) # print(f"[DEBUG]: pix.size: {pix.size}") + if pix.size > 
8*3*settings.MAX_PIXEL_IN_A_FILE*settings.MAX_PIXEL_IN_A_FILE: + raise InvalidDecompressedSizeException(excArgs=(str(pix.width), str(pix.height), str(settings.MAX_PIXEL_IN_A_FILE))) pix.save(saving_path) print(f"Saving {saving_path}") new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=saving_path, diff --git a/cope2n-api/requirements.txt b/cope2n-api/requirements.txt index 94a5e2a..97fa17c 100755 --- a/cope2n-api/requirements.txt +++ b/cope2n-api/requirements.txt @@ -48,4 +48,5 @@ PyJWT~=2.6.0 whitenoise==6.4.0 PyMuPDF==1.21.1 djangorestframework-xml==2.0.0 -boto3==1.29.7 \ No newline at end of file +boto3==1.29.7 +imagesize==1.4.1 \ No newline at end of file