From dd5b6bd023e8dda69efcf990c45ff982e86e3671 Mon Sep 17 00:00:00 2001 From: Manuel Raynaud Date: Tue, 20 Jan 2026 10:49:19 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9C=85(backend)=20improve=20validation=20on?= =?UTF-8?q?=20conversion=20uploaded=20file?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We now check the size and the extension of the uploaded file for conversion. --- docs/env.md | 2 + src/backend/core/api/serializers.py | 29 +++++++++- .../test_api_documents_create_with_file.py | 55 +++++++++++++++++++ src/backend/impress/settings.py | 19 ++++++- 4 files changed, 103 insertions(+), 2 deletions(-) diff --git a/docs/env.md b/docs/env.md index 43a4cec3..e4ed43d5 100644 --- a/docs/env.md +++ b/docs/env.md @@ -32,6 +32,8 @@ These are the environment variables you can set for the `impress-backend` contai | CONVERSION_API_ENDPOINT | Conversion API endpoint | convert | | CONVERSION_API_SECURE | Require secure conversion api | false | | CONVERSION_API_TIMEOUT | Conversion api timeout | 30 | +| CONVERSION_FILE_MAX_SIZE | Maximum size in bytes allowed for a file uploaded for conversion | 20971520 (20MB) | +| CONVERSION_FILE_EXTENSIONS_ALLOWED | Extension list managed by the conversion service | [".docx", ".md"] | | CRISP_WEBSITE_ID | Crisp website id for support | | | DB_ENGINE | Engine to use for database connections | django.db.backends.postgresql_psycopg2 | | DB_HOST | Host of the database | localhost | diff --git a/src/backend/core/api/serializers.py b/src/backend/core/api/serializers.py index 3c13cf4d..349e0191 100644 --- a/src/backend/core/api/serializers.py +++ b/src/backend/core/api/serializers.py @@ -4,6 +4,7 @@ import binascii import mimetypes from base64 import b64decode +from os.path import splitext from django.conf import settings from django.db.models import Q @@ -165,7 +166,9 @@ class DocumentSerializer(ListDocumentSerializer): content = serializers.CharField(required=False) websocket = 
serializers.BooleanField(required=False, write_only=True) - file = serializers.FileField(required=False, write_only=True, allow_null=True) + file = serializers.FileField( + required=False, write_only=True, allow_null=True, max_length=255 + ) class Meta: model = models.Document @@ -252,6 +255,30 @@ class DocumentSerializer(ListDocumentSerializer): return value + def validate_file(self, file): + """Add file size and type constraints as defined in settings.""" + if not file: + return None + + # Validate file size + if file.size > settings.CONVERSION_FILE_MAX_SIZE: + max_size = settings.CONVERSION_FILE_MAX_SIZE // (1024 * 1024) + raise serializers.ValidationError( + f"File size exceeds the maximum limit of {max_size:d} MB." + ) + + _name, extension = splitext(file.name) + + if extension.lower() not in settings.CONVERSION_FILE_EXTENSIONS_ALLOWED: + raise serializers.ValidationError( + ( + f"File extension {extension} is not allowed. Allowed extensions" + f" are: {settings.CONVERSION_FILE_EXTENSIONS_ALLOWED}." + ) + ) + + return file + def save(self, **kwargs): """ Process the content field to extract attachment keys and update the document's diff --git a/src/backend/core/tests/documents/test_api_documents_create_with_file.py b/src/backend/core/tests/documents/test_api_documents_create_with_file.py index 9389a816..3cd6dda2 100644 --- a/src/backend/core/tests/documents/test_api_documents_create_with_file.py +++ b/src/backend/core/tests/documents/test_api_documents_create_with_file.py @@ -356,3 +356,58 @@ def test_api_documents_create_with_file_unicode_filename(mock_convert): assert response.status_code == 201 document = Document.objects.get() assert document.title == "文档-télécharger-документ.docx" + + +def test_api_documents_create_with_file_max_size_exceeded(settings): + """ + The uploaded file should not exceed the maximum size in settings. 
+ """ + settings.CONVERSION_FILE_MAX_SIZE = 1 # 1 byte for test + + user = factories.UserFactory() + client = APIClient() + client.force_login(user) + + file = BytesIO(b"a" * (10)) + file.name = "test.docx" + + response = client.post( + "/api/v1.0/documents/", + { + "file": file, + }, + format="multipart", + ) + + assert response.status_code == 400 + + assert response.json() == {"file": ["File size exceeds the maximum limit of 0 MB."]} + + +def test_api_documents_create_with_file_extension_not_allowed(settings): + """ + An uploaded file whose extension is not in the allowed list should be rejected. + """ + settings.CONVERSION_FILE_EXTENSIONS_ALLOWED = [".docx"] + + user = factories.UserFactory() + client = APIClient() + client.force_login(user) + + file = BytesIO(b"fake docx content") + file.name = "test.md" + + response = client.post( + "/api/v1.0/documents/", + { + "file": file, + }, + format="multipart", + ) + + assert response.status_code == 400 + assert response.json() == { + "file": [ + "File extension .md is not allowed. Allowed extensions are: ['.docx']." 
+ ] + } diff --git a/src/backend/impress/settings.py b/src/backend/impress/settings.py index 64ffed97..60bf31fe 100755 --- a/src/backend/impress/settings.py +++ b/src/backend/impress/settings.py @@ -29,6 +29,10 @@ from sentry_sdk.integrations.logging import ignore_logger BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) DATA_DIR = os.getenv("DATA_DIR", os.path.join("/", "data")) +KB = 1024 +MB = KB * KB +GB = MB * KB + def get_release(): """ @@ -168,7 +172,7 @@ class Base(Configuration): # Document images DOCUMENT_IMAGE_MAX_SIZE = values.IntegerValue( - 10 * (2**20), # 10MB + 10 * MB, # 10MB environ_name="DOCUMENT_IMAGE_MAX_SIZE", environ_prefix=None, ) @@ -712,6 +716,19 @@ class Base(Configuration): # DocSpec API microservice DOCSPEC_API_URL = values.Value(environ_name="DOCSPEC_API_URL", environ_prefix=None) + # Imported file settings + CONVERSION_FILE_MAX_SIZE = values.IntegerValue( + 20 * MB, # 20MB + environ_name="CONVERSION_FILE_MAX_SIZE", + environ_prefix=None, + ) + + CONVERSION_FILE_EXTENSIONS_ALLOWED = values.ListValue( + default=[".docx", ".md"], + environ_name="CONVERSION_FILE_EXTENSIONS_ALLOWED", + environ_prefix=None, + ) + # Conversion endpoint CONVERSION_API_ENDPOINT = values.Value( default="convert",