(backend) improve validation on conversion uploaded file

We now check the size and the extension of the uploaded file for
conversion.
This commit is contained in:
Manuel Raynaud
2026-01-20 10:49:19 +01:00
committed by Anthony LC
parent 9345d8deab
commit dd5b6bd023
4 changed files with 103 additions and 2 deletions

View File

@@ -32,6 +32,8 @@ These are the environment variables you can set for the `impress-backend` contai
| CONVERSION_API_ENDPOINT | Conversion API endpoint | convert | | CONVERSION_API_ENDPOINT | Conversion API endpoint | convert |
| CONVERSION_API_SECURE | Require secure conversion api | false | | CONVERSION_API_SECURE | Require secure conversion api | false |
| CONVERSION_API_TIMEOUT | Conversion api timeout | 30 | | CONVERSION_API_TIMEOUT | Conversion api timeout | 30 |
| CONVERSION_FILE_MAX_SIZE | Maximum size, in bytes, of a file uploaded for conversion | 20971520 (20MB) |
| CONVERSION_FILE_EXTENSIONS_ALLOWED | Extension list managed by the conversion service | [".docx", ".md"] |
| CRISP_WEBSITE_ID | Crisp website id for support | | | CRISP_WEBSITE_ID | Crisp website id for support | |
| DB_ENGINE | Engine to use for database connections | django.db.backends.postgresql_psycopg2 | | DB_ENGINE | Engine to use for database connections | django.db.backends.postgresql_psycopg2 |
| DB_HOST | Host of the database | localhost | | DB_HOST | Host of the database | localhost |

View File

@@ -4,6 +4,7 @@
import binascii import binascii
import mimetypes import mimetypes
from base64 import b64decode from base64 import b64decode
from os.path import splitext
from django.conf import settings from django.conf import settings
from django.db.models import Q from django.db.models import Q
@@ -165,7 +166,9 @@ class DocumentSerializer(ListDocumentSerializer):
content = serializers.CharField(required=False) content = serializers.CharField(required=False)
websocket = serializers.BooleanField(required=False, write_only=True) websocket = serializers.BooleanField(required=False, write_only=True)
file = serializers.FileField(required=False, write_only=True, allow_null=True) file = serializers.FileField(
required=False, write_only=True, allow_null=True, max_length=255
)
class Meta: class Meta:
model = models.Document model = models.Document
@@ -252,6 +255,30 @@ class DocumentSerializer(ListDocumentSerializer):
return value return value
def validate_file(self, file):
    """
    Validate the file uploaded for conversion against the configured limits.

    Args:
        file: The uploaded file, or a falsy value when no file was sent.

    Returns:
        The unchanged file when it passes both checks, or None when no file
        was provided.

    Raises:
        serializers.ValidationError: If the file is larger than
            settings.CONVERSION_FILE_MAX_SIZE, or if its extension is not
            listed in settings.CONVERSION_FILE_EXTENSIONS_ALLOWED.
    """
    if not file:
        return None

    # Validate file size
    if file.size > settings.CONVERSION_FILE_MAX_SIZE:
        # The limit is reported in whole megabytes (integer division), so a
        # limit below 1 MB is displayed as "0 MB".
        max_size = settings.CONVERSION_FILE_MAX_SIZE // (1024 * 1024)
        raise serializers.ValidationError(
            f"File size exceeds the maximum limit of {max_size:d} MB."
        )

    # Validate file extension. Both the uploaded extension and the configured
    # entries are lowercased so that an allow-list entry written in upper or
    # mixed case (e.g. ".DOCX") still matches.
    _name, extension = splitext(file.name)
    allowed_extensions = {
        allowed.lower() for allowed in settings.CONVERSION_FILE_EXTENSIONS_ALLOWED
    }
    if extension.lower() not in allowed_extensions:
        raise serializers.ValidationError(
            (
                f"File extension {extension} is not allowed. Allowed extensions"
                f" are: {settings.CONVERSION_FILE_EXTENSIONS_ALLOWED}."
            )
        )

    return file
def save(self, **kwargs): def save(self, **kwargs):
""" """
Process the content field to extract attachment keys and update the document's Process the content field to extract attachment keys and update the document's

View File

@@ -356,3 +356,58 @@ def test_api_documents_create_with_file_unicode_filename(mock_convert):
assert response.status_code == 201 assert response.status_code == 201
document = Document.objects.get() document = Document.objects.get()
assert document.title == "文档-télécharger-документ.docx" assert document.title == "文档-télécharger-документ.docx"
def test_api_documents_create_with_file_max_size_exceeded(settings):
    """
    Uploading a file larger than CONVERSION_FILE_MAX_SIZE is rejected with a 400.
    """
    # Shrink the limit to a single byte so the 10-byte payload is oversized.
    settings.CONVERSION_FILE_MAX_SIZE = 1

    client = APIClient()
    client.force_login(factories.UserFactory())

    upload = BytesIO(b"a" * 10)
    upload.name = "test.docx"

    payload = {"file": upload}
    response = client.post("/api/v1.0/documents/", payload, format="multipart")

    assert response.status_code == 400
    # The limit is rendered in whole megabytes, hence "0 MB" for a 1-byte limit.
    expected_body = {"file": ["File size exceeds the maximum limit of 0 MB."]}
    assert response.json() == expected_body
def test_api_documents_create_with_file_extension_not_allowed(settings):
    """
    Uploading a file whose extension is not listed in
    CONVERSION_FILE_EXTENSIONS_ALLOWED is rejected with a 400.
    """
    # Only ".docx" is allowed, so the ".md" upload below must be refused.
    settings.CONVERSION_FILE_EXTENSIONS_ALLOWED = [".docx"]

    client = APIClient()
    client.force_login(factories.UserFactory())

    upload = BytesIO(b"fake docx content")
    upload.name = "test.md"

    payload = {"file": upload}
    response = client.post("/api/v1.0/documents/", payload, format="multipart")

    assert response.status_code == 400
    expected_message = (
        "File extension .md is not allowed. Allowed extensions are: ['.docx']."
    )
    assert response.json() == {"file": [expected_message]}

View File

@@ -29,6 +29,10 @@ from sentry_sdk.integrations.logging import ignore_logger
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_DIR = os.getenv("DATA_DIR", os.path.join("/", "data")) DATA_DIR = os.getenv("DATA_DIR", os.path.join("/", "data"))
# Binary byte-size units (powers of 1024), expressed in bytes.
KB = 2**10
MB = 2**20
GB = 2**30
def get_release(): def get_release():
""" """
@@ -168,7 +172,7 @@ class Base(Configuration):
# Document images # Document images
DOCUMENT_IMAGE_MAX_SIZE = values.IntegerValue( DOCUMENT_IMAGE_MAX_SIZE = values.IntegerValue(
10 * (2**20), # 10MB 10 * MB, # 10MB
environ_name="DOCUMENT_IMAGE_MAX_SIZE", environ_name="DOCUMENT_IMAGE_MAX_SIZE",
environ_prefix=None, environ_prefix=None,
) )
@@ -712,6 +716,19 @@ class Base(Configuration):
# DocSpec API microservice # DocSpec API microservice
DOCSPEC_API_URL = values.Value(environ_name="DOCSPEC_API_URL", environ_prefix=None) DOCSPEC_API_URL = values.Value(environ_name="DOCSPEC_API_URL", environ_prefix=None)
# Settings for files uploaded to be converted into documents
# Maximum accepted size, in bytes (20MB by default).
CONVERSION_FILE_MAX_SIZE = values.IntegerValue(
    default=20 * MB,
    environ_prefix=None,
    environ_name="CONVERSION_FILE_MAX_SIZE",
)
# File extensions accepted for conversion.
CONVERSION_FILE_EXTENSIONS_ALLOWED = values.ListValue(
    environ_prefix=None,
    environ_name="CONVERSION_FILE_EXTENSIONS_ALLOWED",
    default=[".docx", ".md"],
)
# Conversion endpoint # Conversion endpoint
CONVERSION_API_ENDPOINT = values.Value( CONVERSION_API_ENDPOINT = values.Value(
default="convert", default="convert",