(backend) improve validation on conversion uploaded file

We now check the size and the extension of the uploaded file for
conversion.
This commit is contained in:
Manuel Raynaud
2026-01-20 10:49:19 +01:00
committed by Anthony LC
parent 9345d8deab
commit dd5b6bd023
4 changed files with 103 additions and 2 deletions

View File

@@ -32,6 +32,8 @@ These are the environment variables you can set for the `impress-backend` contai
| CONVERSION_API_ENDPOINT | Conversion API endpoint | convert |
| CONVERSION_API_SECURE | Require secure conversion api | false |
| CONVERSION_API_TIMEOUT | Conversion api timeout | 30 |
| CONVERSION_FILE_MAX_SIZE | The file max size allowed when uploaded to convert it | 20971520 (20MB) |
| CONVERSION_FILE_EXTENSIONS_ALLOWED | Extension list managed by the conversion service | [".docx", ".md"]
| CRISP_WEBSITE_ID | Crisp website id for support | |
| DB_ENGINE | Engine to use for database connections | django.db.backends.postgresql_psycopg2 |
| DB_HOST | Host of the database | localhost |

View File

@@ -4,6 +4,7 @@
import binascii
import mimetypes
from base64 import b64decode
from os.path import splitext
from django.conf import settings
from django.db.models import Q
@@ -165,7 +166,9 @@ class DocumentSerializer(ListDocumentSerializer):
content = serializers.CharField(required=False)
websocket = serializers.BooleanField(required=False, write_only=True)
file = serializers.FileField(required=False, write_only=True, allow_null=True)
file = serializers.FileField(
required=False, write_only=True, allow_null=True, max_length=255
)
class Meta:
model = models.Document
@@ -252,6 +255,30 @@ class DocumentSerializer(ListDocumentSerializer):
return value
def validate_file(self, file):
"""Add file size and type constraints as defined in settings."""
if not file:
return None
# Validate file size
if file.size > settings.CONVERSION_FILE_MAX_SIZE:
max_size = settings.CONVERSION_FILE_MAX_SIZE // (1024 * 1024)
raise serializers.ValidationError(
f"File size exceeds the maximum limit of {max_size:d} MB."
)
_name, extension = splitext(file.name)
if extension.lower() not in settings.CONVERSION_FILE_EXTENSIONS_ALLOWED:
raise serializers.ValidationError(
(
f"File extension {extension} is not allowed. Allowed extensions"
f" are: {settings.CONVERSION_FILE_EXTENSIONS_ALLOWED}."
)
)
return file
def save(self, **kwargs):
"""
Process the content field to extract attachment keys and update the document's

View File

@@ -356,3 +356,58 @@ def test_api_documents_create_with_file_unicode_filename(mock_convert):
assert response.status_code == 201
document = Document.objects.get()
assert document.title == "文档-télécharger-документ.docx"
def test_api_documents_create_with_file_max_size_exceeded(settings):
"""
The uploaded file should not exceed the maximum size in settings.
"""
settings.CONVERSION_FILE_MAX_SIZE = 1 # 1 byte for test
user = factories.UserFactory()
client = APIClient()
client.force_login(user)
file = BytesIO(b"a" * (10))
file.name = "test.docx"
response = client.post(
"/api/v1.0/documents/",
{
"file": file,
},
format="multipart",
)
assert response.status_code == 400
assert response.json() == {"file": ["File size exceeds the maximum limit of 0 MB."]}
def test_api_documents_create_with_file_extension_not_allowed(settings):
"""
The uploaded file should not have an allowed extension.
"""
settings.CONVERSION_FILE_EXTENSIONS_ALLOWED = [".docx"]
user = factories.UserFactory()
client = APIClient()
client.force_login(user)
file = BytesIO(b"fake docx content")
file.name = "test.md"
response = client.post(
"/api/v1.0/documents/",
{
"file": file,
},
format="multipart",
)
assert response.status_code == 400
assert response.json() == {
"file": [
"File extension .md is not allowed. Allowed extensions are: ['.docx']."
]
}

View File

@@ -29,6 +29,10 @@ from sentry_sdk.integrations.logging import ignore_logger
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_DIR = os.getenv("DATA_DIR", os.path.join("/", "data"))
KB = 1024
MB = KB * KB
GB = MB * KB
def get_release():
"""
@@ -168,7 +172,7 @@ class Base(Configuration):
# Document images
DOCUMENT_IMAGE_MAX_SIZE = values.IntegerValue(
10 * (2**20), # 10MB
10 * MB, # 10MB
environ_name="DOCUMENT_IMAGE_MAX_SIZE",
environ_prefix=None,
)
@@ -712,6 +716,19 @@ class Base(Configuration):
# DocSpec API microservice
DOCSPEC_API_URL = values.Value(environ_name="DOCSPEC_API_URL", environ_prefix=None)
# Imported file settings
CONVERSION_FILE_MAX_SIZE = values.IntegerValue(
20 * MB, # 10MB
environ_name="CONVERSION_FILE_MAX_SIZE",
environ_prefix=None,
)
CONVERSION_FILE_EXTENSIONS_ALLOWED = values.ListValue(
default=[".docx", ".md"],
environ_name="CONVERSION_FILE_EXTENSIONS_ALLOWED",
environ_prefix=None,
)
# Conversion endpoint
CONVERSION_API_ENDPOINT = values.Value(
default="convert",