(backend) allow uploading more types of attachments

We want to allow users to upload files to a document, not just images.
We try to enforce coherence between the file extension and the real
mime type of its content. If a file is deemed unsafe, it is still accepted
during upload and the information is stored as metadata on the object
for display to readers.
This commit is contained in:
Samuel Paccoud - DINUM
2024-10-07 20:10:42 +02:00
committed by Samuel Paccoud
parent a9f08df566
commit e8d95facdf
7 changed files with 229 additions and 50 deletions

View File

@@ -6,6 +6,7 @@ from django.conf import settings
from django.db.models import Q
from django.utils.translation import gettext_lazy as _
import magic
from rest_framework import exceptions, serializers
from core import models
@@ -219,16 +220,42 @@ class FileUploadSerializer(serializers.Serializer):
f"File size exceeds the maximum limit of {max_size:d} MB."
)
# Validate file type
mime_type, _ = mimetypes.guess_type(file.name)
if mime_type not in settings.DOCUMENT_IMAGE_ALLOWED_MIME_TYPES:
mime_types = ", ".join(settings.DOCUMENT_IMAGE_ALLOWED_MIME_TYPES)
raise serializers.ValidationError(
f"File type '{mime_type:s}' is not allowed. Allowed types are: {mime_types:s}"
)
extension = file.name.rpartition(".")[-1] if "." in file.name else None
# Read the first few bytes to determine the MIME type accurately
mime = magic.Magic(mime=True)
magic_mime_type = mime.from_buffer(file.read(1024))
file.seek(0) # Reset file pointer to the beginning after reading
self.context["is_unsafe"] = (
magic_mime_type in settings.DOCUMENT_UNSAFE_MIME_TYPES
)
extension_mime_type, _ = mimetypes.guess_type(file.name)
# Try guessing a coherent extension from the mimetype
if extension_mime_type != magic_mime_type:
self.context["is_unsafe"] = True
guessed_ext = mimetypes.guess_extension(magic_mime_type)
# Missing extensions or extensions longer than 5 characters (it's as long as an extension
# can be) are replaced by the extension we eventually guessed from mimetype.
if (extension is None or len(extension) > 5) and guessed_ext:
extension = guessed_ext[1:]
if extension is None:
raise serializers.ValidationError("Could not determine file extension.")
self.context["expected_extension"] = extension
return file
def validate(self, attrs):
"""Override validate to add the computed extension to validated_data."""
attrs["expected_extension"] = self.context["expected_extension"]
attrs["is_unsafe"] = self.context["is_unsafe"]
return attrs
class TemplateSerializer(BaseResourceSerializer):
"""Serialize templates."""