(backend) allow uploading more types of attachments

We want to allow users to upload files to a document, not just images.
We try to enforce coherence between the file extension and the real
mime type of its content. If a file is deemed unsafe, it is still accepted
during upload and the information is stored as metadata on the object
for display to readers.
This commit is contained in:
Samuel Paccoud - DINUM
2024-10-07 20:10:42 +02:00
committed by Samuel Paccoud
parent a9f08df566
commit e8d95facdf
7 changed files with 229 additions and 50 deletions

View File

@@ -6,6 +6,7 @@ from django.conf import settings
from django.db.models import Q
from django.utils.translation import gettext_lazy as _
import magic
from rest_framework import exceptions, serializers
from core import models
@@ -219,16 +220,42 @@ class FileUploadSerializer(serializers.Serializer):
f"File size exceeds the maximum limit of {max_size:d} MB."
)
# Validate file type
mime_type, _ = mimetypes.guess_type(file.name)
if mime_type not in settings.DOCUMENT_IMAGE_ALLOWED_MIME_TYPES:
mime_types = ", ".join(settings.DOCUMENT_IMAGE_ALLOWED_MIME_TYPES)
raise serializers.ValidationError(
f"File type '{mime_type:s}' is not allowed. Allowed types are: {mime_types:s}"
)
extension = file.name.rpartition(".")[-1] if "." in file.name else None
# Read the first few bytes to determine the MIME type accurately
mime = magic.Magic(mime=True)
magic_mime_type = mime.from_buffer(file.read(1024))
file.seek(0) # Reset file pointer to the beginning after reading
self.context["is_unsafe"] = (
magic_mime_type in settings.DOCUMENT_UNSAFE_MIME_TYPES
)
extension_mime_type, _ = mimetypes.guess_type(file.name)
# Try guessing a coherent extension from the mimetype
if extension_mime_type != magic_mime_type:
self.context["is_unsafe"] = True
guessed_ext = mimetypes.guess_extension(magic_mime_type)
# Missing extensions or extensions longer than 5 characters (it's as long as an extension
# can be) are replaced by the extension we eventually guessed from mimetype.
if (extension is None or len(extension) > 5) and guessed_ext:
extension = guessed_ext[1:]
if extension is None:
raise serializers.ValidationError("Could not determine file extension.")
self.context["expected_extension"] = extension
return file
def validate(self, attrs):
"""Override validate to add the computed extension to validated_data."""
attrs["expected_extension"] = self.context["expected_extension"]
attrs["is_unsafe"] = self.context["is_unsafe"]
return attrs
class TemplateSerializer(BaseResourceSerializer):
"""Serialize templates."""

View File

@@ -1,6 +1,5 @@
"""API endpoints"""
import os
import re
import uuid
from urllib.parse import urlparse
@@ -476,15 +475,22 @@ class DocumentViewSet(
return drf_response.Response(
serializer.errors, status=status.HTTP_400_BAD_REQUEST
)
# Extract the file extension from the original filename
file = serializer.validated_data["file"]
extension = os.path.splitext(file.name)[1]
# Generate a generic yet unique filename to store the image in object storage
file_id = uuid.uuid4()
key = f"{document.key_base}/{ATTACHMENTS_FOLDER:s}/{file_id!s}{extension:s}"
extension = serializer.validated_data["expected_extension"]
key = f"{document.key_base}/{ATTACHMENTS_FOLDER:s}/{file_id!s}.{extension:s}"
# Prepare metadata for storage
metadata = {"Metadata": {"owner": str(request.user.id)}}
if serializer.validated_data["is_unsafe"]:
metadata["Metadata"]["is_unsafe"] = "true"
file = serializer.validated_data["file"]
default_storage.connection.meta.client.upload_fileobj(
file, default_storage.bucket_name, key, ExtraArgs=metadata
)
default_storage.save(key, file)
return drf_response.Response(
{"file": f"{settings.MEDIA_URL:s}{key:s}"}, status=status.HTTP_201_CREATED
)