(backend) allow uploading more types of attachments

We want to allow users to upload files to a document, not just images.
We try to enforce coherence between the file extension and the real
mime type of its content. If a file is deemed unsafe, it is still accepted
during upload and the information is stored as metadata on the object
for display to readers.
This commit is contained in:
Samuel Paccoud - DINUM
2024-10-07 20:10:42 +02:00
committed by Samuel Paccoud
parent a9f08df566
commit e8d95facdf
7 changed files with 229 additions and 50 deletions

View File

@@ -37,6 +37,7 @@ and this project adheres to
## Added ## Added
- ✨(backend) allow uploading more types of attachments #309
- ✨(backend) add name fields to the user synchronized with OIDC #301 - ✨(backend) add name fields to the user synchronized with OIDC #301
- ✨(ci) add security scan #291 - ✨(ci) add security scan #291
- ♻️(frontend) Add versions #277 - ♻️(frontend) Add versions #277

View File

@@ -65,14 +65,15 @@ ENV PYTHONUNBUFFERED=1
# Install required system libs # Install required system libs
RUN apk add \ RUN apk add \
gettext \
cairo \ cairo \
libffi-dev \ file \
gdk-pixbuf \
pango \
pandoc \
font-noto-emoji \
font-noto \ font-noto \
font-noto-emoji \
gettext \
gdk-pixbuf \
libffi-dev \
pandoc \
pango \
shared-mime-info shared-mime-info
# Copy entrypoint # Copy entrypoint

View File

@@ -6,6 +6,7 @@ from django.conf import settings
from django.db.models import Q from django.db.models import Q
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
import magic
from rest_framework import exceptions, serializers from rest_framework import exceptions, serializers
from core import models from core import models
@@ -219,16 +220,42 @@ class FileUploadSerializer(serializers.Serializer):
f"File size exceeds the maximum limit of {max_size:d} MB." f"File size exceeds the maximum limit of {max_size:d} MB."
) )
# Validate file type extension = file.name.rpartition(".")[-1] if "." in file.name else None
mime_type, _ = mimetypes.guess_type(file.name)
if mime_type not in settings.DOCUMENT_IMAGE_ALLOWED_MIME_TYPES: # Read the first few bytes to determine the MIME type accurately
mime_types = ", ".join(settings.DOCUMENT_IMAGE_ALLOWED_MIME_TYPES) mime = magic.Magic(mime=True)
raise serializers.ValidationError( magic_mime_type = mime.from_buffer(file.read(1024))
f"File type '{mime_type:s}' is not allowed. Allowed types are: {mime_types:s}" file.seek(0) # Reset file pointer to the beginning after reading
)
self.context["is_unsafe"] = (
magic_mime_type in settings.DOCUMENT_UNSAFE_MIME_TYPES
)
extension_mime_type, _ = mimetypes.guess_type(file.name)
# Try guessing a coherent extension from the mimetype
if extension_mime_type != magic_mime_type:
self.context["is_unsafe"] = True
guessed_ext = mimetypes.guess_extension(magic_mime_type)
# Missing extensions or extensions longer than 5 characters (it's as long as an extension
# can be) are replaced by the extension we eventually guessed from mimetype.
if (extension is None or len(extension) > 5) and guessed_ext:
extension = guessed_ext[1:]
if extension is None:
raise serializers.ValidationError("Could not determine file extension.")
self.context["expected_extension"] = extension
return file return file
def validate(self, attrs):
"""Override validate to add the computed extension to validated_data."""
attrs["expected_extension"] = self.context["expected_extension"]
attrs["is_unsafe"] = self.context["is_unsafe"]
return attrs
class TemplateSerializer(BaseResourceSerializer): class TemplateSerializer(BaseResourceSerializer):
"""Serialize templates.""" """Serialize templates."""

View File

@@ -1,6 +1,5 @@
"""API endpoints""" """API endpoints"""
import os
import re import re
import uuid import uuid
from urllib.parse import urlparse from urllib.parse import urlparse
@@ -476,15 +475,22 @@ class DocumentViewSet(
return drf_response.Response( return drf_response.Response(
serializer.errors, status=status.HTTP_400_BAD_REQUEST serializer.errors, status=status.HTTP_400_BAD_REQUEST
) )
# Extract the file extension from the original filename
file = serializer.validated_data["file"]
extension = os.path.splitext(file.name)[1]
# Generate a generic yet unique filename to store the image in object storage # Generate a generic yet unique filename to store the image in object storage
file_id = uuid.uuid4() file_id = uuid.uuid4()
key = f"{document.key_base}/{ATTACHMENTS_FOLDER:s}/{file_id!s}{extension:s}" extension = serializer.validated_data["expected_extension"]
key = f"{document.key_base}/{ATTACHMENTS_FOLDER:s}/{file_id!s}.{extension:s}"
# Prepare metadata for storage
metadata = {"Metadata": {"owner": str(request.user.id)}}
if serializer.validated_data["is_unsafe"]:
metadata["Metadata"]["is_unsafe"] = "true"
file = serializer.validated_data["file"]
default_storage.connection.meta.client.upload_fileobj(
file, default_storage.bucket_name, key, ExtraArgs=metadata
)
default_storage.save(key, file)
return drf_response.Response( return drf_response.Response(
{"file": f"{settings.MEDIA_URL:s}{key:s}"}, status=status.HTTP_201_CREATED {"file": f"{settings.MEDIA_URL:s}{key:s}"}, status=status.HTTP_201_CREATED
) )

View File

@@ -5,7 +5,7 @@ Test file uploads API endpoint for users in impress's core app.
import re import re
import uuid import uuid
from django.core.files.base import ContentFile from django.core.files.storage import default_storage
from django.core.files.uploadedfile import SimpleUploadedFile from django.core.files.uploadedfile import SimpleUploadedFile
import pytest import pytest
@@ -16,6 +16,12 @@ from core.tests.conftest import TEAM, USER, VIA
pytestmark = pytest.mark.django_db pytestmark = pytest.mark.django_db
PIXEL = (
b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x06\x00"
b"\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\nIDATx\x9cc\xf8\xff\xff?\x00\x05\xfe\x02\xfe"
b"\xa7V\xbd\xfa\x00\x00\x00\x00IEND\xaeB`\x82"
)
@pytest.mark.parametrize( @pytest.mark.parametrize(
"reach, role", "reach, role",
@@ -33,7 +39,7 @@ def test_api_documents_attachment_upload_anonymous_forbidden(reach, role):
and role don't allow it. and role don't allow it.
""" """
document = factories.DocumentFactory(link_reach=reach, link_role=role) document = factories.DocumentFactory(link_reach=reach, link_role=role)
file = SimpleUploadedFile("test_file.jpg", b"Dummy content") file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/" url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = APIClient().post(url, {"file": file}, format="multipart") response = APIClient().post(url, {"file": file}, format="multipart")
@@ -50,14 +56,14 @@ def test_api_documents_attachment_upload_anonymous_success():
if the link reach and role permit it. if the link reach and role permit it.
""" """
document = factories.DocumentFactory(link_reach="public", link_role="editor") document = factories.DocumentFactory(link_reach="public", link_role="editor")
file = SimpleUploadedFile("test_file.jpg", b"Dummy content") file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/" url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = APIClient().post(url, {"file": file}, format="multipart") response = APIClient().post(url, {"file": file}, format="multipart")
assert response.status_code == 201 assert response.status_code == 201
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.jpg") pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.png")
match = pattern.search(response.json()["file"]) match = pattern.search(response.json()["file"])
file_id = match.group(1) file_id = match.group(1)
@@ -85,7 +91,7 @@ def test_api_documents_attachment_upload_authenticated_forbidden(reach, role):
client.force_login(user) client.force_login(user)
document = factories.DocumentFactory(link_reach=reach, link_role=role) document = factories.DocumentFactory(link_reach=reach, link_role=role)
file = SimpleUploadedFile("test_file.jpg", b"Dummy content") file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/" url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = client.post(url, {"file": file}, format="multipart") response = client.post(url, {"file": file}, format="multipart")
@@ -114,14 +120,14 @@ def test_api_documents_attachment_upload_authenticated_success(reach, role):
client.force_login(user) client.force_login(user)
document = factories.DocumentFactory(link_reach=reach, link_role=role) document = factories.DocumentFactory(link_reach=reach, link_role=role)
file = SimpleUploadedFile("test_file.jpg", b"Dummy content") file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/" url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = client.post(url, {"file": file}, format="multipart") response = client.post(url, {"file": file}, format="multipart")
assert response.status_code == 201 assert response.status_code == 201
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.jpg") pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.png")
match = pattern.search(response.json()["file"]) match = pattern.search(response.json()["file"])
file_id = match.group(1) file_id = match.group(1)
@@ -148,7 +154,7 @@ def test_api_documents_attachment_upload_reader(via, mock_user_teams):
document=document, team="lasuite", role="reader" document=document, team="lasuite", role="reader"
) )
file = SimpleUploadedFile("test_file.jpg", b"Dummy content") file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/" url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = client.post(url, {"file": file}, format="multipart") response = client.post(url, {"file": file}, format="multipart")
@@ -179,20 +185,28 @@ def test_api_documents_attachment_upload_success(via, role, mock_user_teams):
document=document, team="lasuite", role=role document=document, team="lasuite", role=role
) )
file = SimpleUploadedFile("test_file.jpg", b"Dummy content") file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/" url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = client.post(url, {"file": file}, format="multipart") response = client.post(url, {"file": file}, format="multipart")
assert response.status_code == 201 assert response.status_code == 201
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.jpg") file_path = response.json()["file"]
match = pattern.search(response.json()["file"]) pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.png")
match = pattern.search(file_path)
file_id = match.group(1) file_id = match.group(1)
# Validate that file_id is a valid UUID # Validate that file_id is a valid UUID
uuid.UUID(file_id) uuid.UUID(file_id)
# Now, check the metadata of the uploaded file
key = file_path.replace("/media", "")
file_head = default_storage.connection.meta.client.head_object(
Bucket=default_storage.bucket_name, Key=key
)
assert file_head["Metadata"] == {"owner": str(user.id)}
def test_api_documents_attachment_upload_invalid(client): def test_api_documents_attachment_upload_invalid(client):
"""Attempt to upload without a file should return an explicit error.""" """Attempt to upload without a file should return an explicit error."""
@@ -222,8 +236,9 @@ def test_api_documents_attachment_upload_size_limit_exceeded(settings):
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/" url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
# Create a temporary file larger than the allowed size # Create a temporary file larger than the allowed size
content = b"a" * (1048576 + 1) file = SimpleUploadedFile(
file = ContentFile(content, name="test.jpg") name="test.txt", content=b"a" * (1048576 + 1), content_type="text/plain"
)
response = client.post(url, {"file": file}, format="multipart") response = client.post(url, {"file": file}, format="multipart")
@@ -231,10 +246,21 @@ def test_api_documents_attachment_upload_size_limit_exceeded(settings):
assert response.json() == {"file": ["File size exceeds the maximum limit of 1 MB."]} assert response.json() == {"file": ["File size exceeds the maximum limit of 1 MB."]}
def test_api_documents_attachment_upload_type_not_allowed(settings): @pytest.mark.parametrize(
"""The uploaded file should be of a whitelisted type.""" "name,content,extension",
settings.DOCUMENT_IMAGE_ALLOWED_MIME_TYPES = ["image/jpeg", "image/png"] [
("test.exe", b"text", "exe"),
("test", b"text", "txt"),
("test.aaaaaa", b"test", "txt"),
("test.txt", PIXEL, "txt"),
("test.py", b"#!/usr/bin/python", "py"),
],
)
def test_api_documents_attachment_upload_fix_extension(name, content, extension):
"""
A file with no extension or a wrong extension is accepted and the extension
is corrected in storage.
"""
user = factories.UserFactory() user = factories.UserFactory()
client = APIClient() client = APIClient()
client.force_login(user) client.force_login(user)
@@ -242,14 +268,70 @@ def test_api_documents_attachment_upload_type_not_allowed(settings):
document = factories.DocumentFactory(users=[(user, "owner")]) document = factories.DocumentFactory(users=[(user, "owner")])
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/" url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
# Create a temporary file with a not allowed type (e.g., text file) file = SimpleUploadedFile(name=name, content=content)
file = ContentFile(b"a" * 1048576, name="test.txt") response = client.post(url, {"file": file}, format="multipart")
assert response.status_code == 201
file_path = response.json()["file"]
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.{extension:s}")
match = pattern.search(file_path)
file_id = match.group(1)
# Validate that file_id is a valid UUID
uuid.UUID(file_id)
# Now, check the metadata of the uploaded file
key = file_path.replace("/media", "")
file_head = default_storage.connection.meta.client.head_object(
Bucket=default_storage.bucket_name, Key=key
)
assert file_head["Metadata"] == {"owner": str(user.id), "is_unsafe": "true"}
def test_api_documents_attachment_upload_empty_file():
"""An empty file should be rejected."""
user = factories.UserFactory()
client = APIClient()
client.force_login(user)
document = factories.DocumentFactory(users=[(user, "owner")])
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
file = SimpleUploadedFile(name="test.png", content=b"")
response = client.post(url, {"file": file}, format="multipart") response = client.post(url, {"file": file}, format="multipart")
assert response.status_code == 400 assert response.status_code == 400
assert response.json() == { assert response.json() == {"file": ["The submitted file is empty."]}
"file": [
"File type 'text/plain' is not allowed. Allowed types are: image/jpeg, image/png"
] def test_api_documents_attachment_upload_unsafe():
} """A file with an unsafe mime type should be tagged as such."""
user = factories.UserFactory()
client = APIClient()
client.force_login(user)
document = factories.DocumentFactory(users=[(user, "owner")])
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
file = SimpleUploadedFile(
name="script.exe", content=b"\x4d\x5a\x90\x00\x03\x00\x00\x00"
)
response = client.post(url, {"file": file}, format="multipart")
assert response.status_code == 201
file_path = response.json()["file"]
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.exe")
match = pattern.search(file_path)
file_id = match.group(1)
# Validate that file_id is a valid UUID
uuid.UUID(file_id)
# Now, check the metadata of the uploaded file
key = file_path.replace("/media", "")
file_head = default_storage.connection.meta.client.head_object(
Bucket=default_storage.bucket_name, Key=key
)
assert file_head["Metadata"] == {"owner": str(user.id), "is_unsafe": "true"}

View File

@@ -144,14 +144,75 @@ class Base(Configuration):
environ_name="DOCUMENT_IMAGE_MAX_SIZE", environ_name="DOCUMENT_IMAGE_MAX_SIZE",
environ_prefix=None, environ_prefix=None,
) )
DOCUMENT_IMAGE_ALLOWED_MIME_TYPES = [
"image/bmp", DOCUMENT_UNSAFE_MIME_TYPES = [
"image/gif", # Executable Files
"image/jpeg", "application/x-msdownload",
"image/png", "application/x-bat",
"application/x-dosexec",
"application/x-sh",
"application/x-ms-dos-executable",
"application/x-msi",
"application/java-archive",
"application/octet-stream",
# Dynamic Web Pages
"application/x-httpd-php",
"application/x-asp",
"application/x-aspx",
"application/jsp",
"application/xhtml+xml",
"application/x-python-code",
"application/x-perl",
"text/html",
"text/javascript",
"text/x-php",
# System Files
"application/x-msdownload",
"application/x-sys",
"application/x-drv",
"application/cpl",
"application/x-apple-diskimage",
# Script Files
"application/javascript",
"application/x-vbscript",
"application/x-powershell",
"application/x-shellscript",
# Compressed/Archive Files
"application/zip",
"application/x-tar",
"application/gzip",
"application/x-bzip2",
"application/x-7z-compressed",
"application/x-rar",
"application/x-rar-compressed",
"application/x-compress",
"application/x-lzma",
# Macros in Documents
"application/vnd.ms-word",
"application/vnd.ms-excel",
"application/vnd.ms-powerpoint",
"application/vnd.ms-word.document.macroenabled.12",
"application/vnd.ms-excel.sheet.macroenabled.12",
"application/vnd.ms-powerpoint.presentation.macroenabled.12",
# Disk Images & Virtual Disk Files
"application/x-iso9660-image",
"application/x-vmdk",
"application/x-apple-diskimage",
"application/x-dmg",
# Other Dangerous MIME Types
"application/x-ms-application",
"application/x-msdownload",
"application/x-shockwave-flash",
"application/x-silverlight-app",
"application/x-java-vm",
"application/x-bittorrent",
"application/hta",
"application/x-csh",
"application/x-ksh",
"application/x-ms-regedit",
"application/x-msdownload",
"application/xml",
"image/svg+xml", "image/svg+xml",
"image/tiff",
"image/webp",
] ]
# Document versions # Document versions

View File

@@ -50,6 +50,7 @@ dependencies = [
"PyJWT==2.9.0", "PyJWT==2.9.0",
"pypandoc==1.14", "pypandoc==1.14",
"python-frontmatter==1.1.0", "python-frontmatter==1.1.0",
"python-magic==0.4.27",
"requests==2.32.3", "requests==2.32.3",
"sentry-sdk==2.16.0", "sentry-sdk==2.16.0",
"url-normalize==1.4.3", "url-normalize==1.4.3",