diff --git a/src/backend/core/api/serializers.py b/src/backend/core/api/serializers.py
index 89b835a9..387cf0d8 100644
--- a/src/backend/core/api/serializers.py
+++ b/src/backend/core/api/serializers.py
@@ -10,7 +10,7 @@ from django.utils.translation import gettext_lazy as _
import magic
from rest_framework import exceptions, serializers
-from core import enums, models
+from core import enums, models, utils
from core.services.ai_services import AI_ACTIONS
from core.services.converter_services import (
ConversionError,
@@ -268,6 +268,53 @@ class DocumentSerializer(ListDocumentSerializer):
return value
+    def save(self, **kwargs):
+        """
+        Sync the document's "attachments" field with the keys found in its content.
+
+        Attachment keys are extracted from the submitted "content". Keys that the
+        document does not list yet are only added when they are held by a document
+        whose path is retained by `utils.filter_descendants` against the paths the
+        requesting user can read per se. Keys already stored on the document are
+        always kept. The field is used for access control.
+        """
+        raw_content = self.validated_data.get("content", "")
+        keys_in_content = set(utils.extract_attachments(raw_content))
+
+        if self.instance:
+            known_keys = set(self.instance.attachments or [])
+        else:
+            known_keys = set()
+
+        candidate_keys = keys_in_content - known_keys
+        if not candidate_keys:
+            # Nothing new is referenced: leave "attachments" untouched
+            return super().save(**kwargs)
+
+        # Documents that already hold at least one of the candidate keys
+        holders = (
+            models.Document.objects.filter(attachments__overlap=list(candidate_keys))
+            .only("path", "attachments")
+            .order_by("path")
+        )
+
+        requester = self.context["request"].user
+        readable_roots = (
+            models.Document.objects.readable_per_se(requester)
+            .order_by("path")
+            .values_list("path", flat=True)
+        )
+        # Both inputs are ordered by path above, presumably why sorting is skipped
+        readable_holder_paths = utils.filter_descendants(
+            [holder.path for holder in holders],
+            readable_roots,
+            skip_sorting=True,
+        )
+
+        # Only keep the candidate keys carried by a holder that passed the filter
+        granted_keys = set()
+        for holder in holders:
+            if holder.path in readable_holder_paths:
+                granted_keys.update(set(holder.attachments) & candidate_keys)
+
+        self.validated_data["attachments"] = list(known_keys | granted_keys)
+        return super().save(**kwargs)
+
class ServerCreateDocumentSerializer(serializers.Serializer):
"""
diff --git a/src/backend/core/tests/documents/test_api_documents_media_auth.py b/src/backend/core/tests/documents/test_api_documents_media_auth.py
index e6ed3d00..13817e97 100644
--- a/src/backend/core/tests/documents/test_api_documents_media_auth.py
+++ b/src/backend/core/tests/documents/test_api_documents_media_auth.py
@@ -1,5 +1,5 @@
"""
-Test file uploads API endpoint for users in impress's core app.
+Test media-auth authorization API endpoint in docs core app.
"""
from io import BytesIO
diff --git a/src/backend/core/tests/test_utils.py b/src/backend/core/tests/test_utils.py
index 4fa33e1e..3fea93ed 100644
--- a/src/backend/core/tests/test_utils.py
+++ b/src/backend/core/tests/test_utils.py
@@ -1,5 +1,10 @@
"""Test util base64_yjs_to_text."""
+import base64
+import uuid
+
+import y_py
+
from core import utils
# This base64 string is an example of what is saved in the database.
@@ -35,3 +40,38 @@ def test_utils_base64_yjs_to_xml():
or 'Hello' in content
)
assert 'world' in content
+
+
+def test_utils_extract_attachments():
+    """
+    Media attachment keys referenced in a document's content should be extracted,
+    whether they appear in an element attribute or in plain text.
+    """
+    document_id = uuid.uuid4()
+    image_key1 = f"{document_id!s}/attachments/{uuid.uuid4()!s}.png"
+    image_key2 = f"{uuid.uuid4()!s}/attachments/{uuid.uuid4()!s}.png"
+    image_key3 = f"{uuid.uuid4()!s}/attachments/{uuid.uuid4()!s}.png"
+
+    image_url1 = f"http://localhost/media/{image_key1:s}"
+    # This one deliberately lacks the "/media/" segment
+    image_url2 = f"http://localhost/{image_key2:s}"
+    image_url3 = f"http://localhost/media/{image_key3:s}"
+
+    ydoc = y_py.YDoc()  # pylint: disable=no-member
+    with ydoc.begin_transaction() as txn:
+        fragment = ydoc.get_xml_element("document-store")
+
+        # Two image nodes carrying their URL in the "src" attribute
+        for src in (image_url1, image_url2):
+            image_node = fragment.push_xml_element(txn, "image")
+            image_node.set_attribute(txn, "src", src)
+
+        # One paragraph carrying a URL as raw text
+        paragraph = fragment.push_xml_element(txn, "paragraph")
+        text_node = paragraph.push_xml_text(txn)
+        text_node.push(txn, image_url3)
+
+    state = y_py.encode_state_as_update(ydoc)  # pylint: disable=no-member
+    base64_string = base64.b64encode(state).decode("utf-8")
+
+    # image_key2 is missing the "/media/" part and shouldn't get extracted
+    extracted = utils.extract_attachments(base64_string)
+    assert extracted == [image_key1, image_key3]
diff --git a/src/backend/core/utils.py b/src/backend/core/utils.py
index 88a628b2..0378c323 100644
--- a/src/backend/core/utils.py
+++ b/src/backend/core/utils.py
@@ -69,5 +69,8 @@ def base64_yjs_to_text(base64_string):
def extract_attachments(content):
"""Helper method to extract media paths from a document's content."""
+ # Falsy content (None or an empty string) is not decoded: report no attachments.
+ if not content:
+ return []
+
+ # Convert the content to XML, then collect every match of the media URL pattern
+ # in order of appearance (re.findall does not deduplicate).
xml_content = base64_yjs_to_xml(content)
return re.findall(enums.MEDIA_STORAGE_URL_EXTRACT, xml_content)