# src/backend/core/api/serializers.py -- method of DocumentSerializer.
# The module-level import must read: from core import enums, models, utils
def save(self, **kwargs):
    """
    Sync the document's "attachments" field with the keys found in its content.

    Attachment keys are extracted from the submitted "content". Keys already
    listed on the instance are always kept. A key that is new to the instance
    is only added when it appears in the "attachments" of a document whose
    path is retained by `utils.filter_descendants` against the paths returned
    by `Document.objects.readable_per_se(user)` for the requesting user. This
    is the access-control step: referencing a key in content is not enough on
    its own to get it recorded on the document.

    All keyword arguments are forwarded unchanged to the parent `save`, whose
    return value is returned.
    """
    content = self.validated_data.get("content", "")
    extracted_attachments = set(utils.extract_attachments(content))

    existing_attachments = (
        set(self.instance.attachments or []) if self.instance else set()
    )
    new_attachments = extracted_attachments - existing_attachments

    # Nothing new is referenced: skip the lookups and leave "attachments" as is.
    if not new_attachments:
        return super().save(**kwargs)

    # Documents that already hold at least one of the newly referenced keys.
    attachments_documents = (
        models.Document.objects.filter(attachments__overlap=list(new_attachments))
        .only("path", "attachments")
        .order_by("path")
    )

    user = self.context["request"].user
    readable_per_se_paths = (
        models.Document.objects.readable_per_se(user)
        .order_by("path")
        .values_list("path", flat=True)
    )

    # Both inputs are already ordered by path, which is presumably why
    # skip_sorting=True is passed -- confirm against utils.filter_descendants.
    # The result is turned into a set once so that the membership test in the
    # loop below is a hash lookup rather than a scan per document.
    readable_attachments_paths = set(
        utils.filter_descendants(
            [doc.path for doc in attachments_documents],
            readable_per_se_paths,
            skip_sorting=True,
        )
    )

    readable_attachments = set()
    for document in attachments_documents:
        if document.path in readable_attachments_paths:
            readable_attachments.update(
                new_attachments.intersection(document.attachments)
            )

    # Update attachments with readable keys
    self.validated_data["attachments"] = list(
        existing_attachments | readable_attachments
    )

    return super().save(**kwargs)
# --- src/backend/core/tests/documents/test_api_documents_media_auth.py ---
# The module docstring is reworded to:
#     Test media-auth authorization API endpoint in docs core app.


# --- src/backend/core/tests/test_utils.py (additions) ---
import base64
import uuid

import y_py

from core import utils


def test_utils_extract_attachments():
    """
    Every attachment key served under "/media/" in the content is extracted.
    """
    document_id = uuid.uuid4()

    # Key built on a known document id, referenced through a "/media/" URL.
    image_key1 = f"{document_id!s}/attachments/{uuid.uuid4()!s}.png"
    image_url1 = f"http://localhost/media/{image_key1:s}"

    # Key whose URL lacks the "/media/" segment.
    image_key2 = f"{uuid.uuid4()!s}/attachments/{uuid.uuid4()!s}.png"
    image_url2 = f"http://localhost/{image_key2:s}"

    # Key that only appears as plain text inside a paragraph.
    image_key3 = f"{uuid.uuid4()!s}/attachments/{uuid.uuid4()!s}.png"
    image_url3 = f"http://localhost/media/{image_key3:s}"

    ydoc = y_py.YDoc()  # pylint: disable=no-member
    with ydoc.begin_transaction() as txn:
        root = ydoc.get_xml_element("document-store")

        # Two image nodes, one per URL, in this order.
        for source in (image_url1, image_url2):
            image_node = root.push_xml_element(txn, "image")
            image_node.set_attribute(txn, "src", source)

        paragraph_node = root.push_xml_element(txn, "paragraph")
        text_node = paragraph_node.push_xml_text(txn)
        text_node.push(txn, image_url3)

    encoded_update = y_py.encode_state_as_update(ydoc)  # pylint: disable=no-member
    base64_string = base64.b64encode(encoded_update).decode("utf-8")

    # image_key2 is missing the "/media/" part and shouldn't get extracted
    expected_keys = [image_key1, image_key3]
    assert utils.extract_attachments(base64_string) == expected_keys


# --- src/backend/core/utils.py ---
def extract_attachments(content):
    """
    Return the media paths referenced in a document's content.

    Falsy content (empty string, None) yields an empty list without decoding.
    Otherwise the content is converted with `base64_yjs_to_xml` and every
    match of `enums.MEDIA_STORAGE_URL_EXTRACT` in the result is returned.
    """
    if content:
        return re.findall(
            enums.MEDIA_STORAGE_URL_EXTRACT, base64_yjs_to_xml(content)
        )
    return []