From c02f19a2cda7e74d7d228886fccd42d08deaa35d Mon Sep 17 00:00:00 2001 From: Samuel Paccoud - DINUM Date: Tue, 21 Jan 2025 23:56:50 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8(backend)=20extract=20attachment=20key?= =?UTF-8?q?s=20from=20updated=20content=20for=20access?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can't prevent document editors from copy/pasting content from one document to another. The problem is that copying content will copy the urls pointing to attachments, but if we don't do anything, the reader of the document to which the content is being pasted may not be allowed to access the attachment files from the original document. Using the work from the previous commit, we can grant access to the readers of the target document by extracting the attachment keys from the content and adding them to the target document's "attachments" field. Before doing this, we check that the current user can indeed access the attachment files extracted from the content and that they are allowed to edit the current document. 
--- src/backend/core/api/serializers.py | 49 ++++++++++++++++++- .../test_api_documents_media_auth.py | 2 +- src/backend/core/tests/test_utils.py | 40 +++++++++++++++ src/backend/core/utils.py | 3 ++ 4 files changed, 92 insertions(+), 2 deletions(-) diff --git a/src/backend/core/api/serializers.py b/src/backend/core/api/serializers.py index 89b835a9..387cf0d8 100644 --- a/src/backend/core/api/serializers.py +++ b/src/backend/core/api/serializers.py @@ -10,7 +10,7 @@ from django.utils.translation import gettext_lazy as _ import magic from rest_framework import exceptions, serializers -from core import enums, models +from core import enums, models, utils from core.services.ai_services import AI_ACTIONS from core.services.converter_services import ( ConversionError, @@ -268,6 +268,53 @@ class DocumentSerializer(ListDocumentSerializer): return value + def save(self, **kwargs): + """ + Process the content field to extract attachment keys and update the document's + "attachments" field for access control. 
+ """ + content = self.validated_data.get("content", "") + extracted_attachments = set(utils.extract_attachments(content)) + + existing_attachments = ( + set(self.instance.attachments or []) if self.instance else set() + ) + new_attachments = extracted_attachments - existing_attachments + + if new_attachments: + attachments_documents = ( + models.Document.objects.filter( + attachments__overlap=list(new_attachments) + ) + .only("path", "attachments") + .order_by("path") + ) + + user = self.context["request"].user + readable_per_se_paths = ( + models.Document.objects.readable_per_se(user) + .order_by("path") + .values_list("path", flat=True) + ) + readable_attachments_paths = utils.filter_descendants( + [doc.path for doc in attachments_documents], + readable_per_se_paths, + skip_sorting=True, + ) + + readable_attachments = set() + for document in attachments_documents: + if document.path not in readable_attachments_paths: + continue + readable_attachments.update(set(document.attachments) & new_attachments) + + # Update attachments with readable keys + self.validated_data["attachments"] = list( + existing_attachments | readable_attachments + ) + + return super().save(**kwargs) + class ServerCreateDocumentSerializer(serializers.Serializer): """ diff --git a/src/backend/core/tests/documents/test_api_documents_media_auth.py b/src/backend/core/tests/documents/test_api_documents_media_auth.py index e6ed3d00..13817e97 100644 --- a/src/backend/core/tests/documents/test_api_documents_media_auth.py +++ b/src/backend/core/tests/documents/test_api_documents_media_auth.py @@ -1,5 +1,5 @@ """ -Test file uploads API endpoint for users in impress's core app. +Test media-auth authorization API endpoint in docs core app. 
""" from io import BytesIO diff --git a/src/backend/core/tests/test_utils.py b/src/backend/core/tests/test_utils.py index 4fa33e1e..3fea93ed 100644 --- a/src/backend/core/tests/test_utils.py +++ b/src/backend/core/tests/test_utils.py @@ -1,5 +1,10 @@ """Test util base64_yjs_to_text.""" +import base64 +import uuid + +import y_py + from core import utils # This base64 string is an example of what is saved in the database. @@ -35,3 +40,38 @@ def test_utils_base64_yjs_to_xml(): or 'Hello' in content ) assert 'world' in content + + +def test_utils_extract_attachments(): + """ + All attachment keys in the document content should be extracted. + """ + document_id = uuid.uuid4() + image_key1 = f"{document_id!s}/attachments/{uuid.uuid4()!s}.png" + image_url1 = f"http://localhost/media/{image_key1:s}" + + image_key2 = f"{uuid.uuid4()!s}/attachments/{uuid.uuid4()!s}.png" + image_url2 = f"http://localhost/{image_key2:s}" + + image_key3 = f"{uuid.uuid4()!s}/attachments/{uuid.uuid4()!s}.png" + image_url3 = f"http://localhost/media/{image_key3:s}" + + ydoc = y_py.YDoc() # pylint: disable=no-member + with ydoc.begin_transaction() as txn: + xml_fragment = ydoc.get_xml_element("document-store") + + xml_image = xml_fragment.push_xml_element(txn, "image") + xml_image.set_attribute(txn, "src", image_url1) + + xml_image = xml_fragment.push_xml_element(txn, "image") + xml_image.set_attribute(txn, "src", image_url2) + + xml_paragraph = xml_fragment.push_xml_element(txn, "paragraph") + xml_text = xml_paragraph.push_xml_text(txn) + xml_text.push(txn, image_url3) + + update = y_py.encode_state_as_update(ydoc) # pylint: disable=no-member + base64_string = base64.b64encode(update).decode("utf-8") + + # image_key2 is missing the "/media/" part and shouldn't get extracted + assert utils.extract_attachments(base64_string) == [image_key1, image_key3] diff --git a/src/backend/core/utils.py b/src/backend/core/utils.py index 88a628b2..0378c323 100644 --- a/src/backend/core/utils.py +++ 
b/src/backend/core/utils.py @@ -69,5 +69,8 @@ def base64_yjs_to_text(base64_string): def extract_attachments(content): """Helper method to extract media paths from a document's content.""" + if not content: + return [] + xml_content = base64_yjs_to_xml(content) return re.findall(enums.MEDIA_STORAGE_URL_EXTRACT, xml_content)