✨(backend) extract attachment keys from updated content for access

We can't prevent document editors from copy/pasting content from one document to another. The problem is that copying content will copy the URLs pointing to attachments, but if we don't do anything, the reader of the document into which the content is being pasted may not be allowed to access the attachment files from the original document. Using the work from the previous commit, we can grant access to the readers of the target document by extracting the attachment keys from the content and adding them to the target document's "attachments" field. Before doing this, we check that the current user can indeed access the attachment files extracted from the content and that they are allowed to edit the current document.
2025-01-21 23:56:50 +01:00
parent 34a208a80d
commit c02f19a2cd
4 changed files with 92 additions and 2 deletions
--- a/src/backend/core/tests/test_utils.py
+++ b/src/backend/core/tests/test_utils.py
@@ -1,5 +1,10 @@
 """Test util base64_yjs_to_text."""

+import base64
+import uuid
+
+import y_py
+
 from core import utils

 # This base64 string is an example of what is saved in the database.
@@ -35,3 +40,38 @@ def test_utils_base64_yjs_to_xml():
        or '<heading "textAlignment"="left" "level"="1">Hello</heading>' in content
    )
    assert '<bulletListItem "textAlignment"="left">world</bulletListItem>' in content
+
+
+def test_utils_extract_attachments():
+    """
+    All attachment keys in the document content should be extracted.
+    """
+    document_id = uuid.uuid4()
+    image_key1 = f"{document_id!s}/attachments/{uuid.uuid4()!s}.png"
+    image_url1 = f"http://localhost/media/{image_key1:s}"  # has "/media/"
+
+    image_key2 = f"{uuid.uuid4()!s}/attachments/{uuid.uuid4()!s}.png"
+    image_url2 = f"http://localhost/{image_key2:s}"  # lacks "/media/"
+
+    image_key3 = f"{uuid.uuid4()!s}/attachments/{uuid.uuid4()!s}.png"
+    image_url3 = f"http://localhost/media/{image_key3:s}"  # has "/media/"
+
+    ydoc = y_py.YDoc()  # pylint: disable=no-member
+    with ydoc.begin_transaction() as txn:
+        xml_fragment = ydoc.get_xml_element("document-store")
+
+        xml_image = xml_fragment.push_xml_element(txn, "image")
+        xml_image.set_attribute(txn, "src", image_url1)  # URL 1 as image "src"
+
+        xml_image = xml_fragment.push_xml_element(txn, "image")
+        xml_image.set_attribute(txn, "src", image_url2)  # URL 2 as image "src"
+
+        xml_paragraph = xml_fragment.push_xml_element(txn, "paragraph")
+        xml_text = xml_paragraph.push_xml_text(txn)
+        xml_text.push(txn, image_url3)  # URL 3 as plain paragraph text
+
+    update = y_py.encode_state_as_update(ydoc)  # pylint: disable=no-member
+    base64_string = base64.b64encode(update).decode("utf-8")  # input for the call below
+
+    # image_key2 is missing the "/media/" part and shouldn't get extracted
+    assert utils.extract_attachments(base64_string) == [image_key1, image_key3]