♻️(back) replace Ypy by pycrdt
Ypy is deprecated and unmaintained, and we have problems parsing existing documents with it. We replace it with pycrdt, an actively maintained library that does not have the issues we have with Ypy.
This commit is contained in:
@@ -11,9 +11,9 @@ from django.conf import settings
|
||||
from django.core.files.storage import default_storage
|
||||
from django.utils import timezone
|
||||
|
||||
import pycrdt
|
||||
import pytest
|
||||
import requests
|
||||
import y_py
|
||||
from rest_framework.test import APIClient
|
||||
|
||||
from core import factories, models
|
||||
@@ -84,13 +84,14 @@ def test_api_documents_duplicate_success(index):
|
||||
image_refs = [get_image_refs(doc_id) for doc_id in document_ids]
|
||||
|
||||
# Create document content with the first image only
|
||||
ydoc = y_py.YDoc() # pylint: disable=no-member
|
||||
with ydoc.begin_transaction() as txn:
|
||||
xml_fragment = ydoc.get_xml_element("document-store")
|
||||
xml_fragment.push_xml_element(txn, "image").set_attribute(
|
||||
txn, "src", image_refs[0][1]
|
||||
)
|
||||
update = y_py.encode_state_as_update(ydoc) # pylint: disable=no-member
|
||||
ydoc = pycrdt.Doc()
|
||||
fragment = pycrdt.XmlFragment(
|
||||
[
|
||||
pycrdt.XmlElement("img", {"src": image_refs[0][1]}),
|
||||
]
|
||||
)
|
||||
ydoc["document-store"] = fragment
|
||||
update = ydoc.get_update()
|
||||
base64_content = base64.b64encode(update).decode("utf-8")
|
||||
|
||||
# Create documents
|
||||
|
||||
@@ -5,8 +5,8 @@ Test extract-attachments on document update in docs core app.
|
||||
import base64
|
||||
from uuid import uuid4
|
||||
|
||||
import pycrdt
|
||||
import pytest
|
||||
import y_py
|
||||
from rest_framework.test import APIClient
|
||||
|
||||
from core import factories
|
||||
@@ -16,14 +16,15 @@ pytestmark = pytest.mark.django_db
|
||||
|
||||
def get_ydoc_with_mages(image_keys):
|
||||
"""Return a ydoc from text for testing purposes."""
|
||||
ydoc = y_py.YDoc() # pylint: disable=no-member
|
||||
with ydoc.begin_transaction() as txn:
|
||||
xml_fragment = ydoc.get_xml_element("document-store")
|
||||
for key in image_keys:
|
||||
xml_image = xml_fragment.push_xml_element(txn, "image")
|
||||
xml_image.set_attribute(txn, "src", f"http://localhost/media/{key:s}")
|
||||
|
||||
update = y_py.encode_state_as_update(ydoc) # pylint: disable=no-member
|
||||
ydoc = pycrdt.Doc()
|
||||
fragment = pycrdt.XmlFragment(
|
||||
[
|
||||
pycrdt.XmlElement("img", {"src": f"http://localhost/media/{key:s}"})
|
||||
for key in image_keys
|
||||
]
|
||||
)
|
||||
ydoc["document-store"] = fragment
|
||||
update = ydoc.get_update()
|
||||
return base64.b64encode(update).decode("utf-8")
|
||||
|
||||
|
||||
|
||||
@@ -4,8 +4,8 @@ import uuid
|
||||
from django.core.files.base import ContentFile
|
||||
from django.core.files.storage import default_storage
|
||||
|
||||
import pycrdt
|
||||
import pytest
|
||||
import y_py
|
||||
|
||||
from core import models
|
||||
|
||||
@@ -27,14 +27,13 @@ def test_populate_attachments_on_all_documents(migrator):
|
||||
|
||||
# Create document content with an image
|
||||
file_key = f"{old_doc_with_attachments.id!s}/file"
|
||||
ydoc = y_py.YDoc() # pylint: disable=no-member
|
||||
image_key = f"{old_doc_with_attachments.id!s}/attachments/{uuid.uuid4()!s}.png"
|
||||
with ydoc.begin_transaction() as txn:
|
||||
xml_fragment = ydoc.get_xml_element("document-store")
|
||||
xml_fragment.push_xml_element(txn, "image").set_attribute(
|
||||
txn, "src", f"http://localhost/media/{image_key:s}"
|
||||
)
|
||||
update = y_py.encode_state_as_update(ydoc) # pylint: disable=no-member
|
||||
ydoc = pycrdt.Doc()
|
||||
fragment = pycrdt.XmlFragment(
|
||||
[pycrdt.XmlElement("img", {"src": f"http://localhost/media/{image_key:s}"})]
|
||||
)
|
||||
ydoc["document-store"] = fragment
|
||||
update = ydoc.get_update()
|
||||
base64_content = base64.b64encode(update).decode("utf-8")
|
||||
bytes_content = base64_content.encode("utf-8")
|
||||
content_file = ContentFile(bytes_content)
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
import base64
|
||||
import uuid
|
||||
|
||||
import y_py
|
||||
import pycrdt
|
||||
|
||||
from core import utils
|
||||
|
||||
@@ -29,17 +29,22 @@ TEST_BASE64_STRING = (
|
||||
|
||||
def test_utils_base64_yjs_to_text():
|
||||
"""Test extract text from saved yjs document"""
|
||||
assert utils.base64_yjs_to_text(TEST_BASE64_STRING) == "Hello world"
|
||||
assert utils.base64_yjs_to_text(TEST_BASE64_STRING) == "Hello w or ld"
|
||||
|
||||
|
||||
def test_utils_base64_yjs_to_xml():
|
||||
"""Test extract xml from saved yjs document"""
|
||||
content = utils.base64_yjs_to_xml(TEST_BASE64_STRING)
|
||||
assert (
|
||||
'<heading "level"="1" "textAlignment"="left">Hello</heading>' in content
|
||||
or '<heading "textAlignment"="left" "level"="1">Hello</heading>' in content
|
||||
'<heading textAlignment="left" level="1"><italic>Hello</italic></heading>'
|
||||
in content
|
||||
or '<heading level="1" textAlignment="left"><italic>Hello</italic></heading>'
|
||||
in content
|
||||
)
|
||||
assert (
|
||||
'<bulletListItem textAlignment="left">w<bold>or</bold>ld</bulletListItem>'
|
||||
in content
|
||||
)
|
||||
assert '<bulletListItem "textAlignment"="left">world</bulletListItem>' in content
|
||||
|
||||
|
||||
def test_utils_extract_attachments():
|
||||
@@ -56,22 +61,17 @@ def test_utils_extract_attachments():
|
||||
image_key3 = f"{uuid.uuid4()!s}/attachments/{uuid.uuid4()!s}.png"
|
||||
image_url3 = f"http://localhost/media/{image_key3:s}"
|
||||
|
||||
ydoc = y_py.YDoc() # pylint: disable=no-member
|
||||
with ydoc.begin_transaction() as txn:
|
||||
xml_fragment = ydoc.get_xml_element("document-store")
|
||||
ydoc = pycrdt.Doc()
|
||||
frag = pycrdt.XmlFragment(
|
||||
[
|
||||
pycrdt.XmlElement("img", {"src": image_url1}),
|
||||
pycrdt.XmlElement("img", {"src": image_url2}),
|
||||
pycrdt.XmlElement("p", {}, [pycrdt.XmlText(image_url3)]),
|
||||
]
|
||||
)
|
||||
ydoc["document-store"] = frag
|
||||
|
||||
xml_image = xml_fragment.push_xml_element(txn, "image")
|
||||
xml_image.set_attribute(txn, "src", image_url1)
|
||||
|
||||
xml_image = xml_fragment.push_xml_element(txn, "image")
|
||||
xml_image.set_attribute(txn, "src", image_url2)
|
||||
|
||||
xml_paragraph = xml_fragment.push_xml_element(txn, "paragraph")
|
||||
xml_text = xml_paragraph.push_xml_text(txn)
|
||||
xml_text.push(txn, image_url3)
|
||||
|
||||
update = y_py.encode_state_as_update(ydoc) # pylint: disable=no-member
|
||||
update = ydoc.get_update()
|
||||
base64_string = base64.b64encode(update).decode("utf-8")
|
||||
|
||||
# image_key2 is missing the "/media/" part and shouldn't get extracted
|
||||
assert utils.extract_attachments(base64_string) == [image_key1, image_key3]
|
||||
|
||||
@@ -1,70 +0,0 @@
|
||||
"""Test util base64_yjs_to_text."""
|
||||
|
||||
import base64
|
||||
import uuid
|
||||
|
||||
import y_py
|
||||
|
||||
from core import utils
|
||||
from core.utils import base64_yjs_to_text
|
||||
|
||||
|
||||
def test_utils_base64_yjs_to_text():
|
||||
"""
|
||||
Test extract_text_from_saved_yjs_document
|
||||
This base64 string is an example of what is saved in the database.
|
||||
This base64 is generated from the blocknote editor, it contains
|
||||
the text \n# *Hello* \n- w**or**ld
|
||||
"""
|
||||
base64_string = (
|
||||
"AR717vLVDgAHAQ5kb2N1bWVudC1zdG9yZQMKYmxvY2tHcm91cAcA9e7y1Q4AAw5ibG9ja0NvbnRh"
|
||||
"aW5lcgcA9e7y1Q4BAwdoZWFkaW5nBwD17vLVDgIGBgD17vLVDgMGaXRhbGljAnt9hPXu8tUOBAVI"
|
||||
"ZWxsb4b17vLVDgkGaXRhbGljBG51bGwoAPXu8tUOAg10ZXh0QWxpZ25tZW50AXcEbGVmdCgA9e7y"
|
||||
"1Q4CBWxldmVsAX0BKAD17vLVDgECaWQBdyQwNGQ2MjM0MS04MzI2LTQyMzYtYTA4My00ODdlMjZm"
|
||||
"YWQyMzAoAPXu8tUOAQl0ZXh0Q29sb3IBdwdkZWZhdWx0KAD17vLVDgEPYmFja2dyb3VuZENvbG9y"
|
||||
"AXcHZGVmYXVsdIf17vLVDgEDDmJsb2NrQ29udGFpbmVyBwD17vLVDhADDmJ1bGxldExpc3RJdGVt"
|
||||
"BwD17vLVDhEGBAD17vLVDhIBd4b17vLVDhMEYm9sZAJ7fYT17vLVDhQCb3KG9e7y1Q4WBGJvbGQE"
|
||||
"bnVsbIT17vLVDhcCbGQoAPXu8tUOEQ10ZXh0QWxpZ25tZW50AXcEbGVmdCgA9e7y1Q4QAmlkAXck"
|
||||
"ZDM1MWUwNjgtM2U1NS00MjI2LThlYTUtYWJiMjYzMTk4ZTJhKAD17vLVDhAJdGV4dENvbG9yAXcH"
|
||||
"ZGVmYXVsdCgA9e7y1Q4QD2JhY2tncm91bmRDb2xvcgF3B2RlZmF1bHSH9e7y1Q4QAw5ibG9ja0Nv"
|
||||
"bnRhaW5lcgcA9e7y1Q4eAwlwYXJhZ3JhcGgoAPXu8tUOHw10ZXh0QWxpZ25tZW50AXcEbGVmdCgA"
|
||||
"9e7y1Q4eAmlkAXckODk3MDBjMDctZTBlMS00ZmUwLWFjYTItODQ5MzIwOWE3ZTQyKAD17vLVDh4J"
|
||||
"dGV4dENvbG9yAXcHZGVmYXVsdCgA9e7y1Q4eD2JhY2tncm91bmRDb2xvcgF3B2RlZmF1bHQA"
|
||||
)
|
||||
|
||||
assert base64_yjs_to_text(base64_string) == "Hello world"
|
||||
|
||||
|
||||
def test_utils_extract_attachments():
|
||||
"""
|
||||
All attachment keys in the document content should be extracted.
|
||||
"""
|
||||
document_id = uuid.uuid4()
|
||||
image_key1 = f"{document_id!s}/attachments/{uuid.uuid4()!s}.png"
|
||||
image_url1 = f"http://localhost/media/{image_key1:s}"
|
||||
|
||||
image_key2 = f"{uuid.uuid4()!s}/attachments/{uuid.uuid4()!s}.png"
|
||||
image_url2 = f"http://localhost/{image_key2:s}"
|
||||
|
||||
image_key3 = f"{uuid.uuid4()!s}/attachments/{uuid.uuid4()!s}.png"
|
||||
image_url3 = f"http://localhost/media/{image_key3:s}"
|
||||
|
||||
ydoc = y_py.YDoc() # pylint: disable=no-member
|
||||
with ydoc.begin_transaction() as txn:
|
||||
xml_fragment = ydoc.get_xml_element("document-store")
|
||||
|
||||
xml_image = xml_fragment.push_xml_element(txn, "image")
|
||||
xml_image.set_attribute(txn, "src", image_url1)
|
||||
|
||||
xml_image = xml_fragment.push_xml_element(txn, "image")
|
||||
xml_image.set_attribute(txn, "src", image_url2)
|
||||
|
||||
xml_paragraph = xml_fragment.push_xml_element(txn, "paragraph")
|
||||
xml_text = xml_paragraph.push_xml_text(txn)
|
||||
xml_text.push(txn, image_url3)
|
||||
|
||||
update = y_py.encode_state_as_update(ydoc) # pylint: disable=no-member
|
||||
base64_string = base64.b64encode(update).decode("utf-8")
|
||||
|
||||
# image_url2 is missing the "/media/" part and shouldn't get extracted
|
||||
assert utils.extract_attachments(base64_string) == [image_key1, image_key3]
|
||||
@@ -3,7 +3,7 @@
|
||||
import base64
|
||||
import re
|
||||
|
||||
import y_py as Y
|
||||
import pycrdt
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from core import enums
|
||||
@@ -52,19 +52,19 @@ def base64_yjs_to_xml(base64_string):
|
||||
"""Extract xml from base64 yjs document."""
|
||||
|
||||
decoded_bytes = base64.b64decode(base64_string)
|
||||
uint8_array = bytearray(decoded_bytes)
|
||||
# uint8_array = bytearray(decoded_bytes)
|
||||
|
||||
doc = Y.YDoc() # pylint: disable=E1101
|
||||
Y.apply_update(doc, uint8_array) # pylint: disable=E1101
|
||||
return str(doc.get_xml_element("document-store"))
|
||||
doc = pycrdt.Doc()
|
||||
doc.apply_update(decoded_bytes)
|
||||
return str(doc.get("document-store", type=pycrdt.XmlFragment))
|
||||
|
||||
|
||||
def base64_yjs_to_text(base64_string):
|
||||
"""Extract text from base64 yjs document."""
|
||||
|
||||
blocknote_structure = base64_yjs_to_xml(base64_string)
|
||||
soup = BeautifulSoup(blocknote_structure, "html.parser")
|
||||
return soup.get_text(separator=" ").strip()
|
||||
soup = BeautifulSoup(blocknote_structure, "lxml-xml")
|
||||
return soup.get_text(separator=" ", strip=True)
|
||||
|
||||
|
||||
def extract_attachments(content):
|
||||
|
||||
@@ -47,18 +47,19 @@ dependencies = [
|
||||
"factory_boy==3.3.3",
|
||||
"gunicorn==23.0.0",
|
||||
"jsonschema==4.23.0",
|
||||
"lxml==5.3.1",
|
||||
"markdown==3.7",
|
||||
"mozilla-django-oidc==4.0.1",
|
||||
"nested-multipart-parser==1.5.0",
|
||||
"openai==1.68.2",
|
||||
"psycopg[binary]==3.2.6",
|
||||
"pycrdt==0.12.10",
|
||||
"PyJWT==2.10.1",
|
||||
"python-magic==0.4.27",
|
||||
"requests==2.32.3",
|
||||
"sentry-sdk==2.24.0",
|
||||
"url-normalize==1.4.3",
|
||||
"whitenoise==6.9.0",
|
||||
"y-py==0.6.2",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
|
||||
Reference in New Issue
Block a user