(backend) Import of documents

We can now import documents in the .docx and .md formats.
To do so, we added a new container, "docspec", which
uses the docspec service to convert
these formats to the BlockNote format.

More here: #1567 #1569.
This commit is contained in:
Stephan Meijer
2025-11-15 16:29:43 +01:00
committed by Anthony LC
parent 61dbda0bf6
commit b547657efd
12 changed files with 305 additions and 109 deletions

View File

@@ -46,14 +46,12 @@ from core.api.filters import remove_accents
from core.services.ai_services import AIService
from core.services.collaboration_services import CollaborationService
from core.services.converter_services import (
ConversionError,
ServiceUnavailableError as YProviderServiceUnavailableError,
)
from core.services.converter_services import (
ValidationError as YProviderValidationError,
Converter,
)
from core.services.converter_services import (
YdocConverter,
)
from core.services import mime_types
from core.services.search_indexers import (
get_document_indexer,
get_visited_document_ids_of,
@@ -527,6 +525,28 @@ class DocumentViewSet(
"IN SHARE ROW EXCLUSIVE MODE;"
)
# Remove file from validated_data as it's not a model field
# Process it if present
uploaded_file = serializer.validated_data.pop("file", None)
# If a file is uploaded, convert it to Yjs format and set as content
if uploaded_file:
try:
file_content = uploaded_file.read()
converter = Converter()
converted_content = converter.convert(
file_content,
content_type=uploaded_file.content_type,
accept=mime_types.YJS
)
serializer.validated_data["content"] = converted_content
serializer.validated_data["title"] = uploaded_file.name
except ConversionError as err:
raise drf.exceptions.ValidationError(
{"file": ["Could not convert file content"]}
) from err
obj = models.Document.add_root(
creator=self.request.user,
**serializer.validated_data,
@@ -1881,14 +1901,14 @@ class DocumentViewSet(
if base64_content is not None:
# Convert using the y-provider service
try:
yprovider = YdocConverter()
yprovider = Converter()
result = yprovider.convert(
base64.b64decode(base64_content),
"application/vnd.yjs.doc",
mime_types.YJS,
{
"markdown": "text/markdown",
"html": "text/html",
"json": "application/json",
"markdown": mime_types.MARKDOWN,
"html": mime_types.HTML,
"json": mime_types.JSON,
}[content_format],
)
content = result