(api) add API route to fetch document content

This allows API users to process document content, enabling the
use of Docs as a headless CMS for instance, or any kind of document
processing. Fixes #1206.
This commit is contained in:
Sylvain Zimmer
2025-07-24 02:31:50 +02:00
committed by Manuel Raynaud
parent 0ac9f059b6
commit 8a8a1460e5
19 changed files with 687 additions and 74 deletions

View File

@@ -15,9 +15,9 @@ from rest_framework import serializers
from core import choices, enums, models, utils, validators
from core.services.ai_services import AI_ACTIONS
from core.services.converter_services import (
from core.services.yprovider_services import (
ConversionError,
YdocConverter,
YProviderAPI,
)
@@ -451,7 +451,7 @@ class ServerCreateDocumentSerializer(serializers.Serializer):
language = user.language or language
try:
document_content = YdocConverter().convert(validated_data["content"])
document_content = YProviderAPI().convert(validated_data["content"])
except ConversionError as err:
raise serializers.ValidationError(
{"content": ["Could not convert content"]}

View File

@@ -37,6 +37,15 @@ from rest_framework.permissions import AllowAny
from core import authentication, choices, enums, models
from core.services.ai_services import AIService
from core.services.collaboration_services import CollaborationService
from core.services.yprovider_services import (
ServiceUnavailableError as YProviderServiceUnavailableError,
)
from core.services.yprovider_services import (
ValidationError as YProviderValidationError,
)
from core.services.yprovider_services import (
YProviderAPI,
)
from core.tasks.mail import send_ask_for_access_mail
from core.utils import extract_attachments, filter_descendants
@@ -1494,6 +1503,61 @@ class DocumentViewSet(
status=status.HTTP_400_BAD_REQUEST,
)
@drf.decorators.action(
detail=True,
methods=["get"],
url_path="content",
name="Get document content in different formats",
)
def content(self, request, pk=None):
"""
Retrieve document content in different formats (JSON, Markdown, HTML).
Query parameters:
- content_format: The desired output format (json, markdown, html)
Returns:
JSON response with content in the specified format.
"""
document = self.get_object()
content_format = request.query_params.get("content_format", "json").lower()
if content_format not in {"json", "markdown", "html"}:
raise drf.exceptions.ValidationError(
"Invalid format. Must be one of: json, markdown, html"
)
# Get the base64 content from the document
content = None
base64_content = document.content
if base64_content is not None:
# Convert using the y-provider service
try:
yprovider = YProviderAPI()
result = yprovider.content(base64_content, content_format)
content = result["content"]
except YProviderValidationError as e:
return drf_response.Response(
{"error": str(e)}, status=status.HTTP_400_BAD_REQUEST
)
except YProviderServiceUnavailableError as e:
logger.error("Error getting content for document %s: %s", pk, e)
return drf_response.Response(
{"error": "Failed to get document content"},
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
)
return drf_response.Response(
{
"id": str(document.id),
"title": document.title,
"content": content,
"created_at": document.created_at,
"updated_at": document.updated_at,
}
)
class DocumentAccessViewSet(
ResourceAccessViewsetMixin,