♻️(backend) raw payloads on convert endpoint

Handle the raw payloads in requests and responses to convert-endpoint.

This change replaces Base64-encoded I/O with direct binary streaming,
yielding several benefits:
- **Network efficiency**: Eliminates the ~33% size inflation of Base64,
cutting bandwidth and latency.
- **Memory savings**: Enables piping DOCX (already compressed) buffers
straight to DocSpec API without holding, encoding and decoding multi-MB
payload in RAM.

Signed-off-by: Stephan Meijer <me@stephanmeijer.com>
This commit is contained in:
Stephan Meijer
2025-07-04 13:29:21 +02:00
parent fde520a6f3
commit 78a6772bab
2 changed files with 12 additions and 72 deletions

View File

@@ -1,5 +1,7 @@
"""Converter services."""
from base64 import b64encode
from django.conf import settings
import requests
@@ -17,14 +19,6 @@ class ServiceUnavailableError(ConversionError):
"""Raised when the conversion service is unavailable."""
class InvalidResponseError(ConversionError):
"""Raised when the conversion service returns an invalid response."""
class MissingContentError(ConversionError):
"""Raised when the response is missing required content."""
class YdocConverter:
"""Service class for conversion-related operations."""
@@ -43,36 +37,17 @@ class YdocConverter:
try:
response = requests.post(
f"{settings.Y_PROVIDER_API_BASE_URL}{settings.CONVERSION_API_ENDPOINT}/",
json={
"content": text,
},
data=text,
headers={
"Authorization": self.auth_header,
"Content-Type": "application/json",
"Content-Type": "text/markdown",
},
timeout=settings.CONVERSION_API_TIMEOUT,
verify=settings.CONVERSION_API_SECURE,
)
response.raise_for_status()
conversion_response = response.json()
return b64encode(response.content).decode("utf-8")
except requests.RequestException as err:
raise ServiceUnavailableError(
"Failed to connect to conversion service",
) from err
except ValueError as err:
raise InvalidResponseError(
"Could not parse conversion service response"
) from err
try:
document_content = conversion_response[
settings.CONVERSION_API_CONTENT_FIELD
]
except KeyError as err:
raise MissingContentError(
f"Response missing required field: {settings.CONVERSION_API_CONTENT_FIELD}"
) from err
return document_content