♻️(backend) raw payloads on convert endpoint

Handle raw payloads in requests to and responses from the convert endpoint.

This change replaces Base64-encoded I/O with direct binary streaming,
yielding several benefits:
- **Network efficiency**: Eliminates the ~33% size inflation of Base64,
cutting bandwidth and latency.
- **Memory savings**: Enables piping DOCX (already compressed) buffers
straight to the DocSpec API without holding, encoding, and decoding
multi-MB payloads in RAM.

Signed-off-by: Stephan Meijer <me@stephanmeijer.com>
This commit is contained in:
Stephan Meijer
2025-07-04 13:29:21 +02:00
parent fde520a6f3
commit 78a6772bab
2 changed files with 12 additions and 72 deletions

View File

@@ -1,5 +1,7 @@
"""Converter services."""
from base64 import b64encode
from django.conf import settings
import requests
@@ -17,14 +19,6 @@ class ServiceUnavailableError(ConversionError):
"""Raised when the conversion service is unavailable."""
class InvalidResponseError(ConversionError):
"""Raised when the conversion service returns an invalid response."""
class MissingContentError(ConversionError):
"""Raised when the response is missing required content."""
class YdocConverter:
"""Service class for conversion-related operations."""
@@ -43,36 +37,17 @@ class YdocConverter:
try:
response = requests.post(
f"{settings.Y_PROVIDER_API_BASE_URL}{settings.CONVERSION_API_ENDPOINT}/",
json={
"content": text,
},
data=text,
headers={
"Authorization": self.auth_header,
"Content-Type": "application/json",
"Content-Type": "text/markdown",
},
timeout=settings.CONVERSION_API_TIMEOUT,
verify=settings.CONVERSION_API_SECURE,
)
response.raise_for_status()
conversion_response = response.json()
return b64encode(response.content).decode("utf-8")
except requests.RequestException as err:
raise ServiceUnavailableError(
"Failed to connect to conversion service",
) from err
except ValueError as err:
raise InvalidResponseError(
"Could not parse conversion service response"
) from err
try:
document_content = conversion_response[
settings.CONVERSION_API_CONTENT_FIELD
]
except KeyError as err:
raise MissingContentError(
f"Response missing required field: {settings.CONVERSION_API_CONTENT_FIELD}"
) from err
return document_content

View File

@@ -1,13 +1,12 @@
"""Test converter services."""
from base64 import b64decode
from unittest.mock import MagicMock, patch
import pytest
import requests
from core.services.converter_services import (
InvalidResponseError,
MissingContentError,
ServiceUnavailableError,
ValidationError,
YdocConverter,
@@ -58,41 +57,6 @@ def test_convert_http_error(mock_post):
converter.convert("test text")
@patch("requests.post")
def test_convert_invalid_json_response(mock_post):
"""Should raise InvalidResponseError when response is not valid JSON."""
converter = YdocConverter()
mock_response = MagicMock()
mock_response.json.side_effect = ValueError("Invalid JSON")
mock_post.return_value = mock_response
with pytest.raises(
InvalidResponseError,
match="Could not parse conversion service response",
):
converter.convert("test text")
@patch("requests.post")
def test_convert_missing_content_field(mock_post, settings):
"""Should raise MissingContentError when response is missing required field."""
settings.CONVERSION_API_CONTENT_FIELD = "expected_field"
converter = YdocConverter()
mock_response = MagicMock()
mock_response.json.return_value = {"wrong_field": "content"}
mock_post.return_value = mock_response
with pytest.raises(
MissingContentError,
match="Response missing required field: expected_field",
):
converter.convert("test text")
@patch("requests.post")
def test_convert_full_integration(mock_post, settings):
"""Test full integration with all settings."""
@@ -105,20 +69,21 @@ def test_convert_full_integration(mock_post, settings):
converter = YdocConverter()
expected_content = {"converted": "content"}
expected_content = b"converted content"
mock_response = MagicMock()
mock_response.json.return_value = {"content": expected_content}
mock_response.content = expected_content
mock_post.return_value = mock_response
result = converter.convert("test markdown")
assert result == expected_content
assert b64decode(result) == expected_content
mock_post.assert_called_once_with(
"http://test.com/conversion-endpoint/",
json={"content": "test markdown"},
data="test markdown",
headers={
"Authorization": "test-key",
"Content-Type": "application/json",
"Content-Type": "text/markdown",
},
timeout=5,
verify=False,