️(AI) improve formating of ai translation

The ai translation were quite lossy about formatting.
Colors, background, breaklines, table sizes were
lost in the translation.
We improve the AI translation request to keep
the formatting as close as possible by using
html instead of markdown.
This commit is contained in:
Anthony LC
2025-03-12 10:14:47 +01:00
committed by Anthony LC
parent 9176328200
commit cbf9091d1c
6 changed files with 99 additions and 145 deletions

View File

@@ -56,6 +56,7 @@ and this project adheres to
- 📝(doc) minor README.md formatting and wording enhancements
-Stop setting a default title on doc creation #634
- ♻️(frontend) misc ui improvements #644
- ♻️(frontend) Improve AI translations #478
## Fixed

View File

@@ -1,8 +1,5 @@
"""AI services."""
import json
import re
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
@@ -12,32 +9,34 @@ from core import enums
AI_ACTIONS = {
"prompt": (
"Answer the prompt in markdown format. Return JSON: "
'{"answer": "Your markdown answer"}. '
"Do not provide any other information."
"Answer the prompt in markdown format. "
"Preserve the language and markdown formatting. "
"Do not provide any other information. "
"Preserve the language."
),
"correct": (
"Correct grammar and spelling of the markdown text, "
"preserving language and markdown formatting. "
'Return JSON: {"answer": "your corrected markdown text"}. '
"Do not provide any other information."
"Do not provide any other information. "
"Preserve the language."
),
"rephrase": (
"Rephrase the given markdown text, "
"preserving language and markdown formatting. "
'Return JSON: {"answer": "your rephrased markdown text"}. '
"Do not provide any other information."
"Do not provide any other information. "
"Preserve the language."
),
"summarize": (
"Summarize the markdown text, preserving language and markdown formatting. "
'Return JSON: {"answer": "your markdown summary"}. '
"Do not provide any other information."
"Do not provide any other information. "
"Preserve the language."
),
}
AI_TRANSLATE = (
"Translate the markdown text to {language:s}, preserving markdown formatting. "
'Return JSON: {{"answer": "your translated markdown text in {language:s}"}}. '
"Keep the same html stucture and formatting. "
"Translate the content in the html to the specified language {language:s}. "
"Check the translation for accuracy and make any necessary corrections. "
"Do not provide any other information."
)
@@ -59,32 +58,18 @@ class AIService:
"""Helper method to call the OpenAI API and process the response."""
response = self.client.chat.completions.create(
model=settings.AI_MODEL,
response_format={"type": "json_object"},
messages=[
{"role": "system", "content": system_content},
{"role": "user", "content": json.dumps({"markdown_input": text})},
{"role": "user", "content": text},
],
)
content = response.choices[0].message.content
try:
sanitized_content = re.sub(r'\s*"answer"\s*:\s*', '"answer": ', content)
sanitized_content = re.sub(r"\s*\}", "}", sanitized_content)
sanitized_content = re.sub(r"(?<!\\)\n", "\\\\n", sanitized_content)
sanitized_content = re.sub(r"(?<!\\)\t", "\\\\t", sanitized_content)
json_response = json.loads(sanitized_content)
except (json.JSONDecodeError, IndexError):
try:
json_response = json.loads(content)
except json.JSONDecodeError as err:
raise RuntimeError("AI response is not valid JSON", content) from err
if "answer" not in json_response:
if not content:
raise RuntimeError("AI response does not contain an answer")
return json_response
return {"answer": content}
def transform(self, text, action):
"""Transform text based on specified action."""

View File

@@ -71,9 +71,8 @@ def test_api_documents_ai_transform_anonymous_success(mock_create):
"""
document = factories.DocumentFactory(link_reach="public", link_role="editor")
answer = '{"answer": "Salut"}'
mock_create.return_value = MagicMock(
choices=[MagicMock(message=MagicMock(content=answer))]
choices=[MagicMock(message=MagicMock(content="Salut"))]
)
url = f"/api/v1.0/documents/{document.id!s}/ai-transform/"
@@ -83,17 +82,15 @@ def test_api_documents_ai_transform_anonymous_success(mock_create):
assert response.json() == {"answer": "Salut"}
mock_create.assert_called_once_with(
model="llama",
response_format={"type": "json_object"},
messages=[
{
"role": "system",
"content": (
"Summarize the markdown text, preserving language and markdown formatting. "
'Return JSON: {"answer": "your markdown summary"}. Do not provide any other '
"information."
"Do not provide any other information. Preserve the language."
),
},
{"role": "user", "content": '{"markdown_input": "Hello"}'},
{"role": "user", "content": "Hello"},
],
)
@@ -170,9 +167,8 @@ def test_api_documents_ai_transform_authenticated_success(mock_create, reach, ro
document = factories.DocumentFactory(link_reach=reach, link_role=role)
answer = '{"answer": "Salut"}'
mock_create.return_value = MagicMock(
choices=[MagicMock(message=MagicMock(content=answer))]
choices=[MagicMock(message=MagicMock(content="Salut"))]
)
url = f"/api/v1.0/documents/{document.id!s}/ai-transform/"
@@ -182,16 +178,15 @@ def test_api_documents_ai_transform_authenticated_success(mock_create, reach, ro
assert response.json() == {"answer": "Salut"}
mock_create.assert_called_once_with(
model="llama",
response_format={"type": "json_object"},
messages=[
{
"role": "system",
"content": (
'Answer the prompt in markdown format. Return JSON: {"answer": '
'"Your markdown answer"}. Do not provide any other information.'
"Answer the prompt in markdown format. Preserve the language and markdown "
"formatting. Do not provide any other information. Preserve the language."
),
},
{"role": "user", "content": '{"markdown_input": "Hello"}'},
{"role": "user", "content": "Hello"},
],
)
@@ -246,9 +241,8 @@ def test_api_documents_ai_transform_success(mock_create, via, role, mock_user_te
document=document, team="lasuite", role=role
)
answer = '{"answer": "Salut"}'
mock_create.return_value = MagicMock(
choices=[MagicMock(message=MagicMock(content=answer))]
choices=[MagicMock(message=MagicMock(content="Salut"))]
)
url = f"/api/v1.0/documents/{document.id!s}/ai-transform/"
@@ -258,16 +252,15 @@ def test_api_documents_ai_transform_success(mock_create, via, role, mock_user_te
assert response.json() == {"answer": "Salut"}
mock_create.assert_called_once_with(
model="llama",
response_format={"type": "json_object"},
messages=[
{
"role": "system",
"content": (
'Answer the prompt in markdown format. Return JSON: {"answer": '
'"Your markdown answer"}. Do not provide any other information.'
"Answer the prompt in markdown format. Preserve the language and markdown "
"formatting. Do not provide any other information. Preserve the language."
),
},
{"role": "user", "content": '{"markdown_input": "Hello"}'},
{"role": "user", "content": "Hello"},
],
)
@@ -315,9 +308,8 @@ def test_api_documents_ai_transform_throttling_document(mock_create):
client = APIClient()
document = factories.DocumentFactory(link_reach="public", link_role="editor")
answer = '{"answer": "Salut"}'
mock_create.return_value = MagicMock(
choices=[MagicMock(message=MagicMock(content=answer))]
choices=[MagicMock(message=MagicMock(content="Salut"))]
)
url = f"/api/v1.0/documents/{document.id!s}/ai-transform/"
@@ -350,9 +342,8 @@ def test_api_documents_ai_transform_throttling_user(mock_create):
client = APIClient()
client.force_login(user)
answer = '{"answer": "Salut"}'
mock_create.return_value = MagicMock(
choices=[MagicMock(message=MagicMock(content=answer))]
choices=[MagicMock(message=MagicMock(content="Salut"))]
)
for _ in range(3):

View File

@@ -91,29 +91,28 @@ def test_api_documents_ai_translate_anonymous_success(mock_create):
"""
document = factories.DocumentFactory(link_reach="public", link_role="editor")
answer = '{"answer": "Salut"}'
mock_create.return_value = MagicMock(
choices=[MagicMock(message=MagicMock(content=answer))]
choices=[MagicMock(message=MagicMock(content="Ola"))]
)
url = f"/api/v1.0/documents/{document.id!s}/ai-translate/"
response = APIClient().post(url, {"text": "Hello", "language": "es"})
assert response.status_code == 200
assert response.json() == {"answer": "Salut"}
assert response.json() == {"answer": "Ola"}
mock_create.assert_called_once_with(
model="llama",
response_format={"type": "json_object"},
messages=[
{
"role": "system",
"content": (
"Translate the markdown text to Spanish, preserving markdown formatting. "
'Return JSON: {"answer": "your translated markdown text in Spanish"}. '
"Keep the same html stucture and formatting. "
"Translate the content in the html to the specified language Spanish. "
"Check the translation for accuracy and make any necessary corrections. "
"Do not provide any other information."
),
},
{"role": "user", "content": '{"markdown_input": "Hello"}'},
{"role": "user", "content": "Hello"},
],
)
@@ -190,9 +189,8 @@ def test_api_documents_ai_translate_authenticated_success(mock_create, reach, ro
document = factories.DocumentFactory(link_reach=reach, link_role=role)
answer = '{"answer": "Salut"}'
mock_create.return_value = MagicMock(
choices=[MagicMock(message=MagicMock(content=answer))]
choices=[MagicMock(message=MagicMock(content="Salut"))]
)
url = f"/api/v1.0/documents/{document.id!s}/ai-translate/"
@@ -202,18 +200,18 @@ def test_api_documents_ai_translate_authenticated_success(mock_create, reach, ro
assert response.json() == {"answer": "Salut"}
mock_create.assert_called_once_with(
model="llama",
response_format={"type": "json_object"},
messages=[
{
"role": "system",
"content": (
"Translate the markdown text to Colombian Spanish, "
"preserving markdown formatting. Return JSON: "
'{"answer": "your translated markdown text in Colombian Spanish"}. '
"Keep the same html stucture and formatting. "
"Translate the content in the html to the "
"specified language Colombian Spanish. "
"Check the translation for accuracy and make any necessary corrections. "
"Do not provide any other information."
),
},
{"role": "user", "content": '{"markdown_input": "Hello"}'},
{"role": "user", "content": "Hello"},
],
)
@@ -268,9 +266,8 @@ def test_api_documents_ai_translate_success(mock_create, via, role, mock_user_te
document=document, team="lasuite", role=role
)
answer = '{"answer": "Salut"}'
mock_create.return_value = MagicMock(
choices=[MagicMock(message=MagicMock(content=answer))]
choices=[MagicMock(message=MagicMock(content="Salut"))]
)
url = f"/api/v1.0/documents/{document.id!s}/ai-translate/"
@@ -280,18 +277,18 @@ def test_api_documents_ai_translate_success(mock_create, via, role, mock_user_te
assert response.json() == {"answer": "Salut"}
mock_create.assert_called_once_with(
model="llama",
response_format={"type": "json_object"},
messages=[
{
"role": "system",
"content": (
"Translate the markdown text to Colombian Spanish, "
"preserving markdown formatting. Return JSON: "
'{"answer": "your translated markdown text in Colombian Spanish"}. '
"Keep the same html stucture and formatting. "
"Translate the content in the html to the "
"specified language Colombian Spanish. "
"Check the translation for accuracy and make any necessary corrections. "
"Do not provide any other information."
),
},
{"role": "user", "content": '{"markdown_input": "Hello"}'},
{"role": "user", "content": "Hello"},
],
)
@@ -339,9 +336,8 @@ def test_api_documents_ai_translate_throttling_document(mock_create):
client = APIClient()
document = factories.DocumentFactory(link_reach="public", link_role="editor")
answer = '{"answer": "Salut"}'
mock_create.return_value = MagicMock(
choices=[MagicMock(message=MagicMock(content=answer))]
choices=[MagicMock(message=MagicMock(content="Salut"))]
)
url = f"/api/v1.0/documents/{document.id!s}/ai-translate/"
@@ -374,9 +370,8 @@ def test_api_documents_ai_translate_throttling_user(mock_create):
client = APIClient()
client.force_login(user)
answer = '{"answer": "Salut"}'
mock_create.return_value = MagicMock(
choices=[MagicMock(message=MagicMock(content=answer))]
choices=[MagicMock(message=MagicMock(content="Salut"))]
)
for _ in range(3):

View File

@@ -2,7 +2,6 @@
Test ai API endpoints in the impress core app.
"""
import json
from unittest.mock import MagicMock, patch
from django.core.exceptions import ImproperlyConfigured
@@ -58,9 +57,8 @@ def test_api_ai__client_error(mock_create):
def test_api_ai__client_invalid_response(mock_create):
"""Fail when the client response is invalid"""
answer = {"no_answer": "This is an invalid response"}
mock_create.return_value = MagicMock(
choices=[MagicMock(message=MagicMock(content=json.dumps(answer)))]
choices=[MagicMock(message=MagicMock(content=None))]
)
with pytest.raises(
@@ -77,49 +75,10 @@ def test_api_ai__client_invalid_response(mock_create):
def test_api_ai__success(mock_create):
"""The AI request should work as expect when called with valid arguments."""
answer = '{"answer": "Salut"}'
mock_create.return_value = MagicMock(
choices=[MagicMock(message=MagicMock(content=answer))]
choices=[MagicMock(message=MagicMock(content="Salut"))]
)
response = AIService().transform("hello", "prompt")
assert response == {"answer": "Salut"}
@override_settings(
AI_BASE_URL="http://example.com", AI_API_KEY="test-key", AI_MODEL="test-model"
)
@patch("openai.resources.chat.completions.Completions.create")
def test_api_ai__success_sanitize(mock_create):
"""The AI response should be sanitized"""
answer = '{"answer": "Salut\\n \tle \nmonde"}'
mock_create.return_value = MagicMock(
choices=[MagicMock(message=MagicMock(content=answer))]
)
response = AIService().transform("hello", "prompt")
assert response == {"answer": "Salut\n \tle \nmonde"}
@override_settings(
AI_BASE_URL="http://example.com", AI_API_KEY="test-key", AI_MODEL="test-model"
)
@patch("openai.resources.chat.completions.Completions.create")
def test_api_ai__success_when_sanitize_fails(mock_create):
"""The AI request should work as expected even with badly formatted response."""
# pylint: disable=C0303
answer = """{
"answer" :
"Salut le monde"
}"""
mock_create.return_value = MagicMock(
choices=[MagicMock(message=MagicMock(content=answer))]
)
response = AIService().transform("hello", "prompt")
assert response == {"answer": "Salut le monde"}

View File

@@ -1,3 +1,4 @@
import { Block } from '@blocknote/core';
import {
ComponentProps,
useBlockNoteEditor,
@@ -225,14 +226,23 @@ const AIMenuItemTransform = ({
icon,
}: PropsWithChildren<AIMenuItemTransform>) => {
const { mutateAsync: requestAI, isPending } = useDocAITransform();
const editor = useBlockNoteEditor();
const requestAIAction = async (selectedBlocks: Block[]) => {
const text = await editor.blocksToMarkdownLossy(selectedBlocks);
const requestAIAction = async (markdown: string) => {
const responseAI = await requestAI({
text: markdown,
text,
action,
docId,
});
return responseAI.answer;
if (!responseAI?.answer) {
throw new Error('No response from AI');
}
const markdown = await editor.tryParseMarkdownToBlocks(responseAI.answer);
editor.replaceBlocks(selectedBlocks, markdown);
};
return (
@@ -255,14 +265,35 @@ const AIMenuItemTranslate = ({
language,
}: PropsWithChildren<AIMenuItemTranslate>) => {
const { mutateAsync: requestAI, isPending } = useDocAITranslate();
const editor = useBlockNoteEditor();
const requestAITranslate = async (selectedBlocks: Block[]) => {
let fullHtml = '';
for (const block of selectedBlocks) {
if (Array.isArray(block.content) && block.content.length === 0) {
fullHtml += '<p><br/></p>';
continue;
}
fullHtml += await editor.blocksToHTMLLossy([block]);
}
const requestAITranslate = async (markdown: string) => {
const responseAI = await requestAI({
text: markdown,
text: fullHtml,
language,
docId,
});
return responseAI.answer;
if (!responseAI || !responseAI.answer) {
throw new Error('No response from AI');
}
try {
const blocks = await editor.tryParseHTMLToBlocks(responseAI.answer);
editor.replaceBlocks(selectedBlocks, blocks);
} catch {
editor.replaceBlocks(selectedBlocks, selectedBlocks);
}
};
return (
@@ -277,7 +308,7 @@ const AIMenuItemTranslate = ({
};
interface AIMenuItemProps {
requestAI: (markdown: string) => Promise<string>;
requestAI: (blocks: Block[]) => Promise<void>;
isPending: boolean;
icon?: ReactNode;
}
@@ -289,32 +320,24 @@ const AIMenuItem = ({
icon,
}: PropsWithChildren<AIMenuItemProps>) => {
const Components = useComponentsContext();
const { toast } = useToastProvider();
const { t } = useTranslation();
const editor = useBlockNoteEditor();
const handleAIError = useHandleAIError();
const handleAIAction = async () => {
let selectedBlocks = editor.getSelection()?.blocks;
const selectedBlocks = editor.getSelection()?.blocks ?? [
editor.getTextCursorPosition().block,
];
if (!selectedBlocks || selectedBlocks.length === 0) {
selectedBlocks = [editor.getTextCursorPosition().block];
if (!selectedBlocks || selectedBlocks.length === 0) {
return;
}
if (!selectedBlocks?.length) {
toast(t('No text selected'), VariantType.WARNING);
return;
}
const markdown = await editor.blocksToMarkdownLossy(selectedBlocks);
try {
const responseAI = await requestAI(markdown);
if (!responseAI) {
return;
}
const blockMarkdown = await editor.tryParseMarkdownToBlocks(responseAI);
editor.replaceBlocks(selectedBlocks, blockMarkdown);
await requestAI(selectedBlocks);
} catch (error) {
handleAIError(error);
}