✨(summary) add localization support for transcription context text

Transcription and summarization results were always generated
with French boilerplate text (e.g. "Réunion du..."), regardless
of the user's preference or the meeting language. Introduce basic
localization support so that generated strings follow the user's
language, then the meeting language, then a configurable default.
This commit is contained in:
leo
2026-02-25 17:50:24 +01:00
committed by aleb_the_flash
parent cd0cec78ba
commit f5e0ddf692
13 changed files with 223 additions and 48 deletions

View File

@@ -11,6 +11,7 @@ and this project adheres to
### Added
- 👷(docker) add arm64 platform support for image builds
- ✨(summary) add localization support for transcription context text
### Changed

View File

@@ -2,6 +2,7 @@
Gitlint extra rule to validate that the message title is of the form
"<gitmoji>(<scope>) <subject>"
"""
from __future__ import unicode_literals
import re

View File

@@ -167,6 +167,7 @@ class NotificationService:
owner_access.user.timezone
).strftime("%H:%M"),
"download_link": f"{get_recording_download_base_url()}/{recording.id}",
"context_language": owner_access.user.language,
}
headers = {

View File

@@ -15,8 +15,8 @@ from summary.core.config import get_settings
settings = get_settings()
class TaskCreation(BaseModel):
"""Task data."""
class TranscribeSummarizeTaskCreation(BaseModel):
"""Transcription and summarization parameters."""
owner_id: str
filename: str
@@ -28,6 +28,7 @@ class TaskCreation(BaseModel):
recording_time: Optional[str]
language: Optional[str]
download_link: Optional[str]
context_language: Optional[str] = None
@field_validator("language")
@classmethod
@@ -45,8 +46,8 @@ router = APIRouter(prefix="/tasks")
@router.post("/")
async def create_task(request: TaskCreation):
"""Create a task."""
async def create_transcribe_summarize_task(request: TranscribeSummarizeTaskCreation):
"""Create a transcription and summarization task."""
task = process_audio_transcribe_summarize_v2.apply_async(
args=[
request.owner_id,
@@ -59,6 +60,7 @@ async def create_task(request: TaskCreation):
request.recording_time,
request.language,
request.download_link,
request.context_language,
],
queue=settings.transcribe_queue,
)

View File

@@ -18,6 +18,7 @@ from summary.core.analytics import MetadataManager, get_analytics
from summary.core.config import get_settings
from summary.core.file_service import FileService, FileServiceException
from summary.core.llm_service import LLMException, LLMObservability, LLMService
from summary.core.locales import get_locale
from summary.core.prompt import (
FORMAT_NEXT_STEPS,
FORMAT_PLAN,
@@ -121,6 +122,7 @@ def process_audio_transcribe_summarize_v2(
recording_time: Optional[str],
language: Optional[str],
download_link: Optional[str],
context_language: Optional[str] = None,
):
"""Process an audio file by transcribing it and generating a summary.
@@ -129,6 +131,19 @@ def process_audio_transcribe_summarize_v2(
2. Transcribes the audio using WhisperX model
3. Sends the results via webhook
Args:
self: Celery task instance (passed on with bind=True)
owner_id: Unique identifier of the recording owner.
filename: Name of the audio file in MinIO storage.
email: Email address of the recording owner.
sub: OIDC subject identifier of the recording owner.
received_at: Unix timestamp when the recording was received.
room: room name where the recording took place.
recording_date: Date of the recording (localized display string).
recording_time: Time of the recording (localized display string).
language: ISO 639-1 language code for transcription.
download_link: URL to download the original recording.
context_language: ISO 639-1 language code of the meeting summary context text.
"""
logger.info(
"Notification received | Owner: %s | Room: %s",
@@ -145,6 +160,7 @@ def process_audio_transcribe_summarize_v2(
max_retries=settings.whisperx_max_retries,
)
# Transcription
try:
with (
file_service.prepare_audio_file(filename) as (audio_file, metadata),
@@ -183,7 +199,10 @@ def process_audio_transcribe_summarize_v2(
metadata_manager.track_transcription_metadata(task_id, transcription)
formatter = TranscriptFormatter()
# Resolve the context locale, in decreasing priority: context_language,
# then the meeting language, then the configured default context language.
locale = get_locale(context_language, language)
formatter = TranscriptFormatter(locale)
content, title = formatter.format(
transcription,
@@ -221,6 +240,7 @@ def process_audio_transcribe_summarize_v2(
metadata_manager.capture(task_id, settings.posthog_event_success)
# LLM Summarization
if (
analytics.is_feature_enabled("summary-enabled", distinct_id=owner_id)
and settings.is_summary_enabled
@@ -336,9 +356,7 @@ def summarize_transcription(
summary = tldr + "\n\n" + cleaned_summary + "\n\n" + next_steps
data = {
"title": settings.summary_title_template.format(
title=title,
),
"title": settings.summary_title_template.format(title=title),
"content": summary,
"email": email,
"sub": sub,

View File

@@ -1,7 +1,7 @@
"""Application configuration and settings."""
from functools import lru_cache
from typing import Annotated, List, Optional, Set
from typing import Annotated, List, Literal, Optional, Set
from fastapi import Depends
from pydantic import SecretStr
@@ -51,7 +51,6 @@ class Settings(BaseSettings):
# Transcription processing
hallucination_patterns: List[str] = ["Vap'n'Roll Thierry"]
hallucination_replacement_text: str = "[Texte impossible à transcrire]"
# Webhook-related settings
webhook_max_retries: int = 2
@@ -60,11 +59,10 @@ class Settings(BaseSettings):
webhook_api_token: SecretStr
webhook_url: str
# Locale
default_context_language: Literal["de", "en", "fr", "nl"] = "fr"
# Output related settings
document_default_title: Optional[str] = "Transcription"
document_title_template: Optional[str] = (
'Réunion "{room}" du {room_recording_date} à {room_recording_time}'
)
summary_title_template: Optional[str] = "Résumé de {title}"
# Summary related settings

View File

@@ -0,0 +1,30 @@
"""Locale support for the summary service."""
from typing import Optional
from summary.core.config import get_settings
from summary.core.locales import de, en, fr, nl
from summary.core.locales.strings import LocaleStrings
_LOCALES = {"fr": fr, "en": en, "de": de, "nl": nl}
def get_locale(*languages: Optional[str]) -> LocaleStrings:
    """Return the locale strings for the first supported language candidate.

    Candidates are examined in the order given (highest priority first).
    Matching is case-insensitive and accepts both "-" and "_" as subtag
    separators, so "en", "EN", "en-au" and "en_AU" all resolve to the
    English locale.

    Args:
        *languages: Language codes in decreasing priority order. ``None``
            and empty values are skipped.

    Returns:
        The ``STRINGS`` of the first candidate that maps to a known locale,
        or those of the configured ``default_context_language`` when no
        candidate matches.
    """
    for lang in languages:
        if not lang:
            continue
        # Language tags are case-insensitive, and some producers emit
        # POSIX-style codes ("en_US"); normalise before looking anything up.
        normalized = lang.strip().lower().replace("_", "-")
        # Try the full tag first, then fall back to its primary language
        # subtag (e.g. "en-au" -> "en").
        for candidate in (normalized, normalized.split("-", 1)[0]):
            locale_module = _LOCALES.get(candidate)
            if locale_module is not None:
                return locale_module.STRINGS
    return _LOCALES[get_settings().default_context_language].STRINGS

View File

@@ -0,0 +1,34 @@
"""German locale strings."""
from summary.core.locales.strings import LocaleStrings
# German values for every LocaleStrings field.
STRINGS = LocaleStrings(
# Markdown body used as the document content when the transcription
# contains no segments.
empty_transcription="""
**In Ihrer Transkription wurde kein Audioinhalt erkannt.**
*Wenn Sie glauben, dass es sich um einen Fehler handelt, zögern Sie nicht,
unseren technischen Support zu kontaktieren: visio@numerique.gouv.fr*
.
.
.
Einige Punkte, die wir Ihnen empfehlen zu überprüfen:
- War ein Mikrofon aktiviert?
- Waren Sie nah genug am Mikrofon?
- Ist das Mikrofon von guter Qualität?
- Dauert die Aufnahme länger als 30 Sekunden?
""",
# Header prepended to the content; the {download_link} placeholder is
# filled in with str.format and must be kept.
download_header_template=(
"\n*Laden Sie Ihre Aufnahme herunter, "
"indem Sie [diesem Link folgen]({download_link})*\n"
),
# Text substituted for each configured hallucination pattern.
hallucination_replacement_text="[Text konnte nicht transkribiert werden]",
# Title used when room, recording date or recording time is missing.
document_default_title="Transkription",
# Title template; {room}, {room_recording_date} and {room_recording_time}
# are filled in with str.format.
document_title_template=(
'Besprechung "{room}" am {room_recording_date} um {room_recording_time}'
),
)

View File

@@ -0,0 +1,33 @@
"""English locale strings."""
from summary.core.locales.strings import LocaleStrings
# English values for every LocaleStrings field.
STRINGS = LocaleStrings(
# Markdown body used as the document content when the transcription
# contains no segments.
empty_transcription="""
**No audio content was detected in your transcription.**
*If you believe this is an error, please do not hesitate to contact
our technical support: visio@numerique.gouv.fr*
.
.
.
A few things we recommend you check:
- Was a microphone enabled?
- Were you close enough to the microphone?
- Is the microphone of good quality?
- Is the recording longer than 30 seconds?
""",
# Header prepended to the content; the {download_link} placeholder is
# filled in with str.format and must be kept.
download_header_template=(
"\n*Download your recording by [following this link]({download_link})*\n"
),
# Text substituted for each configured hallucination pattern.
hallucination_replacement_text="[Unable to transcribe text]",
# Title used when room, recording date or recording time is missing.
document_default_title="Transcription",
# Title template; {room}, {room_recording_date} and {room_recording_time}
# are filled in with str.format.
document_title_template=(
'Meeting "{room}" on {room_recording_date} at {room_recording_time}'
),
)

View File

@@ -0,0 +1,33 @@
"""French locale strings (default)."""
from summary.core.locales.strings import LocaleStrings
# French values for every LocaleStrings field.
STRINGS = LocaleStrings(
# Markdown body used as the document content when the transcription
# contains no segments.
empty_transcription="""
**Aucun contenu audio n'a été détecté dans votre transcription.**
*Si vous pensez qu'il s'agit d'une erreur, n'hésitez pas à contacter
notre support technique : visio@numerique.gouv.fr*
.
.
.
Quelques points que nous vous conseillons de vérifier :
- Un micro était-il activé ?
- Étiez-vous suffisamment proche ?
- Le micro est-il de bonne qualité ?
- L'enregistrement dure-t-il plus de 30 secondes ?
""",
# Header prepended to the content; the {download_link} placeholder is
# filled in with str.format and must be kept.
download_header_template=(
"\n*Télécharger votre enregistrement en [suivant ce lien]({download_link})*\n"
),
# Text substituted for each configured hallucination pattern.
hallucination_replacement_text="[Texte impossible à transcrire]",
# Title used when room, recording date or recording time is missing.
document_default_title="Transcription",
# Title template; {room}, {room_recording_date} and {room_recording_time}
# are filled in with str.format.
document_title_template=(
'Réunion "{room}" du {room_recording_date} à {room_recording_time}'
),
)

View File

@@ -0,0 +1,33 @@
"""Dutch locale strings."""
from summary.core.locales.strings import LocaleStrings
# Dutch values for every LocaleStrings field.
STRINGS = LocaleStrings(
# Markdown body used as the document content when the transcription
# contains no segments.
empty_transcription="""
**Er is geen audio-inhoud gedetecteerd in uw transcriptie.**
*Als u denkt dat dit een fout is, aarzel dan niet om contact op te nemen
met onze technische ondersteuning: visio@numerique.gouv.fr*
.
.
.
Een paar punten die wij u aanraden te controleren:
- Was er een microfoon ingeschakeld?
- Was u dicht genoeg bij de microfoon?
- Is de microfoon van goede kwaliteit?
- Duurt de opname langer dan 30 seconden?
""",
# Header prepended to the content; the {download_link} placeholder is
# filled in with str.format and must be kept.
download_header_template=(
"\n*Download uw opname door [deze link te volgen]({download_link})*\n"
),
# Text substituted for each configured hallucination pattern.
hallucination_replacement_text="[Tekst kon niet worden getranscribeerd]",
# Title used when room, recording date or recording time is missing.
document_default_title="Transcriptie",
# Title template; {room}, {room_recording_date} and {room_recording_time}
# are filled in with str.format.
document_title_template=(
'Vergadering "{room}" op {room_recording_date} om {room_recording_time}'
),
)

View File

@@ -0,0 +1,15 @@
"""Locale types for the summary service."""
from dataclasses import dataclass
@dataclass(frozen=True)
class LocaleStrings:
    """Immutable bundle of the translatable strings emitted by the pipeline.

    One instance exists per supported language. Every field is currently
    consumed by the transcript formatter (transcript_formatter.py).
    """

    # Content used when the transcription has no segments.
    empty_transcription: str
    # Header prepended to the content; formatted with ``download_link``.
    download_header_template: str
    # Replacement for configured hallucination patterns.
    hallucination_replacement_text: str
    # Title used when room, recording date or recording time is missing.
    document_default_title: str
    # Title template; formatted with ``room``, ``room_recording_date`` and
    # ``room_recording_time``.
    document_title_template: str

View File

@@ -4,34 +4,13 @@ import logging
from typing import Optional, Tuple
from summary.core.config import get_settings
from summary.core.locales import LocaleStrings
settings = get_settings()
logger = logging.getLogger(__name__)
DEFAULT_EMPTY_TRANSCRIPTION = """
**Aucun contenu audio na été détecté dans votre transcription.**
*Si vous pensez quil sagit dune erreur, nhésitez pas à contacter
notre support technique : visio@numerique.gouv.fr*
.
.
.
Quelques points que nous vous conseillons de vérifier :
- Un micro était-il activé ?
- Étiez-vous suffisamment proche ?
- Le micro est-il de bonne qualité ?
- Lenregistrement dure-t-il plus de 30 secondes ?
"""
class TranscriptFormatter:
"""Formats WhisperX transcription output into readable conversation format.
@@ -42,12 +21,10 @@ class TranscriptFormatter:
- Generating descriptive titles from context
"""
def __init__(self):
"""Initialize formatter with settings."""
def __init__(self, locale: LocaleStrings):
"""Initialize formatter with settings and locale."""
self.hallucination_patterns = settings.hallucination_patterns
self.hallucination_replacement_text = settings.hallucination_replacement_text
self.default_title = settings.document_default_title
self.default_empty_transcription = DEFAULT_EMPTY_TRANSCRIPTION
self._locale = locale
def _get_segments(self, transcription):
"""Extract segments from transcription object or dictionary."""
@@ -71,7 +48,7 @@ class TranscriptFormatter:
segments = self._get_segments(transcription)
if not segments:
content = self.default_empty_transcription
content = self._locale.empty_transcription
else:
content = self._format_speaker(segments)
content = self._remove_hallucinations(content)
@@ -83,7 +60,7 @@ class TranscriptFormatter:
def _remove_hallucinations(self, content: str) -> str:
"""Remove hallucination patterns from content."""
replacement = self.hallucination_replacement_text or ""
replacement = self._locale.hallucination_replacement_text or ""
for pattern in self.hallucination_patterns:
content = content.replace(pattern, replacement)
@@ -111,9 +88,8 @@ class TranscriptFormatter:
if not download_link:
return content
header = (
f"\n*Télécharger votre enregistrement "
f"en [suivant ce lien]({download_link})*\n"
header = self._locale.download_header_template.format(
download_link=download_link
)
content = header + content
@@ -127,9 +103,9 @@ class TranscriptFormatter:
) -> str:
"""Generate title from context or return default."""
if not room or not recording_date or not recording_time:
return self.default_title
return self._locale.document_default_title
return settings.document_title_template.format(
return self._locale.document_title_template.format(
room=room,
room_recording_date=recording_date,
room_recording_time=recording_time,