✨(summary) add localization support for transcription context text

Transcription and summarization results were always generated with French boilerplate text (e.g. "Réunion du..."), regardless of the user's preference or the meeting language. Introduce basic localization support so that the generated context strings follow the user's language, then the meeting language, then a configurable default.
2026-02-25 17:50:24 +01:00
parent cd0cec78ba
commit f5e0ddf692
13 changed files with 223 additions and 48 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to
 ### Added
 - 👷(docker) add arm64 platform support for image builds
 - ✨(summary) add localization support for transcription context text
 ### Changed
--- a/gitlint/gitlint_emoji.py
+++ b/gitlint/gitlint_emoji.py
@@ -2,6 +2,7 @@
 Gitlint extra rule to validate that the message title is of the form
 "<gitmoji>(<scope>) <subject>"
 """
 from __future__ import unicode_literals
 import re
--- a/src/backend/core/recording/event/notification.py
+++ b/src/backend/core/recording/event/notification.py
@@ -167,6 +167,7 @@ class NotificationService:
                owner_access.user.timezone
            ).strftime("%H:%M"),
            "download_link": f"{get_recording_download_base_url()}/{recording.id}",
            "context_language": owner_access.user.language,
        }
        headers = {
--- a/src/summary/summary/api/route/tasks.py
+++ b/src/summary/summary/api/route/tasks.py
@@ -15,8 +15,8 @@ from summary.core.config import get_settings
 settings = get_settings()
-class TaskCreation(BaseModel):
+class TranscribeSummarizeTaskCreation(BaseModel):
-    """Task data."""
+    """Transcription and summarization parameters."""
    owner_id: str
    filename: str
@@ -28,6 +28,7 @@ class TaskCreation(BaseModel):
    recording_time: Optional[str]
    language: Optional[str]
    download_link: Optional[str]
    context_language: Optional[str] = None
    @field_validator("language")
    @classmethod
@@ -45,8 +46,8 @@ router = APIRouter(prefix="/tasks")
@router.post("/")
-async def create_task(request: TaskCreation):
+async def create_transcribe_summarize_task(request: TranscribeSummarizeTaskCreation):
-    """Create a task."""
+    """Create a transcription and summarization task."""
    task = process_audio_transcribe_summarize_v2.apply_async(
        args=[
            request.owner_id,
@@ -59,6 +60,7 @@ async def create_task(request: TaskCreation):
            request.recording_time,
            request.language,
            request.download_link,
            request.context_language,
        ],
        queue=settings.transcribe_queue,
    )
--- a/src/summary/summary/core/celery_worker.py
+++ b/src/summary/summary/core/celery_worker.py
@@ -18,6 +18,7 @@ from summary.core.analytics import MetadataManager, get_analytics
 from summary.core.config import get_settings
 from summary.core.file_service import FileService, FileServiceException
 from summary.core.llm_service import LLMException, LLMObservability, LLMService
 from summary.core.locales import get_locale
 from summary.core.prompt import (
    FORMAT_NEXT_STEPS,
    FORMAT_PLAN,
@@ -121,6 +122,7 @@ def process_audio_transcribe_summarize_v2(
    recording_time: Optional[str],
    language: Optional[str],
    download_link: Optional[str],
    context_language: Optional[str] = None,
 ):
    """Process an audio file by transcribing it and generating a summary.
@@ -129,6 +131,19 @@ def process_audio_transcribe_summarize_v2(
    2. Transcribes the audio using WhisperX model
    3. Sends the results via webhook
    Args:
        self: Celery task instance (passed on with bind=True)
        owner_id: Unique identifier of the recording owner.
        filename: Name of the audio file in MinIO storage.
        email: Email address of the recording owner.
        sub: OIDC subject identifier of the recording owner.
        received_at: Unix timestamp when the recording was received.
        room: room name where the recording took place.
        recording_date: Date of the recording (localized display string).
        recording_time: Time of the recording (localized display string).
        language: ISO 639-1 language code for transcription.
        download_link: URL to download the original recording.
        context_language: ISO 639-1 language code of the meeting summary context text.
    """
    logger.info(
        "Notification received | Owner: %s | Room: %s",
@@ -145,6 +160,7 @@ def process_audio_transcribe_summarize_v2(
        max_retries=settings.whisperx_max_retries,
    )
    # Transcription
    try:
        with (
            file_service.prepare_audio_file(filename) as (audio_file, metadata),
@@ -183,7 +199,10 @@ def process_audio_transcribe_summarize_v2(
    metadata_manager.track_transcription_metadata(task_id, transcription)
-    formatter = TranscriptFormatter()
+    # For locale of context, use in decreasing priority context_language,
    # language (of meeting), default context language
    locale = get_locale(context_language, language)
    formatter = TranscriptFormatter(locale)
    content, title = formatter.format(
        transcription,
@@ -221,6 +240,7 @@ def process_audio_transcribe_summarize_v2(
    metadata_manager.capture(task_id, settings.posthog_event_success)
    # LLM Summarization
    if (
        analytics.is_feature_enabled("summary-enabled", distinct_id=owner_id)
        and settings.is_summary_enabled
@@ -336,9 +356,7 @@ def summarize_transcription(
    summary = tldr + "\n\n" + cleaned_summary + "\n\n" + next_steps
    data = {
-        "title": settings.summary_title_template.format(
+        "title": settings.summary_title_template.format(title=title),
            title=title,
        ),
        "content": summary,
        "email": email,
        "sub": sub,
--- a/src/summary/summary/core/config.py
+++ b/src/summary/summary/core/config.py
@@ -1,7 +1,7 @@
 """Application configuration and settings."""
 from functools import lru_cache
-from typing import Annotated, List, Optional, Set
+from typing import Annotated, List, Literal, Optional, Set
 from fastapi import Depends
 from pydantic import SecretStr
@@ -51,7 +51,6 @@ class Settings(BaseSettings):
    # Transcription processing
    hallucination_patterns: List[str] = ["Vap'n'Roll Thierry"]
    hallucination_replacement_text: str = "[Texte impossible à transcrire]"
    # Webhook-related settings
    webhook_max_retries: int = 2
@@ -60,11 +59,10 @@ class Settings(BaseSettings):
    webhook_api_token: SecretStr
    webhook_url: str
    # Locale
    default_context_language: Literal["de", "en", "fr", "nl"] = "fr"
    # Output related settings
    document_default_title: Optional[str] = "Transcription"
    document_title_template: Optional[str] = (
        'Réunion "{room}" du {room_recording_date} à {room_recording_time}'
    )
    summary_title_template: Optional[str] = "Résumé de {title}"
    # Summary related settings
--- a/src/summary/summary/core/locales/__init__.py
+++ b/src/summary/summary/core/locales/__init__.py
@@ -0,0 +1,30 @@
 """Locale support for the summary service."""
 from typing import Optional
 from summary.core.config import get_settings
 from summary.core.locales import de, en, fr, nl
 from summary.core.locales.strings import LocaleStrings
 _LOCALES = {"fr": fr, "en": en, "de": de, "nl": nl}
 def get_locale(*languages: Optional[str]) -> LocaleStrings:
    """Return locale strings for the first matching language candidate.

    Accept language codes in decreasing priority order and return the
    locale for the first one that matches a known locale. Matching is
    case-insensitive and accepts both "-" and "_" as subtag separators,
    so "EN", "en-AU" and "en_AU" all resolve to the "en" locale.

    Args:
        *languages: Candidate language codes, highest priority first.
            ``None`` and empty strings are skipped.

    Returns:
        The ``STRINGS`` of the first candidate that maps to a known locale,
        or those of the configured ``default_context_language`` when no
        candidate matches.
    """
    for lang in languages:
        if not lang:
            continue
        # Normalise case and separator so that callers do not need to agree
        # on one spelling of the same tag ("en-AU", "en_au", "EN", ...).
        normalized = lang.strip().lower().replace("_", "-")
        # Try the full tag first, then fall back to its primary language
        # subtag (e.g. "en-au" -> "en").
        for candidate in (normalized, normalized.split("-", 1)[0]):
            if candidate in _LOCALES:
                return _LOCALES[candidate].STRINGS
    return _LOCALES[get_settings().default_context_language].STRINGS
--- a/src/summary/summary/core/locales/de.py
+++ b/src/summary/summary/core/locales/de.py
@@ -0,0 +1,34 @@
 """German locale strings."""
 from summary.core.locales.strings import LocaleStrings
 STRINGS = LocaleStrings(
    empty_transcription="""
 **In Ihrer Transkription wurde kein Audioinhalt erkannt.**
 *Wenn Sie glauben, dass es sich um einen Fehler handelt, zögern Sie nicht,
 unseren technischen Support zu kontaktieren: visio@numerique.gouv.fr*
 .
 .
 .
 Einige Punkte, die wir Ihnen empfehlen zu überprüfen:
 - War ein Mikrofon aktiviert?
 - Waren Sie nah genug am Mikrofon?
 - Ist das Mikrofon von guter Qualität?
 - Dauert die Aufnahme länger als 30 Sekunden?
 """,
    download_header_template=(
        "\n*Laden Sie Ihre Aufnahme herunter, "
        "indem Sie [diesem Link folgen]({download_link})*\n"
    ),
    hallucination_replacement_text="[Text konnte nicht transkribiert werden]",
    document_default_title="Transkription",
    document_title_template=(
        'Besprechung "{room}" am {room_recording_date} um {room_recording_time}'
    ),
 )
--- a/src/summary/summary/core/locales/en.py
+++ b/src/summary/summary/core/locales/en.py
@@ -0,0 +1,33 @@
 """English locale strings."""
 from summary.core.locales.strings import LocaleStrings
 STRINGS = LocaleStrings(
    empty_transcription="""
 **No audio content was detected in your transcription.**
 *If you believe this is an error, please do not hesitate to contact
 our technical support: visio@numerique.gouv.fr*
 .
 .
 .
 A few things we recommend you check:
 - Was a microphone enabled?
 - Were you close enough to the microphone?
 - Is the microphone of good quality?
 - Is the recording longer than 30 seconds?
 """,
    download_header_template=(
        "\n*Download your recording by [following this link]({download_link})*\n"
    ),
    hallucination_replacement_text="[Unable to transcribe text]",
    document_default_title="Transcription",
    document_title_template=(
        'Meeting "{room}" on {room_recording_date} at {room_recording_time}'
    ),
 )
--- a/src/summary/summary/core/locales/fr.py
+++ b/src/summary/summary/core/locales/fr.py
@@ -0,0 +1,33 @@
 """French locale strings (default)."""
 from summary.core.locales.strings import LocaleStrings
 STRINGS = LocaleStrings(
    empty_transcription="""
 **Aucun contenu audio n'a été détecté dans votre transcription.**
 *Si vous pensez qu'il s'agit d'une erreur, n'hésitez pas à contacter
 notre support technique : visio@numerique.gouv.fr*
 .
 .
 .
 Quelques points que nous vous conseillons de vérifier :
 - Un micro était-il activé ?
 - Étiez-vous suffisamment proche ?
 - Le micro est-il de bonne qualité ?
 - L'enregistrement dure-t-il plus de 30 secondes ?
 """,
    download_header_template=(
        "\n*Télécharger votre enregistrement en [suivant ce lien]({download_link})*\n"
    ),
    hallucination_replacement_text="[Texte impossible à transcrire]",
    document_default_title="Transcription",
    document_title_template=(
        'Réunion "{room}" du {room_recording_date} à {room_recording_time}'
    ),
 )
--- a/src/summary/summary/core/locales/nl.py
+++ b/src/summary/summary/core/locales/nl.py
@@ -0,0 +1,33 @@
 """Dutch locale strings."""
 from summary.core.locales.strings import LocaleStrings
 STRINGS = LocaleStrings(
    empty_transcription="""
 **Er is geen audio-inhoud gedetecteerd in uw transcriptie.**
 *Als u denkt dat dit een fout is, aarzel dan niet om contact op te nemen
 met onze technische ondersteuning: visio@numerique.gouv.fr*
 .
 .
 .
 Een paar punten die wij u aanraden te controleren:
 - Was er een microfoon ingeschakeld?
 - Was u dicht genoeg bij de microfoon?
 - Is de microfoon van goede kwaliteit?
 - Duurt de opname langer dan 30 seconden?
 """,
    download_header_template=(
        "\n*Download uw opname door [deze link te volgen]({download_link})*\n"
    ),
    hallucination_replacement_text="[Tekst kon niet worden getranscribeerd]",
    document_default_title="Transcriptie",
    document_title_template=(
        'Vergadering "{room}" op {room_recording_date} om {room_recording_time}'
    ),
 )
--- a/src/summary/summary/core/locales/strings.py
+++ b/src/summary/summary/core/locales/strings.py
@@ -0,0 +1,15 @@
 """Locale types for the summary service."""
 from dataclasses import dataclass
@dataclass(frozen=True)
 class LocaleStrings:
    """All translatable output strings for the summary pipeline.

    One frozen instance is defined per supported language module as
    ``STRINGS``; the fields below are read by ``TranscriptFormatter``.
    """
    # Strings consumed by transcript_formatter.py
    # Content used instead of the transcript when it has no segments.
    empty_transcription: str
    # Header prepended to the content; formatted with ``download_link``.
    download_header_template: str
    # Text substituted for each configured hallucination pattern.
    hallucination_replacement_text: str
    # Title returned when room, recording date or recording time is missing.
    document_default_title: str
    # Title template; formatted with ``room``, ``room_recording_date`` and
    # ``room_recording_time``.
    document_title_template: str
--- a/src/summary/summary/core/transcript_formatter.py
+++ b/src/summary/summary/core/transcript_formatter.py
@@ -4,34 +4,13 @@ import logging
 from typing import Optional, Tuple
 from summary.core.config import get_settings
 from summary.core.locales import LocaleStrings
 settings = get_settings()
 logger = logging.getLogger(__name__)
 DEFAULT_EMPTY_TRANSCRIPTION = """
 **Aucun contenu audio n’a été détecté dans votre transcription.**
 *Si vous pensez qu’il s’agit d’une erreur, n’hésitez pas à contacter
 notre support technique : visio@numerique.gouv.fr*
 .
 .
 .
 Quelques points que nous vous conseillons de vérifier :
 - Un micro était-il activé ?
 - Étiez-vous suffisamment proche ?
 - Le micro est-il de bonne qualité ?
 - L’enregistrement dure-t-il plus de 30 secondes ?
 """
 class TranscriptFormatter:
    """Formats WhisperX transcription output into readable conversation format.
@@ -42,12 +21,10 @@ class TranscriptFormatter:
    - Generating descriptive titles from context
    """
-    def __init__(self):
+    def __init__(self, locale: LocaleStrings):
-        """Initialize formatter with settings."""
+        """Initialize formatter with settings and locale."""
        self.hallucination_patterns = settings.hallucination_patterns
-        self.hallucination_replacement_text = settings.hallucination_replacement_text
+        self._locale = locale
        self.default_title = settings.document_default_title
        self.default_empty_transcription = DEFAULT_EMPTY_TRANSCRIPTION
    def _get_segments(self, transcription):
        """Extract segments from transcription object or dictionary."""
@@ -71,7 +48,7 @@ class TranscriptFormatter:
        segments = self._get_segments(transcription)
        if not segments:
-            content = self.default_empty_transcription
+            content = self._locale.empty_transcription
        else:
            content = self._format_speaker(segments)
            content = self._remove_hallucinations(content)
@@ -83,7 +60,7 @@ class TranscriptFormatter:
    def _remove_hallucinations(self, content: str) -> str:
        """Remove hallucination patterns from content."""
-        replacement = self.hallucination_replacement_text or ""
+        replacement = self._locale.hallucination_replacement_text or ""
        for pattern in self.hallucination_patterns:
            content = content.replace(pattern, replacement)
@@ -111,9 +88,8 @@ class TranscriptFormatter:
        if not download_link:
            return content
-        header = (
+        header = self._locale.download_header_template.format(
-            f"\n*Télécharger votre enregistrement "
+            download_link=download_link
            f"en [suivant ce lien]({download_link})*\n"
        )
        content = header + content
@@ -127,9 +103,9 @@ class TranscriptFormatter:
    ) -> str:
        """Generate title from context or return default."""
        if not room or not recording_date or not recording_time:
-            return self.default_title
+            return self._locale.document_default_title
-        return settings.document_title_template.format(
+        return self._locale.document_title_template.format(
            room=room,
            room_recording_date=recording_date,
            room_recording_time=recording_time,