(summary) add localization support for transcription context text

Transcription and summarization results were always generated
using a French text structure (e.g. "Réunion du..."), regardless
of user preference or meeting language. Introduce basic localization
support so that generated strings follow the user's preferred language,
falling back to the meeting language and then to the configured default.
This commit is contained in:
leo
2026-02-25 17:50:24 +01:00
committed by aleb_the_flash
parent cd0cec78ba
commit f5e0ddf692
13 changed files with 223 additions and 48 deletions

View File

@@ -11,6 +11,7 @@ and this project adheres to
### Added ### Added
- 👷(docker) add arm64 platform support for image builds - 👷(docker) add arm64 platform support for image builds
- ✨(summary) add localization support for transcription context text
### Changed ### Changed

View File

@@ -2,6 +2,7 @@
Gitlint extra rule to validate that the message title is of the form Gitlint extra rule to validate that the message title is of the form
"<gitmoji>(<scope>) <subject>" "<gitmoji>(<scope>) <subject>"
""" """
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re

View File

@@ -167,6 +167,7 @@ class NotificationService:
owner_access.user.timezone owner_access.user.timezone
).strftime("%H:%M"), ).strftime("%H:%M"),
"download_link": f"{get_recording_download_base_url()}/{recording.id}", "download_link": f"{get_recording_download_base_url()}/{recording.id}",
"context_language": owner_access.user.language,
} }
headers = { headers = {

View File

@@ -15,8 +15,8 @@ from summary.core.config import get_settings
settings = get_settings() settings = get_settings()
class TaskCreation(BaseModel): class TranscribeSummarizeTaskCreation(BaseModel):
"""Task data.""" """Transcription and summarization parameters."""
owner_id: str owner_id: str
filename: str filename: str
@@ -28,6 +28,7 @@ class TaskCreation(BaseModel):
recording_time: Optional[str] recording_time: Optional[str]
language: Optional[str] language: Optional[str]
download_link: Optional[str] download_link: Optional[str]
context_language: Optional[str] = None
@field_validator("language") @field_validator("language")
@classmethod @classmethod
@@ -45,8 +46,8 @@ router = APIRouter(prefix="/tasks")
@router.post("/") @router.post("/")
async def create_task(request: TaskCreation): async def create_transcribe_summarize_task(request: TranscribeSummarizeTaskCreation):
"""Create a task.""" """Create a transcription and summarization task."""
task = process_audio_transcribe_summarize_v2.apply_async( task = process_audio_transcribe_summarize_v2.apply_async(
args=[ args=[
request.owner_id, request.owner_id,
@@ -59,6 +60,7 @@ async def create_task(request: TaskCreation):
request.recording_time, request.recording_time,
request.language, request.language,
request.download_link, request.download_link,
request.context_language,
], ],
queue=settings.transcribe_queue, queue=settings.transcribe_queue,
) )

View File

@@ -18,6 +18,7 @@ from summary.core.analytics import MetadataManager, get_analytics
from summary.core.config import get_settings from summary.core.config import get_settings
from summary.core.file_service import FileService, FileServiceException from summary.core.file_service import FileService, FileServiceException
from summary.core.llm_service import LLMException, LLMObservability, LLMService from summary.core.llm_service import LLMException, LLMObservability, LLMService
from summary.core.locales import get_locale
from summary.core.prompt import ( from summary.core.prompt import (
FORMAT_NEXT_STEPS, FORMAT_NEXT_STEPS,
FORMAT_PLAN, FORMAT_PLAN,
@@ -121,6 +122,7 @@ def process_audio_transcribe_summarize_v2(
recording_time: Optional[str], recording_time: Optional[str],
language: Optional[str], language: Optional[str],
download_link: Optional[str], download_link: Optional[str],
context_language: Optional[str] = None,
): ):
"""Process an audio file by transcribing it and generating a summary. """Process an audio file by transcribing it and generating a summary.
@@ -129,6 +131,19 @@ def process_audio_transcribe_summarize_v2(
2. Transcribes the audio using WhisperX model 2. Transcribes the audio using WhisperX model
3. Sends the results via webhook 3. Sends the results via webhook
Args:
self: Celery task instance (passed on with bind=True)
owner_id: Unique identifier of the recording owner.
filename: Name of the audio file in MinIO storage.
email: Email address of the recording owner.
sub: OIDC subject identifier of the recording owner.
received_at: Unix timestamp when the recording was received.
room: room name where the recording took place.
recording_date: Date of the recording (localized display string).
recording_time: Time of the recording (localized display string).
language: ISO 639-1 language code for transcription.
download_link: URL to download the original recording.
context_language: ISO 639-1 language code of the meeting summary context text.
""" """
logger.info( logger.info(
"Notification received | Owner: %s | Room: %s", "Notification received | Owner: %s | Room: %s",
@@ -145,6 +160,7 @@ def process_audio_transcribe_summarize_v2(
max_retries=settings.whisperx_max_retries, max_retries=settings.whisperx_max_retries,
) )
# Transcription
try: try:
with ( with (
file_service.prepare_audio_file(filename) as (audio_file, metadata), file_service.prepare_audio_file(filename) as (audio_file, metadata),
@@ -183,7 +199,10 @@ def process_audio_transcribe_summarize_v2(
metadata_manager.track_transcription_metadata(task_id, transcription) metadata_manager.track_transcription_metadata(task_id, transcription)
formatter = TranscriptFormatter() # For locale of context, use in decreasing priority context_language,
# language (of meeting), default context language
locale = get_locale(context_language, language)
formatter = TranscriptFormatter(locale)
content, title = formatter.format( content, title = formatter.format(
transcription, transcription,
@@ -221,6 +240,7 @@ def process_audio_transcribe_summarize_v2(
metadata_manager.capture(task_id, settings.posthog_event_success) metadata_manager.capture(task_id, settings.posthog_event_success)
# LLM Summarization
if ( if (
analytics.is_feature_enabled("summary-enabled", distinct_id=owner_id) analytics.is_feature_enabled("summary-enabled", distinct_id=owner_id)
and settings.is_summary_enabled and settings.is_summary_enabled
@@ -336,9 +356,7 @@ def summarize_transcription(
summary = tldr + "\n\n" + cleaned_summary + "\n\n" + next_steps summary = tldr + "\n\n" + cleaned_summary + "\n\n" + next_steps
data = { data = {
"title": settings.summary_title_template.format( "title": settings.summary_title_template.format(title=title),
title=title,
),
"content": summary, "content": summary,
"email": email, "email": email,
"sub": sub, "sub": sub,

View File

@@ -1,7 +1,7 @@
"""Application configuration and settings.""" """Application configuration and settings."""
from functools import lru_cache from functools import lru_cache
from typing import Annotated, List, Optional, Set from typing import Annotated, List, Literal, Optional, Set
from fastapi import Depends from fastapi import Depends
from pydantic import SecretStr from pydantic import SecretStr
@@ -51,7 +51,6 @@ class Settings(BaseSettings):
# Transcription processing # Transcription processing
hallucination_patterns: List[str] = ["Vap'n'Roll Thierry"] hallucination_patterns: List[str] = ["Vap'n'Roll Thierry"]
hallucination_replacement_text: str = "[Texte impossible à transcrire]"
# Webhook-related settings # Webhook-related settings
webhook_max_retries: int = 2 webhook_max_retries: int = 2
@@ -60,11 +59,10 @@ class Settings(BaseSettings):
webhook_api_token: SecretStr webhook_api_token: SecretStr
webhook_url: str webhook_url: str
# Locale
default_context_language: Literal["de", "en", "fr", "nl"] = "fr"
# Output related settings # Output related settings
document_default_title: Optional[str] = "Transcription"
document_title_template: Optional[str] = (
'Réunion "{room}" du {room_recording_date} à {room_recording_time}'
)
summary_title_template: Optional[str] = "Résumé de {title}" summary_title_template: Optional[str] = "Résumé de {title}"
# Summary related settings # Summary related settings

View File

@@ -0,0 +1,30 @@
"""Locale support for the summary service."""
from typing import Optional
from summary.core.config import get_settings
from summary.core.locales import de, en, fr, nl
from summary.core.locales.strings import LocaleStrings
_LOCALES = {"fr": fr, "en": en, "de": de, "nl": nl}
def get_locale(*languages: Optional[str]) -> LocaleStrings:
    """Return locale strings for the first matching language candidate.

    Accept language codes in decreasing priority order and return the
    locale for the first one that matches a known locale. Matching is
    case-insensitive, ignores surrounding whitespace and accepts both
    "-" and "_" as subtag separators, so "en", "EN", "en-au" and "en_US"
    all resolve to the "en" locale.

    Args:
        *languages: Candidate language codes, highest priority first.
            ``None`` and empty values are skipped.

    Returns:
        The ``STRINGS`` of the first candidate with a known locale, or the
        ones of the configured ``default_context_language`` if none matches.
    """
    for lang in languages:
        if not lang:
            continue

        # Language tags are case-insensitive and some producers emit
        # "en_US"-style codes; normalise before looking anything up.
        normalized = lang.strip().lower().replace("_", "-")
        if normalized in _LOCALES:
            return _LOCALES[normalized].STRINGS

        # Provide fallback for longer formats of ISO 639-1 (e.g. "en-au" -> "en")
        base_lang = normalized.split("-")[0]
        if base_lang in _LOCALES:
            return _LOCALES[base_lang].STRINGS

    return _LOCALES[get_settings().default_context_language].STRINGS

View File

@@ -0,0 +1,34 @@
"""German locale strings."""
from summary.core.locales.strings import LocaleStrings
# German values for the LocaleStrings fields.
STRINGS = LocaleStrings(
# Used as the document content when the transcription has no segments.
# The leading and trailing newlines are part of the value.
empty_transcription="""
**In Ihrer Transkription wurde kein Audioinhalt erkannt.**
*Wenn Sie glauben, dass es sich um einen Fehler handelt, zögern Sie nicht,
unseren technischen Support zu kontaktieren: visio@numerique.gouv.fr*
.
.
.
Einige Punkte, die wir Ihnen empfehlen zu überprüfen:
- War ein Mikrofon aktiviert?
- Waren Sie nah genug am Mikrofon?
- Ist das Mikrofon von guter Qualität?
- Dauert die Aufnahme länger als 30 Sekunden?
""",
# "{download_link}" is substituted with str.format() by the formatter.
download_header_template=(
"\n*Laden Sie Ihre Aufnahme herunter, "
"indem Sie [diesem Link folgen]({download_link})*\n"
),
# Text put in place of each configured hallucination pattern.
hallucination_replacement_text="[Text konnte nicht transkribiert werden]",
# Title used when room, recording date or recording time is missing.
document_default_title="Transkription",
# "{room}", "{room_recording_date}" and "{room_recording_time}" are
# substituted with str.format() by the formatter.
document_title_template=(
'Besprechung "{room}" am {room_recording_date} um {room_recording_time}'
),
)

View File

@@ -0,0 +1,33 @@
"""English locale strings."""
from summary.core.locales.strings import LocaleStrings
# English values for the LocaleStrings fields.
STRINGS = LocaleStrings(
# Used as the document content when the transcription has no segments.
# The leading and trailing newlines are part of the value.
empty_transcription="""
**No audio content was detected in your transcription.**
*If you believe this is an error, please do not hesitate to contact
our technical support: visio@numerique.gouv.fr*
.
.
.
A few things we recommend you check:
- Was a microphone enabled?
- Were you close enough to the microphone?
- Is the microphone of good quality?
- Is the recording longer than 30 seconds?
""",
# "{download_link}" is substituted with str.format() by the formatter.
download_header_template=(
"\n*Download your recording by [following this link]({download_link})*\n"
),
# Text put in place of each configured hallucination pattern.
hallucination_replacement_text="[Unable to transcribe text]",
# Title used when room, recording date or recording time is missing.
document_default_title="Transcription",
# "{room}", "{room_recording_date}" and "{room_recording_time}" are
# substituted with str.format() by the formatter.
document_title_template=(
'Meeting "{room}" on {room_recording_date} at {room_recording_time}'
),
)

View File

@@ -0,0 +1,33 @@
"""French locale strings (default)."""
from summary.core.locales.strings import LocaleStrings
# French values for the LocaleStrings fields.
STRINGS = LocaleStrings(
# Used as the document content when the transcription has no segments.
# The leading and trailing newlines are part of the value.
empty_transcription="""
**Aucun contenu audio n'a été détecté dans votre transcription.**
*Si vous pensez qu'il s'agit d'une erreur, n'hésitez pas à contacter
notre support technique : visio@numerique.gouv.fr*
.
.
.
Quelques points que nous vous conseillons de vérifier :
- Un micro était-il activé ?
- Étiez-vous suffisamment proche ?
- Le micro est-il de bonne qualité ?
- L'enregistrement dure-t-il plus de 30 secondes ?
""",
# "{download_link}" is substituted with str.format() by the formatter.
download_header_template=(
"\n*Télécharger votre enregistrement en [suivant ce lien]({download_link})*\n"
),
# Text put in place of each configured hallucination pattern.
hallucination_replacement_text="[Texte impossible à transcrire]",
# Title used when room, recording date or recording time is missing.
document_default_title="Transcription",
# "{room}", "{room_recording_date}" and "{room_recording_time}" are
# substituted with str.format() by the formatter.
document_title_template=(
'Réunion "{room}" du {room_recording_date} à {room_recording_time}'
),
)

View File

@@ -0,0 +1,33 @@
"""Dutch locale strings."""
from summary.core.locales.strings import LocaleStrings
STRINGS = LocaleStrings(
empty_transcription="""
**Er is geen audio-inhoud gedetecteerd in uw transcriptie.**
*Als u denkt dat dit een fout is, aarzel dan niet om contact op te nemen
met onze technische ondersteuning: visio@numerique.gouv.fr*
.
.
.
Een paar punten die wij u aanraden te controleren:
- Was er een microfoon ingeschakeld?
- Was u dicht genoeg bij de microfoon?
- Is de microfoon van goede kwaliteit?
- Duurt de opname langer dan 30 seconden?
""",
download_header_template=(
"\n*Download uw opname door [deze link te volgen]({download_link})*\n"
),
hallucination_replacement_text="[Tekst kon niet worden getranscribeerd]",
document_default_title="Transcriptie",
document_title_template=(
'Vergadering "{room}" op {room_recording_date} om {room_recording_time}'
),
)

View File

@@ -0,0 +1,15 @@
"""Locale types for the summary service."""
from dataclasses import dataclass
@dataclass(frozen=True)
class LocaleStrings:
    """Immutable set of the translatable output strings for one language.

    Every field is required. The declaration order below is also the
    positional order of the generated constructor.
    """

    # --- Strings consumed by transcript_formatter.py ---

    # Document content used when the transcription contains no segments.
    empty_transcription: str

    # Header prepended to the content; formatted with ``download_link``.
    download_header_template: str

    # Text substituted for each configured hallucination pattern.
    hallucination_replacement_text: str

    # Title used when room, recording date or recording time is missing.
    document_default_title: str

    # Title template; formatted with ``room``, ``room_recording_date`` and
    # ``room_recording_time``.
    document_title_template: str

View File

@@ -4,34 +4,13 @@ import logging
from typing import Optional, Tuple from typing import Optional, Tuple
from summary.core.config import get_settings from summary.core.config import get_settings
from summary.core.locales import LocaleStrings
settings = get_settings() settings = get_settings()
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
DEFAULT_EMPTY_TRANSCRIPTION = """
**Aucun contenu audio na été détecté dans votre transcription.**
*Si vous pensez quil sagit dune erreur, nhésitez pas à contacter
notre support technique : visio@numerique.gouv.fr*
.
.
.
Quelques points que nous vous conseillons de vérifier :
- Un micro était-il activé ?
- Étiez-vous suffisamment proche ?
- Le micro est-il de bonne qualité ?
- Lenregistrement dure-t-il plus de 30 secondes ?
"""
class TranscriptFormatter: class TranscriptFormatter:
"""Formats WhisperX transcription output into readable conversation format. """Formats WhisperX transcription output into readable conversation format.
@@ -42,12 +21,10 @@ class TranscriptFormatter:
- Generating descriptive titles from context - Generating descriptive titles from context
""" """
def __init__(self): def __init__(self, locale: LocaleStrings):
"""Initialize formatter with settings.""" """Initialize formatter with settings and locale."""
self.hallucination_patterns = settings.hallucination_patterns self.hallucination_patterns = settings.hallucination_patterns
self.hallucination_replacement_text = settings.hallucination_replacement_text self._locale = locale
self.default_title = settings.document_default_title
self.default_empty_transcription = DEFAULT_EMPTY_TRANSCRIPTION
def _get_segments(self, transcription): def _get_segments(self, transcription):
"""Extract segments from transcription object or dictionary.""" """Extract segments from transcription object or dictionary."""
@@ -71,7 +48,7 @@ class TranscriptFormatter:
segments = self._get_segments(transcription) segments = self._get_segments(transcription)
if not segments: if not segments:
content = self.default_empty_transcription content = self._locale.empty_transcription
else: else:
content = self._format_speaker(segments) content = self._format_speaker(segments)
content = self._remove_hallucinations(content) content = self._remove_hallucinations(content)
@@ -83,7 +60,7 @@ class TranscriptFormatter:
def _remove_hallucinations(self, content: str) -> str: def _remove_hallucinations(self, content: str) -> str:
"""Remove hallucination patterns from content.""" """Remove hallucination patterns from content."""
replacement = self.hallucination_replacement_text or "" replacement = self._locale.hallucination_replacement_text or ""
for pattern in self.hallucination_patterns: for pattern in self.hallucination_patterns:
content = content.replace(pattern, replacement) content = content.replace(pattern, replacement)
@@ -111,9 +88,8 @@ class TranscriptFormatter:
if not download_link: if not download_link:
return content return content
header = ( header = self._locale.download_header_template.format(
f"\n*Télécharger votre enregistrement " download_link=download_link
f"en [suivant ce lien]({download_link})*\n"
) )
content = header + content content = header + content
@@ -127,9 +103,9 @@ class TranscriptFormatter:
) -> str: ) -> str:
"""Generate title from context or return default.""" """Generate title from context or return default."""
if not room or not recording_date or not recording_time: if not room or not recording_date or not recording_time:
return self.default_title return self._locale.document_default_title
return settings.document_title_template.format( return self._locale.document_title_template.format(
room=room, room=room,
room_recording_date=recording_date, room_recording_date=recording_date,
room_recording_time=recording_time, room_recording_time=recording_time,