From 4cb6320b832d475867444706433e3f9e67e4293e Mon Sep 17 00:00:00 2001 From: lebaudantoine Date: Mon, 29 Dec 2025 18:25:46 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8(summary)=20add=20a=20language=20param?= =?UTF-8?q?eter=20for=20transcription?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass the language from the recording options to the summary service, allowing users to choose the transcription language. This is important because automatic language detection often fails, causing empty transcriptions or 5xx errors from the Whisper API. Users then do not receive their transcriptions, which leads to frustration. For most of our user base, meetings are in French, and automatic detection is unreliable. Support for language parameterization in the Whisper API has existed for some time; only the frontend and backend integration was missing. I did not force French as the default, since a minority of users hold meetings in English or other European languages. A proper settings tab to configure this value will be introduced later. 
--- src/backend/core/recording/event/notification.py | 1 + src/summary/summary/api/route/tasks.py | 15 ++++++++++++++- src/summary/summary/core/analytics.py | 2 +- src/summary/summary/core/celery_worker.py | 7 +++++-- src/summary/summary/core/config.py | 1 + 5 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/backend/core/recording/event/notification.py b/src/backend/core/recording/event/notification.py index 3138b923..89ea5111 100644 --- a/src/backend/core/recording/event/notification.py +++ b/src/backend/core/recording/event/notification.py @@ -137,6 +137,7 @@ class NotificationService: "email": owner_access.user.email, "sub": owner_access.user.sub, "room": recording.room.name, + "language": recording.options.get("language"), "recording_date": recording.created_at.astimezone( owner_access.user.timezone ).strftime("%Y-%m-%d"), diff --git a/src/summary/summary/api/route/tasks.py b/src/summary/summary/api/route/tasks.py index 89f20101..02e9f541 100644 --- a/src/summary/summary/api/route/tasks.py +++ b/src/summary/summary/api/route/tasks.py @@ -5,7 +5,7 @@ from typing import Optional from celery.result import AsyncResult from fastapi import APIRouter -from pydantic import BaseModel +from pydantic import BaseModel, field_validator from summary.core.celery_worker import ( process_audio_transcribe_summarize_v2, @@ -26,6 +26,18 @@ class TaskCreation(BaseModel): room: Optional[str] recording_date: Optional[str] recording_time: Optional[str] + language: Optional[str] + + @field_validator("language") + @classmethod + def validate_language(cls, v): + """Validate 'language' parameter.""" + if v is not None and v not in settings.whisperx_allowed_languages: + raise ValueError( + f"Language '{v}' is not allowed. 
" + f"Allowed languages: {', '.join(settings.whisperx_allowed_languages)}" + ) + return v router = APIRouter(prefix="/tasks") @@ -44,6 +56,7 @@ async def create_task(request: TaskCreation): request.room, request.recording_date, request.recording_time, + request.language, ], queue=settings.transcribe_queue, ) diff --git a/src/summary/summary/core/analytics.py b/src/summary/summary/core/analytics.py index 80be7ad0..e8b8dd55 100644 --- a/src/summary/summary/core/analytics.py +++ b/src/summary/summary/core/analytics.py @@ -118,7 +118,7 @@ class MetadataManager: "retries": 0, } - _required_args_count = 8 + _required_args_count = 9 if len(task_args) != _required_args_count: logger.error("Invalid number of arguments to enable metadata manager.") return diff --git a/src/summary/summary/core/celery_worker.py b/src/summary/summary/core/celery_worker.py index e12110fa..d99f6a44 100644 --- a/src/summary/summary/core/celery_worker.py +++ b/src/summary/summary/core/celery_worker.py @@ -133,6 +133,7 @@ def process_audio_transcribe_summarize_v2( room: Optional[str], recording_date: Optional[str], recording_time: Optional[str], + language: Optional[str], ): """Process an audio file by transcribing it and generating a summary. 
@@ -193,14 +194,16 @@ def process_audio_transcribe_summarize_v2( try: logger.info( - "Querying transcription for %s seconds of audio …", audio_file.info.length + "Querying transcription for %s seconds of audio in %s …", + audio_file.info.length, + language, ) transcription_start_time = time.time() with open(temp_file_path, "rb") as audio_file: transcription = whisperx_client.audio.transcriptions.create( model=settings.whisperx_asr_model, file=audio_file, - language=settings.whisperx_default_language, + language=language or settings.whisperx_default_language, ) transcription_time = round(time.time() - transcription_start_time, 2) diff --git a/src/summary/summary/core/config.py b/src/summary/summary/core/config.py index b5b057d6..ee7654a5 100644 --- a/src/summary/summary/core/config.py +++ b/src/summary/summary/core/config.py @@ -42,6 +42,7 @@ class Settings(BaseSettings): whisperx_max_retries: int = 0 # ISO 639-1 language code (e.g., "en", "fr", "es") whisperx_default_language: Optional[str] = None + whisperx_allowed_languages: Set[str] = {"en", "fr"} llm_base_url: str llm_api_key: SecretStr llm_model: str