✨(summary) add a language parameter for transcription

Pass the language stored in the recording options to the summary service, so that users can choose the language used for transcription. This matters because automatic language detection often fails, resulting in empty transcriptions or 5xx errors from the Whisper API; users then never receive their transcription, which leads to frustration. Most of our user base holds meetings in French, for which automatic detection is unreliable. The Whisper API has supported a language parameter for some time; only the frontend and backend integration were missing. I did not force French as the default, since a minority of users hold meetings in English or other European languages; when no language is provided, the summary service falls back to its configured default language. A proper settings tab to configure this value will be introduced later.
2025-12-29 18:25:46 +01:00
parent 587a5bc574
commit 4cb6320b83
5 changed files with 22 additions and 4 deletions
--- a/src/backend/core/recording/event/notification.py
+++ b/src/backend/core/recording/event/notification.py
@@ -137,6 +137,7 @@ class NotificationService:
            "email": owner_access.user.email,
            "sub": owner_access.user.sub,
            "room": recording.room.name,
+            "language": recording.options.get("language"),
            "recording_date": recording.created_at.astimezone(
                owner_access.user.timezone
            ).strftime("%Y-%m-%d"),
--- a/src/summary/summary/api/route/tasks.py
+++ b/src/summary/summary/api/route/tasks.py
@@ -5,7 +5,7 @@ from typing import Optional

 from celery.result import AsyncResult
 from fastapi import APIRouter
-from pydantic import BaseModel
+from pydantic import BaseModel, field_validator

 from summary.core.celery_worker import (
    process_audio_transcribe_summarize_v2,
@@ -26,6 +26,18 @@ class TaskCreation(BaseModel):
    room: Optional[str]
    recording_date: Optional[str]
    recording_time: Optional[str]
+    language: Optional[str]
+
+    @field_validator("language")
+    @classmethod
+    def validate_language(cls, v):
+        """Validate 'language' parameter."""
+        if v is not None and v not in settings.whisperx_allowed_languages:
+            raise ValueError(
+                f"Language '{v}' is not allowed. "
+                f"Allowed languages: {', '.join(settings.whisperx_allowed_languages)}"
+            )
+        return v


 router = APIRouter(prefix="/tasks")
@@ -44,6 +56,7 @@ async def create_task(request: TaskCreation):
            request.room,
            request.recording_date,
            request.recording_time,
+            request.language,
        ],
        queue=settings.transcribe_queue,
    )
--- a/src/summary/summary/core/analytics.py
+++ b/src/summary/summary/core/analytics.py
@@ -118,7 +118,7 @@ class MetadataManager:
            "retries": 0,
        }

-        _required_args_count = 8
+        _required_args_count = 9
        if len(task_args) != _required_args_count:
            logger.error("Invalid number of arguments to enable metadata manager.")
            return
--- a/src/summary/summary/core/celery_worker.py
+++ b/src/summary/summary/core/celery_worker.py
@@ -133,6 +133,7 @@ def process_audio_transcribe_summarize_v2(
    room: Optional[str],
    recording_date: Optional[str],
    recording_time: Optional[str],
+    language: Optional[str],
 ):
    """Process an audio file by transcribing it and generating a summary.

@@ -193,14 +194,16 @@ def process_audio_transcribe_summarize_v2(

    try:
        logger.info(
-            "Querying transcription for %s seconds of audio …", audio_file.info.length
+            "Querying transcription for %s seconds of audio in %s …",
+            audio_file.info.length,
+            language,
        )
        transcription_start_time = time.time()
        with open(temp_file_path, "rb") as audio_file:
            transcription = whisperx_client.audio.transcriptions.create(
                model=settings.whisperx_asr_model,
                file=audio_file,
-                language=settings.whisperx_default_language,
+                language=language or settings.whisperx_default_language,
            )

            transcription_time = round(time.time() - transcription_start_time, 2)
--- a/src/summary/summary/core/config.py
+++ b/src/summary/summary/core/config.py
@@ -42,6 +42,7 @@ class Settings(BaseSettings):
    whisperx_max_retries: int = 0
    # ISO 639-1 language code (e.g., "en", "fr", "es")
    whisperx_default_language: Optional[str] = None
+    whisperx_allowed_languages: Set[str] = {"en", "fr"}
    llm_base_url: str
    llm_api_key: SecretStr
    llm_model: str