(summary) add a language parameter for transcription

Pass the language from the recording options to the summary service, allowing
users to choose the language used for transcription.

This is important because automatic language detection often fails, causing
empty transcriptions or 5xx errors from the Whisper API. Users then do not
receive their transcriptions, which leads to frustration. For most of our
userbase, meetings are in French, and automatic detection is unreliable.

Support for language parameterization in the Whisper API has existed for some
time; only the frontend and backend integration were missing.

I did not force French as the default, since a minority of users hold meetings
in English or other European languages. A proper settings tab to configure this
value will be introduced later.
This commit is contained in:
lebaudantoine
2025-12-29 18:25:46 +01:00
committed by aleb_the_flash
parent 587a5bc574
commit 4cb6320b83
5 changed files with 22 additions and 4 deletions

View File

@@ -137,6 +137,7 @@ class NotificationService:
"email": owner_access.user.email,
"sub": owner_access.user.sub,
"room": recording.room.name,
"language": recording.options.get("language"),
"recording_date": recording.created_at.astimezone(
owner_access.user.timezone
).strftime("%Y-%m-%d"),

View File

@@ -5,7 +5,7 @@ from typing import Optional
from celery.result import AsyncResult
from fastapi import APIRouter
from pydantic import BaseModel
from pydantic import BaseModel, field_validator
from summary.core.celery_worker import (
process_audio_transcribe_summarize_v2,
@@ -26,6 +26,18 @@ class TaskCreation(BaseModel):
room: Optional[str]
recording_date: Optional[str]
recording_time: Optional[str]
language: Optional[str]
@field_validator("language")
@classmethod
def validate_language(cls, v):
    """Validate the optional 'language' parameter.

    ``None`` is returned unchanged, as is any value present in
    ``settings.whisperx_allowed_languages``.

    Raises:
        ValueError: if ``v`` is not ``None`` and is not one of the allowed
            languages. The message lists the allowed languages.
    """
    if v is None or v in settings.whisperx_allowed_languages:
        return v

    # The allowed languages are a set, whose iteration order for strings is
    # not stable across processes; sort so the error message is deterministic.
    allowed = ", ".join(sorted(settings.whisperx_allowed_languages))
    raise ValueError(
        f"Language '{v}' is not allowed. "
        f"Allowed languages: {allowed}"
    )
router = APIRouter(prefix="/tasks")
@@ -44,6 +56,7 @@ async def create_task(request: TaskCreation):
request.room,
request.recording_date,
request.recording_time,
request.language,
],
queue=settings.transcribe_queue,
)

View File

@@ -118,7 +118,7 @@ class MetadataManager:
"retries": 0,
}
_required_args_count = 8
_required_args_count = 9
if len(task_args) != _required_args_count:
logger.error("Invalid number of arguments to enable metadata manager.")
return

View File

@@ -133,6 +133,7 @@ def process_audio_transcribe_summarize_v2(
room: Optional[str],
recording_date: Optional[str],
recording_time: Optional[str],
language: Optional[str],
):
"""Process an audio file by transcribing it and generating a summary.
@@ -193,14 +194,16 @@ def process_audio_transcribe_summarize_v2(
try:
logger.info(
"Querying transcription for %s seconds of audio …", audio_file.info.length
"Querying transcription for %s seconds of audio in %s ",
audio_file.info.length,
language,
)
transcription_start_time = time.time()
with open(temp_file_path, "rb") as audio_file:
transcription = whisperx_client.audio.transcriptions.create(
model=settings.whisperx_asr_model,
file=audio_file,
language=settings.whisperx_default_language,
language=language or settings.whisperx_default_language,
)
transcription_time = round(time.time() - transcription_start_time, 2)

View File

@@ -42,6 +42,7 @@ class Settings(BaseSettings):
whisperx_max_retries: int = 0
# ISO 639-1 language code (e.g., "en", "fr", "es")
whisperx_default_language: Optional[str] = None
whisperx_allowed_languages: Set[str] = {"en", "fr"}
llm_base_url: str
llm_api_key: SecretStr
llm_model: str