✨(summary) add a language parameter for transcription
Pass the language from the recording options to the summary service, allowing users to choose the language used for transcription. This is important because automatic language detection often fails, causing empty transcriptions or 5xx errors from the Whisper API. Users then do not receive their transcriptions, which leads to frustration. For most of our user base, meetings are held in French, and automatic detection is unreliable. The Whisper API has supported a language parameter for some time; only the frontend and backend integration were missing. I did not force French as the default, since a minority of users hold meetings in English or other European languages. A proper settings tab to configure this value will be introduced later.
This commit is contained in:
committed by
aleb_the_flash
parent
587a5bc574
commit
4cb6320b83
@@ -137,6 +137,7 @@ class NotificationService:
|
||||
"email": owner_access.user.email,
|
||||
"sub": owner_access.user.sub,
|
||||
"room": recording.room.name,
|
||||
"language": recording.options.get("language"),
|
||||
"recording_date": recording.created_at.astimezone(
|
||||
owner_access.user.timezone
|
||||
).strftime("%Y-%m-%d"),
|
||||
|
||||
@@ -5,7 +5,7 @@ from typing import Optional
|
||||
|
||||
from celery.result import AsyncResult
|
||||
from fastapi import APIRouter
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, field_validator
|
||||
|
||||
from summary.core.celery_worker import (
|
||||
process_audio_transcribe_summarize_v2,
|
||||
@@ -26,6 +26,18 @@ class TaskCreation(BaseModel):
|
||||
room: Optional[str]
|
||||
recording_date: Optional[str]
|
||||
recording_time: Optional[str]
|
||||
language: Optional[str]
|
||||
|
||||
@field_validator("language")
@classmethod
def validate_language(cls, v):
    """Validate the optional 'language' parameter.

    ``None`` is accepted and returned unchanged. Any other value must be a
    member of ``settings.whisperx_allowed_languages``.

    Args:
        v: The submitted language value, or ``None`` when not provided.

    Returns:
        The value unchanged when it passes validation.

    Raises:
        ValueError: If ``v`` is not ``None`` and is not an allowed language.
    """
    if v is not None and v not in settings.whisperx_allowed_languages:
        # The allowed languages are stored as a set, whose iteration order
        # is not guaranteed; sort so the error message is stable.
        allowed = ", ".join(sorted(settings.whisperx_allowed_languages))
        raise ValueError(
            f"Language '{v}' is not allowed. Allowed languages: {allowed}"
        )
    return v
|
||||
|
||||
|
||||
router = APIRouter(prefix="/tasks")
|
||||
@@ -44,6 +56,7 @@ async def create_task(request: TaskCreation):
|
||||
request.room,
|
||||
request.recording_date,
|
||||
request.recording_time,
|
||||
request.language,
|
||||
],
|
||||
queue=settings.transcribe_queue,
|
||||
)
|
||||
|
||||
@@ -118,7 +118,7 @@ class MetadataManager:
|
||||
"retries": 0,
|
||||
}
|
||||
|
||||
_required_args_count = 8
|
||||
_required_args_count = 9
|
||||
if len(task_args) != _required_args_count:
|
||||
logger.error("Invalid number of arguments to enable metadata manager.")
|
||||
return
|
||||
|
||||
@@ -133,6 +133,7 @@ def process_audio_transcribe_summarize_v2(
|
||||
room: Optional[str],
|
||||
recording_date: Optional[str],
|
||||
recording_time: Optional[str],
|
||||
language: Optional[str],
|
||||
):
|
||||
"""Process an audio file by transcribing it and generating a summary.
|
||||
|
||||
@@ -193,14 +194,16 @@ def process_audio_transcribe_summarize_v2(
|
||||
|
||||
try:
|
||||
logger.info(
|
||||
"Querying transcription for %s seconds of audio …", audio_file.info.length
|
||||
"Querying transcription for %s seconds of audio in %s …",
|
||||
audio_file.info.length,
|
||||
language,
|
||||
)
|
||||
transcription_start_time = time.time()
|
||||
with open(temp_file_path, "rb") as audio_file:
|
||||
transcription = whisperx_client.audio.transcriptions.create(
|
||||
model=settings.whisperx_asr_model,
|
||||
file=audio_file,
|
||||
language=settings.whisperx_default_language,
|
||||
language=language or settings.whisperx_default_language,
|
||||
)
|
||||
|
||||
transcription_time = round(time.time() - transcription_start_time, 2)
|
||||
|
||||
@@ -42,6 +42,7 @@ class Settings(BaseSettings):
|
||||
whisperx_max_retries: int = 0
|
||||
# ISO 639-1 language code (e.g., "en", "fr", "es")
|
||||
whisperx_default_language: Optional[str] = None
|
||||
whisperx_allowed_languages: Set[str] = {"en", "fr"}
|
||||
llm_base_url: str
|
||||
llm_api_key: SecretStr
|
||||
llm_model: str
|
||||
|
||||
Reference in New Issue
Block a user