🔧(summary) add configurable language settings for WhisperX transcription

Make the WhisperX transcription language configurable through the FastAPI settings. When a recording starts with silence or empty audio, automatic language detection fails and incorrectly defaults to English, even though 99% of usage is in French. This is a quick fix: the long-term solution should let users select the language dynamically for each recording through the web interface, rather than relying on a global server setting.
2025-10-10 11:33:15 +02:00
parent 4353db4a5f
commit aecc48f928
4 changed files with 9 additions and 1 deletions
--- a/env.d/development/summary.dist
+++ b/env.d/development/summary.dist
@@ -11,6 +11,7 @@ AWS_S3_SECRET_ACCESS_KEY="password"
 WHISPERX_BASE_URL="https://configure-your-url.com"
 WHISPERX_ASR_MODEL="large-v2"
 WHISPERX_API_KEY="your-secret-key"
+WHISPERX_DEFAULT_LANGUAGE="fr"

 LLM_BASE_URL="https://configure-your-url.com"
 LLM_API_KEY="dev-apikey"
--- a/src/helm/env.d/dev-keycloak/values.meet.yaml.gotmpl
+++ b/src/helm/env.d/dev-keycloak/values.meet.yaml.gotmpl
@@ -155,6 +155,7 @@ summary:
    WHISPERX_API_KEY: your-secret-value
    WHISPERX_BASE_URL: https://configure-your-url.com
    WHISPERX_ASR_MODEL: large-v2
+    WHISPERX_DEFAULT_LANGUAGE: fr
    LLM_BASE_URL: https://configure-your-url.com
    LLM_API_KEY: your-secret-value
    LLM_MODEL: meta-llama/Llama-3.1-8B-Instruct
@@ -191,6 +192,7 @@ celeryTranscribe:
    WHISPERX_API_KEY: your-secret-value
    WHISPERX_BASE_URL: https://configure-your-url.com
    WHISPERX_ASR_MODEL: large-v2
+    WHISPERX_DEFAULT_LANGUAGE: fr
    LLM_BASE_URL: https://configure-your-url.com
    LLM_API_KEY: your-secret-value
    LLM_MODEL: meta-llama/Llama-3.1-8B-Instruct
@@ -228,6 +230,7 @@ celerySummarize:
    WHISPERX_API_KEY: your-secret-value
    WHISPERX_BASE_URL: https://configure-your-url.com
    WHISPERX_ASR_MODEL: large-v2
+    WHISPERX_DEFAULT_LANGUAGE: fr
    LLM_BASE_URL: https://configure-your-url.com
    LLM_API_KEY: your-secret-value
    LLM_MODEL: meta-llama/Llama-3.1-8B-Instruct
--- a/src/summary/summary/core/celery_worker.py
+++ b/src/summary/summary/core/celery_worker.py
@@ -270,7 +270,9 @@ def process_audio_transcribe_summarize_v2(
        transcription_start_time = time.time()
        with open(temp_file_path, "rb") as audio_file:
            transcription = whisperx_client.audio.transcriptions.create(
-                model=settings.whisperx_asr_model, file=audio_file
+                model=settings.whisperx_asr_model,
+                file=audio_file,
+                language=settings.whisperx_default_language,
            )
            metadata_manager.track(
                task_id,
--- a/src/summary/summary/core/config.py
+++ b/src/summary/summary/core/config.py
@@ -39,6 +39,8 @@ class Settings(BaseSettings):
    whisperx_base_url: str = "https://api.openai.com/v1"
    whisperx_asr_model: str = "whisper-1"
    whisperx_max_retries: int = 0
+    # ISO 639-1 language code (e.g., "en", "fr", "es")
+    whisperx_default_language: Optional[str] = None
    llm_base_url: str
    llm_api_key: str
    llm_model: str