From aecc48f92822804b5b72d44067c1f4bfb409ed2e Mon Sep 17 00:00:00 2001 From: lebaudantoine Date: Fri, 10 Oct 2025 11:33:15 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=A7(summary)=20add=20configurable=20la?= =?UTF-8?q?nguage=20settings=20for=20WhisperX=20transcription?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the WhisperX transcription language configurable through FastAPI settings. When a recording starts with empty (silent) audio, automatic language detection fails and incorrectly defaults to English, even though about 99% of usage is in French. This is a quick fix: the long-term solution should let users select the language dynamically for each recording through the web interface, rather than relying on a global server setting. --- env.d/development/summary.dist | 1 + src/helm/env.d/dev-keycloak/values.meet.yaml.gotmpl | 3 +++ src/summary/summary/core/celery_worker.py | 4 +++- src/summary/summary/core/config.py | 2 ++ 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/env.d/development/summary.dist b/env.d/development/summary.dist index e5777226..878fef35 100644 --- a/env.d/development/summary.dist +++ b/env.d/development/summary.dist @@ -11,6 +11,7 @@ AWS_S3_SECRET_ACCESS_KEY="password" WHISPERX_BASE_URL="https://configure-your-url.com" WHISPERX_ASR_MODEL="large-v2" WHISPERX_API_KEY="your-secret-key" +WHISPERX_DEFAULT_LANGUAGE="fr" LLM_BASE_URL="https://configure-your-url.com" LLM_API_KEY="dev-apikey" diff --git a/src/helm/env.d/dev-keycloak/values.meet.yaml.gotmpl b/src/helm/env.d/dev-keycloak/values.meet.yaml.gotmpl index 40322953..44f9d245 100644 --- a/src/helm/env.d/dev-keycloak/values.meet.yaml.gotmpl +++ b/src/helm/env.d/dev-keycloak/values.meet.yaml.gotmpl @@ -155,6 +155,7 @@ summary: WHISPERX_API_KEY: your-secret-value WHISPERX_BASE_URL: https://configure-your-url.com WHISPERX_ASR_MODEL: large-v2 + WHISPERX_DEFAULT_LANGUAGE: fr LLM_BASE_URL: https://configure-your-url.com LLM_API_KEY: your-secret-value LLM_MODEL: meta-llama/Llama-3.1-8B-Instruct 
@@ -191,6 +192,7 @@ celeryTranscribe: WHISPERX_API_KEY: your-secret-value WHISPERX_BASE_URL: https://configure-your-url.com WHISPERX_ASR_MODEL: large-v2 + WHISPERX_DEFAULT_LANGUAGE: fr LLM_BASE_URL: https://configure-your-url.com LLM_API_KEY: your-secret-value LLM_MODEL: meta-llama/Llama-3.1-8B-Instruct @@ -228,6 +230,7 @@ celerySummarize: WHISPERX_API_KEY: your-secret-value WHISPERX_BASE_URL: https://configure-your-url.com WHISPERX_ASR_MODEL: large-v2 + WHISPERX_DEFAULT_LANGUAGE: fr LLM_BASE_URL: https://configure-your-url.com LLM_API_KEY: your-secret-value LLM_MODEL: meta-llama/Llama-3.1-8B-Instruct diff --git a/src/summary/summary/core/celery_worker.py b/src/summary/summary/core/celery_worker.py index 5a2d5e2f..7e06d2e1 100644 --- a/src/summary/summary/core/celery_worker.py +++ b/src/summary/summary/core/celery_worker.py @@ -270,7 +270,9 @@ def process_audio_transcribe_summarize_v2( transcription_start_time = time.time() with open(temp_file_path, "rb") as audio_file: transcription = whisperx_client.audio.transcriptions.create( - model=settings.whisperx_asr_model, file=audio_file + model=settings.whisperx_asr_model, + file=audio_file, + language=settings.whisperx_default_language, ) metadata_manager.track( task_id, diff --git a/src/summary/summary/core/config.py b/src/summary/summary/core/config.py index 77dc7aa3..b4e39520 100644 --- a/src/summary/summary/core/config.py +++ b/src/summary/summary/core/config.py @@ -39,6 +39,8 @@ class Settings(BaseSettings): whisperx_base_url: str = "https://api.openai.com/v1" whisperx_asr_model: str = "whisper-1" whisperx_max_retries: int = 0 + # ISO 639-1 language code (e.g., "en", "fr", "es") + whisperx_default_language: Optional[str] = None llm_base_url: str llm_api_key: str llm_model: str