🔧(summary) add configurable language settings for WhisperX transcription

Make the WhisperX transcription language configurable through FastAPI
settings. When a recording starts with empty audio, automatic language
detection fails and incorrectly defaults to English, even though 99% of
usage is in French.

This is a quick fix. The long-term solution should let users select the
language dynamically for each recording through the web interface,
rather than relying on a global server setting.
This commit is contained in:
lebaudantoine
2025-10-10 11:33:15 +02:00
committed by aleb_the_flash
parent 4353db4a5f
commit aecc48f928
4 changed files with 9 additions and 1 deletions

View File

@@ -11,6 +11,7 @@ AWS_S3_SECRET_ACCESS_KEY="password"
WHISPERX_BASE_URL="https://configure-your-url.com"
WHISPERX_ASR_MODEL="large-v2"
WHISPERX_API_KEY="your-secret-key"
WHISPERX_DEFAULT_LANGUAGE="fr"
LLM_BASE_URL="https://configure-your-url.com"
LLM_API_KEY="dev-apikey"

View File

@@ -155,6 +155,7 @@ summary:
WHISPERX_API_KEY: your-secret-value
WHISPERX_BASE_URL: https://configure-your-url.com
WHISPERX_ASR_MODEL: large-v2
WHISPERX_DEFAULT_LANGUAGE: fr
LLM_BASE_URL: https://configure-your-url.com
LLM_API_KEY: your-secret-value
LLM_MODEL: meta-llama/Llama-3.1-8B-Instruct
@@ -191,6 +192,7 @@ celeryTranscribe:
WHISPERX_API_KEY: your-secret-value
WHISPERX_BASE_URL: https://configure-your-url.com
WHISPERX_ASR_MODEL: large-v2
WHISPERX_DEFAULT_LANGUAGE: fr
LLM_BASE_URL: https://configure-your-url.com
LLM_API_KEY: your-secret-value
LLM_MODEL: meta-llama/Llama-3.1-8B-Instruct
@@ -228,6 +230,7 @@ celerySummarize:
WHISPERX_API_KEY: your-secret-value
WHISPERX_BASE_URL: https://configure-your-url.com
WHISPERX_ASR_MODEL: large-v2
WHISPERX_DEFAULT_LANGUAGE: fr
LLM_BASE_URL: https://configure-your-url.com
LLM_API_KEY: your-secret-value
LLM_MODEL: meta-llama/Llama-3.1-8B-Instruct

View File

@@ -270,7 +270,9 @@ def process_audio_transcribe_summarize_v2(
transcription_start_time = time.time()
with open(temp_file_path, "rb") as audio_file:
transcription = whisperx_client.audio.transcriptions.create(
model=settings.whisperx_asr_model, file=audio_file
model=settings.whisperx_asr_model,
file=audio_file,
language=settings.whisperx_default_language,
)
metadata_manager.track(
task_id,

View File

@@ -39,6 +39,8 @@ class Settings(BaseSettings):
whisperx_base_url: str = "https://api.openai.com/v1"
whisperx_asr_model: str = "whisper-1"
whisperx_max_retries: int = 0
# ISO 639-1 language code (e.g., "en", "fr", "es")
whisperx_default_language: Optional[str] = None
llm_base_url: str
llm_api_key: str
llm_model: str