From 9f9cef7e2ae049b266fccb115dad3e9364cc07d3 Mon Sep 17 00:00:00 2001 From: Ghislain LE MEUR Date: Fri, 24 Oct 2025 16:02:45 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8(agents)=20add=20multilingual=20suppor?= =?UTF-8?q?t=20for=20real-time=20subtitles?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add dynamic configuration for Deepgram STT via environment variables, enabling multilingual real-time subtitles with automatic language detection. Changes: - Add DEEPGRAM_STT_* environment variables pattern for configuration - Implement _build_deepgram_stt_kwargs() to dynamically build STT parameters from environment variables - Add whitelist of supported parameters (model, language) for LiveKit Deepgram plugin - Log warnings for unsupported parameters (diarize, smart_format, etc) - Set default configuration: model=nova-3, language=multi - Document supported parameters in Helm values.yaml Configuration: - DEEPGRAM_STT_MODEL: Deepgram model (default: nova-3) - DEEPGRAM_STT_LANGUAGE: Language or 'multi' for automatic detection of 10 languages (en, es, fr, de, hi, ru, pt, ja, it, nl) Note: Advanced features like diarization and smart_format are not supported by the LiveKit Deepgram plugin in streaming mode. 
logger = logging.getLogger("transcriber")

TRANSCRIBER_AGENT_NAME = os.getenv("TRANSCRIBER_AGENT_NAME", "multi-user-transcriber")

# Prefix shared by every environment variable that configures Deepgram STT.
_DEEPGRAM_STT_ENV_PREFIX = "DEEPGRAM_STT_"

# Default Deepgram STT configuration
DEEPGRAM_STT_DEFAULTS = {
    "model": "nova-3",
    "language": "multi",
}

# Supported parameters for LiveKit's deepgram.STT() in streaming mode
# Note: Not all Deepgram API parameters are supported by the LiveKit plugin
# detect_language is NOT supported for real-time streaming
# Use language="multi" instead for automatic multilingual support
DEEPGRAM_STT_SUPPORTED_PARAMS = {
    "model",
    "language",
}


def _coerce_env_value(value):
    """Convert a raw environment string into a bool, int or str.

    "true"/"false" (any letter case) become booleans and strings made only
    of decimal digits become integers; anything else is returned unchanged.

    ``str.isdecimal`` is used rather than ``str.isdigit`` because the latter
    also accepts characters such as "²" that ``int()`` rejects, which would
    raise ``ValueError`` while the configuration is being built.
    """
    lowered = value.lower()
    if lowered in ("true", "false"):
        return lowered == "true"
    if value.isdecimal():
        return int(value)
    return value


def _build_deepgram_stt_kwargs():
    """Build Deepgram STT kwargs from DEEPGRAM_STT_* environment variables.

    Starts from a copy of ``DEEPGRAM_STT_DEFAULTS`` and overrides it with
    every ``DEEPGRAM_STT_<NAME>`` variable whose lower-cased ``<name>`` is
    listed in ``DEEPGRAM_STT_SUPPORTED_PARAMS``.

    Unsupported parameters are skipped and logged as warnings. Values are
    stripped of surrounding whitespace; an empty value is ignored (with a
    warning) so that a blank variable cannot replace a working default with
    an empty string.

    Returns:
        A new dict of keyword arguments; the module-level defaults are never
        mutated.
    """
    stt_kwargs = DEEPGRAM_STT_DEFAULTS.copy()

    # Scan environment variables for the DEEPGRAM_STT_* pattern
    for key, raw_value in os.environ.items():
        if not key.startswith(_DEEPGRAM_STT_ENV_PREFIX):
            continue

        # Extract parameter name and convert to lowercase
        param_name = key[len(_DEEPGRAM_STT_ENV_PREFIX):].lower()

        # Check if parameter is supported by LiveKit plugin
        if param_name not in DEEPGRAM_STT_SUPPORTED_PARAMS:
            supported = ", ".join(sorted(DEEPGRAM_STT_SUPPORTED_PARAMS))
            logger.warning(
                "Ignoring unsupported Deepgram STT parameter: %s. "
                "Supported parameters: %s",
                param_name,
                supported,
            )
            continue

        value = raw_value.strip()
        if not value:
            # A blank override carries no information; keep the default.
            logger.warning(
                "Ignoring empty value for Deepgram STT parameter: %s",
                param_name,
            )
            continue

        stt_kwargs[param_name] = _coerce_env_value(value)

    logger.info("Deepgram STT configuration: %s", stt_kwargs)
    return stt_kwargs
# DEEPGRAM_STT_LANGUAGE: "multi" # Multilingual mode with auto-detection (default) + # DEEPGRAM_STT_LANGUAGE: "fr" # Force French only + # + # Note: Advanced features (diarization, smart_format, punctuate, detect_language) + # are NOT supported by the LiveKit Deepgram plugin in streaming mode. + # Use language="multi" for automatic multilingual support (10 languages). ## @param agents.podAnnotations Annotations to add to the agents Pod podAnnotations: {}