(agents) add multilingual support for real-time subtitles

Add dynamic configuration for Deepgram STT via environment variables,
enabling multilingual real-time subtitles with automatic language
detection.

Changes:
- Add DEEPGRAM_STT_* environment variables pattern for configuration
- Implement _build_deepgram_stt_kwargs() to dynamically build STT
  parameters from environment variables
- Add whitelist of supported parameters (model, language) for LiveKit
  Deepgram plugin
- Log warnings for unsupported parameters (diarize, smart_format, etc.)
- Set default configuration: model=nova-3, language=multi
- Document supported parameters in Helm values.yaml

Configuration:
- DEEPGRAM_STT_MODEL: Deepgram model (default: nova-3)
- DEEPGRAM_STT_LANGUAGE: Language or 'multi' for automatic detection
  of 10 languages (en, es, fr, de, hi, ru, pt, ja, it, nl)

Note: Advanced features like diarization and smart_format are not
supported by the LiveKit Deepgram plugin in streaming mode.
This commit is contained in:
Ghislain LE MEUR
2025-10-24 16:02:45 +02:00
committed by aleb_the_flash
parent b403ac56bf
commit 9f9cef7e2a
2 changed files with 70 additions and 1 deletions

View File

@@ -29,15 +29,72 @@ logger = logging.getLogger("transcriber")
# Agent name, read from the TRANSCRIBER_AGENT_NAME environment variable with
# a built-in fallback.
TRANSCRIBER_AGENT_NAME = os.environ.get(
    "TRANSCRIBER_AGENT_NAME", "multi-user-transcriber"
)

# Baseline Deepgram STT settings; used as the starting point before any
# DEEPGRAM_STT_* environment override is applied.
DEEPGRAM_STT_DEFAULTS = dict(model="nova-3", language="multi")

# Whitelist of parameter names forwarded to LiveKit's deepgram.STT() in
# streaming mode. The LiveKit plugin does not accept every Deepgram API
# parameter: detect_language in particular is NOT supported for real-time
# streaming, so language="multi" is used for automatic multilingual support.
DEEPGRAM_STT_SUPPORTED_PARAMS = {"model", "language"}
def _parse_deepgram_stt_value(raw):
    """Coerce a raw environment string into a bool, an int or a str.

    "true"/"false" (case-insensitive) become booleans, strings made only of
    decimal digits become integers, everything else is returned unchanged.
    """
    lowered = raw.lower()
    if lowered in ("true", "false"):
        return lowered == "true"
    # str.isdecimal() only accepts characters int() can parse. str.isdigit()
    # also accepts e.g. superscript digits ("²"), for which int() raises
    # ValueError and would crash the agent at start-up.
    if raw.isdecimal():
        return int(raw)
    return raw


def _build_deepgram_stt_kwargs():
    """Build Deepgram STT kwargs from DEEPGRAM_STT_* environment variables.

    Starts from a copy of DEEPGRAM_STT_DEFAULTS and overrides it with every
    DEEPGRAM_STT_<NAME> variable whose lower-cased <NAME> is listed in
    DEEPGRAM_STT_SUPPORTED_PARAMS. Unsupported parameters and empty values
    are skipped and logged as warnings, so the defaults stay in effect.

    Returns:
        dict: keyword arguments to pass to deepgram.STT().
    """
    prefix = "DEEPGRAM_STT_"
    stt_kwargs = DEEPGRAM_STT_DEFAULTS.copy()

    for key, value in os.environ.items():
        if not key.startswith(prefix):
            continue

        # Strip the prefix and normalise to the lower-case kwarg name.
        param_name = key[len(prefix):].lower()

        if param_name not in DEEPGRAM_STT_SUPPORTED_PARAMS:
            logger.warning(
                "Ignoring unsupported Deepgram STT parameter: %s. "
                "Supported parameters: %s",
                param_name,
                ", ".join(sorted(DEEPGRAM_STT_SUPPORTED_PARAMS)),
            )
            continue

        # A blank variable (e.g. an unset templated value) must not replace
        # a working default with an empty string.
        if not value.strip():
            logger.warning(
                "Ignoring empty value for Deepgram STT parameter: %s",
                param_name,
            )
            continue

        stt_kwargs[param_name] = _parse_deepgram_stt_value(value)

    logger.info("Deepgram STT configuration: %s", stt_kwargs)
    return stt_kwargs
class Transcriber(Agent):
"""Create a transcription agent for a specific participant."""
def __init__(self, *, participant_identity: str):
"""Init transcription agent."""
# Build STT configuration from environment variables
stt_kwargs = _build_deepgram_stt_kwargs()
super().__init__(
instructions="not-needed",
stt=deepgram.STT(),
stt=deepgram.STT(**stt_kwargs),
)
self.participant_identity = participant_identity

View File

@@ -788,9 +788,21 @@ agents:
## @extra agents.envVars.FROM_CONFIGMAP.configMapKeyRef.key Key within a ConfigMap when configuring env vars from a ConfigMap
## @extra agents.envVars.FROM_SECRET.secretKeyRef.name Name of a Secret when configuring env vars from a Secret
## @extra agents.envVars.FROM_SECRET.secretKeyRef.key Key within a Secret when configuring env vars from a Secret
## @extra agents.envVars.DEEPGRAM_STT_MODEL Deepgram model to use for speech-to-text (default: nova-3)
## @extra agents.envVars.DEEPGRAM_STT_LANGUAGE Language code for transcription or 'multi' for automatic multilingual support with real-time code-switching (default: multi, supports: en, es, fr, de, hi, ru, pt, ja, it, nl)
## @skip agents.envVars
envVars:
<<: *commonEnvVars
# Deepgram Speech-to-Text configuration for real-time streaming
# Only the 'model' and 'language' parameters are supported by the LiveKit plugin;
# any other DEEPGRAM_STT_* variable is ignored and logged as a warning.
#
# DEEPGRAM_STT_MODEL: "nova-3" # Model selection (default)
# DEEPGRAM_STT_LANGUAGE: "multi" # Multilingual mode with auto-detection (default)
# DEEPGRAM_STT_LANGUAGE: "fr" # Force French only
#
# Note: Advanced features (diarization, smart_format, punctuate, detect_language)
# are NOT supported by the LiveKit Deepgram plugin in streaming mode.
# Use language="multi" for automatic multilingual support (10 languages).
## @param agents.podAnnotations Annotations to add to the agents Pod
podAnnotations: {}