✨(agents) add multilingual support for real-time subtitles
Add dynamic configuration for Deepgram STT via environment variables, enabling multilingual real-time subtitles with automatic language detection.

Changes:
- Add a DEEPGRAM_STT_* environment variable pattern for configuration
- Implement _build_deepgram_stt_kwargs() to dynamically build STT parameters from environment variables
- Add a whitelist of supported parameters (model, language) for the LiveKit Deepgram plugin
- Log warnings for unsupported parameters (diarize, smart_format, etc.)
- Set the default configuration: model=nova-3, language=multi
- Document supported parameters in the Helm values.yaml

Configuration:
- DEEPGRAM_STT_MODEL: Deepgram model (default: nova-3)
- DEEPGRAM_STT_LANGUAGE: Language code, or 'multi' for automatic detection of 10 languages (en, es, fr, de, hi, ru, pt, ja, it, nl)

Note: Advanced features like diarization and smart_format are not supported by the LiveKit Deepgram plugin in streaming mode.
This commit is contained in:
committed by
aleb_the_flash
parent
b403ac56bf
commit
9f9cef7e2a
@@ -29,15 +29,72 @@ logger = logging.getLogger("transcriber")
|
||||
|
||||
TRANSCRIBER_AGENT_NAME = os.environ.get(
    "TRANSCRIBER_AGENT_NAME", "multi-user-transcriber"
)

# Baseline Deepgram STT settings, used for any key that no DEEPGRAM_STT_*
# environment variable overrides.
DEEPGRAM_STT_DEFAULTS = dict(model="nova-3", language="multi")

# Whitelist of keyword arguments forwarded to LiveKit's deepgram.STT() in
# streaming mode.
# Note: the LiveKit plugin does not support every Deepgram API parameter.
# detect_language is NOT supported for real-time streaming; use
# language="multi" instead for automatic multilingual support.
DEEPGRAM_STT_SUPPORTED_PARAMS = {"language", "model"}
|
||||
|
||||
|
||||
def _build_deepgram_stt_kwargs():
    """Build Deepgram STT kwargs from DEEPGRAM_STT_* environment variables.

    Starts from a copy of ``DEEPGRAM_STT_DEFAULTS`` and overrides it with
    every ``DEEPGRAM_STT_<NAME>`` environment variable whose lowercased
    ``<name>`` is listed in ``DEEPGRAM_STT_SUPPORTED_PARAMS``. Variables
    naming any other parameter are skipped with a warning, as are supported
    variables whose value is empty or whitespace-only (the default is kept).

    Values are stripped of surrounding whitespace and then coerced:
    ``"true"``/``"false"`` (case-insensitive) become ``bool``, strings made
    only of decimal digits become ``int``, anything else stays a ``str``.

    Returns:
        dict: Keyword arguments to pass to ``deepgram.STT()``.
    """
    prefix = "DEEPGRAM_STT_"
    stt_kwargs = DEEPGRAM_STT_DEFAULTS.copy()
    # Loop-invariant, so build the human-readable whitelist only once.
    supported = ", ".join(sorted(DEEPGRAM_STT_SUPPORTED_PARAMS))

    for key, raw_value in os.environ.items():
        if not key.startswith(prefix):
            continue

        # Extract parameter name and convert to lowercase
        param_name = key[len(prefix):].lower()

        # Check if parameter is supported by LiveKit plugin
        if param_name not in DEEPGRAM_STT_SUPPORTED_PARAMS:
            logger.warning(
                "Ignoring unsupported Deepgram STT parameter: %s. "
                "Supported parameters: %s",
                param_name,
                supported,
            )
            continue

        # A variable that is defined but blank (common with templated
        # deployments) must not replace a working default with "".
        value = raw_value.strip()
        if not value:
            logger.warning(
                "Ignoring empty value for Deepgram STT parameter: %s",
                param_name,
            )
            continue

        # Parse value type
        value_lower = value.lower()
        if value_lower in ("true", "false"):
            stt_kwargs[param_name] = value_lower == "true"
        elif value.isdecimal():
            # isdecimal(), not isdigit(): isdigit() also accepts characters
            # such as superscript digits, which int() rejects with ValueError.
            stt_kwargs[param_name] = int(value)
        else:
            stt_kwargs[param_name] = value

    logger.info("Deepgram STT configuration: %s", stt_kwargs)
    return stt_kwargs
|
||||
|
||||
|
||||
class Transcriber(Agent):
    """Create a transcription agent for a specific participant."""

    def __init__(self, *, participant_identity: str) -> None:
        """Init transcription agent.

        Args:
            participant_identity: Identity of the participant this agent is
                created for; stored as ``self.participant_identity``.
        """
        # Build STT configuration from environment variables
        stt_kwargs = _build_deepgram_stt_kwargs()

        # Pass `stt` exactly once: a repeated keyword argument is a
        # SyntaxError, so only the environment-configured STT is kept.
        super().__init__(
            instructions="not-needed",
            stt=deepgram.STT(**stt_kwargs),
        )
        self.participant_identity = participant_identity
|
||||
|
||||
|
||||
@@ -788,9 +788,21 @@ agents:
|
||||
## @extra agents.envVars.FROM_CONFIGMAP.configMapKeyRef.key Key within a ConfigMap when configuring env vars from a ConfigMap
|
||||
## @extra agents.envVars.FROM_SECRET.secretKeyRef.name Name of a Secret when configuring env vars from a Secret
|
||||
## @extra agents.envVars.FROM_SECRET.secretKeyRef.key Key within a Secret when configuring env vars from a Secret
|
||||
## @extra agents.envVars.DEEPGRAM_STT_MODEL Deepgram model to use for speech-to-text (default: nova-3)
|
||||
## @extra agents.envVars.DEEPGRAM_STT_LANGUAGE Language code for transcription or 'multi' for automatic multilingual support with real-time code-switching (default: multi, supports: en, es, fr, de, hi, ru, pt, ja, it, nl)
|
||||
## @skip agents.envVars
|
||||
envVars:
|
||||
<<: *commonEnvVars
|
||||
# Deepgram Speech-to-Text configuration for real-time streaming
|
||||
# Only 'model' and 'language' parameters are supported by the LiveKit plugin
|
||||
#
|
||||
# DEEPGRAM_STT_MODEL: "nova-3" # Model selection (default)
|
||||
# DEEPGRAM_STT_LANGUAGE: "multi" # Multilingual mode with auto-detection (default)
|
||||
# DEEPGRAM_STT_LANGUAGE: "fr" # Force French only
|
||||
#
|
||||
# Note: Advanced features (diarization, smart_format, punctuate, detect_language)
|
||||
# are NOT supported by the LiveKit Deepgram plugin in streaming mode.
|
||||
# Set DEEPGRAM_STT_LANGUAGE: "multi" for automatic multilingual support (10 languages).
|
||||
|
||||
## @param agents.podAnnotations Annotations to add to the agents Pod
|
||||
podAnnotations: {}
|
||||
|
||||
Reference in New Issue
Block a user