✨(agents) add multilingual support for real-time subtitles
Add dynamic configuration for Deepgram STT via environment variables, enabling multilingual real-time subtitles with automatic language detection.

Changes:
- Add DEEPGRAM_STT_* environment variables pattern for configuration
- Implement _build_deepgram_stt_kwargs() to dynamically build STT parameters from environment variables
- Add whitelist of supported parameters (model, language) for LiveKit Deepgram plugin
- Log warnings for unsupported parameters (diarize, smart_format, etc.)
- Set default configuration: model=nova-3, language=multi
- Document supported parameters in Helm values.yaml

Configuration:
- DEEPGRAM_STT_MODEL: Deepgram model (default: nova-3)
- DEEPGRAM_STT_LANGUAGE: Language or 'multi' for automatic detection of 10 languages (en, es, fr, de, hi, ru, pt, ja, it, nl)

Note: Advanced features like diarization and smart_format are not supported by the LiveKit Deepgram plugin in streaming mode.
This commit is contained in:
committed by
aleb_the_flash
parent
b403ac56bf
commit
9f9cef7e2a
@@ -29,15 +29,72 @@ logger = logging.getLogger("transcriber")
|
|||||||
|
|
||||||
TRANSCRIBER_AGENT_NAME = os.getenv("TRANSCRIBER_AGENT_NAME", "multi-user-transcriber")
|
TRANSCRIBER_AGENT_NAME = os.getenv("TRANSCRIBER_AGENT_NAME", "multi-user-transcriber")
|
||||||
|
|
||||||
|
# Default Deepgram STT configuration, applied to every parameter that is not
# overridden through a DEEPGRAM_STT_* environment variable
DEEPGRAM_STT_DEFAULTS = {
    "model": "nova-3",
    "language": "multi",
}

# Supported parameters for LiveKit's deepgram.STT() in streaming mode
# Note: Not all Deepgram API parameters are supported by the LiveKit plugin
# detect_language is NOT supported for real-time streaming
# Use language="multi" instead for automatic multilingual support
# Declared as a frozenset so the whitelist cannot be mutated at runtime
DEEPGRAM_STT_SUPPORTED_PARAMS = frozenset(
    {
        "model",
        "language",
    }
)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_deepgram_stt_kwargs() -> dict:
    """Build Deepgram STT kwargs from DEEPGRAM_STT_* environment variables.

    Starts from a copy of DEEPGRAM_STT_DEFAULTS and overrides it with every
    ``DEEPGRAM_STT_<NAME>`` environment variable whose lowercased ``<name>``
    is listed in DEEPGRAM_STT_SUPPORTED_PARAMS.

    Only parameters supported by LiveKit's deepgram.STT() are included.
    Unsupported parameters are logged as warnings. Variables set to an empty
    or whitespace-only value are also ignored (with a warning) so that the
    default stays in effect instead of being replaced by an empty string.

    Values are coerced from their string form: "true"/"false" (any case)
    become booleans, strings made only of decimal digits become integers,
    and anything else is passed through as a (whitespace-stripped) string.

    Returns:
        The keyword arguments to pass to deepgram.STT().
    """
    prefix = "DEEPGRAM_STT_"
    stt_kwargs = DEEPGRAM_STT_DEFAULTS.copy()
    supported = ", ".join(sorted(DEEPGRAM_STT_SUPPORTED_PARAMS))

    # Scan environment variables for DEEPGRAM_STT_* pattern
    for key, raw_value in os.environ.items():
        if not key.startswith(prefix):
            continue

        # Extract parameter name and convert to lowercase
        param_name = key[len(prefix):].lower()

        # Check if parameter is supported by LiveKit plugin
        if param_name not in DEEPGRAM_STT_SUPPORTED_PARAMS:
            logger.warning(
                "Ignoring unsupported Deepgram STT parameter: %s. "
                "Supported parameters: %s",
                param_name,
                supported,
            )
            continue

        value = raw_value.strip()
        if not value:
            # A variable that is declared but left blank must not wipe out
            # the default value
            logger.warning(
                "Ignoring empty value for Deepgram STT parameter: %s",
                param_name,
            )
            continue

        # Parse value type
        value_lower = value.lower()
        if value_lower in ("true", "false"):
            # Boolean values
            stt_kwargs[param_name] = value_lower == "true"
        elif value.isdecimal():
            # Integer values; isdecimal() (unlike isdigit()) only accepts
            # characters that int() is able to parse
            stt_kwargs[param_name] = int(value)
        else:
            # String values
            stt_kwargs[param_name] = value

    logger.info("Deepgram STT configuration: %s", stt_kwargs)
    return stt_kwargs
|
||||||
|
|
||||||
|
|
||||||
class Transcriber(Agent):
|
class Transcriber(Agent):
|
||||||
"""Create a transcription agent for a specific participant."""
|
"""Create a transcription agent for a specific participant."""
|
||||||
|
|
||||||
def __init__(self, *, participant_identity: str):
|
def __init__(self, *, participant_identity: str):
|
||||||
"""Init transcription agent."""
|
"""Init transcription agent."""
|
||||||
|
# Build STT configuration from environment variables
|
||||||
|
stt_kwargs = _build_deepgram_stt_kwargs()
|
||||||
|
|
||||||
super().__init__(
|
super().__init__(
|
||||||
instructions="not-needed",
|
instructions="not-needed",
|
||||||
stt=deepgram.STT(),
|
stt=deepgram.STT(**stt_kwargs),
|
||||||
)
|
)
|
||||||
self.participant_identity = participant_identity
|
self.participant_identity = participant_identity
|
||||||
|
|
||||||
|
|||||||
@@ -788,9 +788,21 @@ agents:
|
|||||||
## @extra agents.envVars.FROM_CONFIGMAP.configMapKeyRef.key Key within a ConfigMap when configuring env vars from a ConfigMap
|
## @extra agents.envVars.FROM_CONFIGMAP.configMapKeyRef.key Key within a ConfigMap when configuring env vars from a ConfigMap
|
||||||
## @extra agents.envVars.FROM_SECRET.secretKeyRef.name Name of a Secret when configuring env vars from a Secret
|
## @extra agents.envVars.FROM_SECRET.secretKeyRef.name Name of a Secret when configuring env vars from a Secret
|
||||||
## @extra agents.envVars.FROM_SECRET.secretKeyRef.key Key within a Secret when configuring env vars from a Secret
|
## @extra agents.envVars.FROM_SECRET.secretKeyRef.key Key within a Secret when configuring env vars from a Secret
|
||||||
|
## @extra agents.envVars.DEEPGRAM_STT_MODEL Deepgram model to use for speech-to-text (default: nova-3)
|
||||||
|
## @extra agents.envVars.DEEPGRAM_STT_LANGUAGE Language code for transcription or 'multi' for automatic multilingual support with real-time code-switching (default: multi, supports: en, es, fr, de, hi, ru, pt, ja, it, nl)
|
||||||
## @skip agents.envVars
|
## @skip agents.envVars
|
||||||
envVars:
|
envVars:
|
||||||
<<: *commonEnvVars
|
<<: *commonEnvVars
|
||||||
|
# Deepgram Speech-to-Text configuration for real-time streaming
|
||||||
|
# Only 'model' and 'language' parameters are supported by the LiveKit plugin
|
||||||
|
#
|
||||||
|
# DEEPGRAM_STT_MODEL: "nova-3" # Model selection (default)
|
||||||
|
# DEEPGRAM_STT_LANGUAGE: "multi" # Multilingual mode with auto-detection (default)
|
||||||
|
# DEEPGRAM_STT_LANGUAGE: "fr" # Force French only
|
||||||
|
#
|
||||||
|
# Note: Advanced features (diarization, smart_format, punctuate, detect_language)
|
||||||
|
# are NOT supported by the LiveKit Deepgram plugin in streaming mode.
|
||||||
|
# Use language="multi" for automatic multilingual support (10 languages).
|
||||||
|
|
||||||
## @param agents.podAnnotations Annotations to add to the agents Pod
|
## @param agents.podAnnotations Annotations to add to the agents Pod
|
||||||
podAnnotations: {}
|
podAnnotations: {}
|
||||||
|
|||||||
Reference in New Issue
Block a user