♻️(agent) simplify Deepgram config and support Kyutai

The previous attempt to make the Deepgram configuration extensible
introduced unnecessary complexity for a very limited use case and
made it harder to add new STT backends.

Refactor to a deliberately simple and explicit design with minimal
cognitive overhead. Configuration is now fully driven by environment
variables and provides enough flexibility for ops to select and
parameterize the STT backend.
This commit is contained in:
lebaudantoine
2025-12-26 17:37:05 +01:00
committed by aleb_the_flash
parent b466515306
commit cff1dbf39e
2 changed files with 21 additions and 54 deletions

View File

@@ -8,6 +8,8 @@ and this project adheres to
## [Unreleased]
- ✨(agent) support Kyutai client for subtitles
## [1.1.0] - 2025-12-22
### Added

View File

@@ -5,6 +5,7 @@ import logging
import os
from dotenv import load_dotenv
from lasuite.plugins import kyutai
from livekit import api, rtc
from livekit.agents import (
Agent,
@@ -28,60 +29,25 @@ load_dotenv()
logger = logging.getLogger("transcriber")
TRANSCRIBER_AGENT_NAME = os.getenv("TRANSCRIBER_AGENT_NAME", "multi-user-transcriber")
# Default Deepgram STT configuration
DEEPGRAM_STT_DEFAULTS = {
"model": "nova-3",
"language": "multi",
}
# Supported parameters for LiveKit's deepgram.STT() in streaming mode
# Note: Not all Deepgram API parameters are supported by the LiveKit plugin
# detect_language is NOT supported for real-time streaming
# Use language="multi" instead for automatic multilingual support
DEEPGRAM_STT_SUPPORTED_PARAMS = {
"model",
"language",
}
STT_PROVIDER = os.getenv("STT_PROVIDER", "deepgram")
def _build_deepgram_stt_kwargs():
"""Build Deepgram STT kwargs from DEEPGRAM_STT_* environment variables.
def create_stt_provider():
"""Create STT provider based on environment configuration."""
if STT_PROVIDER == "deepgram":
# Note: not all Deepgram API parameters are supported by the LiveKit plugin.
# detect_language is NOT supported for real-time streaming;
# use language="multi" instead for automatic multilingual support.
_stt_instance = deepgram.STT(
model=os.getenv("DEEPGRAM_STT_MODEL", "nova-3"),
language=os.getenv("DEEPGRAM_STT_LANGUAGE", "multi"),
)
elif STT_PROVIDER == "kyutai":
_stt_instance = kyutai.STT(base_url=os.getenv("KYUTAI_STT_BASE_URL"))
else:
raise ValueError(f"Unknown STT_PROVIDER: {STT_PROVIDER}")
Only parameters supported by LiveKit's deepgram.STT() are included.
Unsupported parameters are logged as warnings.
"""
stt_kwargs = DEEPGRAM_STT_DEFAULTS.copy()
# Scan environment variables for DEEPGRAM_STT_* pattern
for key, value in os.environ.items():
if key.startswith("DEEPGRAM_STT_"):
# Extract parameter name and convert to lowercase
param_name = key.replace("DEEPGRAM_STT_", "", 1).lower()
# Check if parameter is supported by LiveKit plugin
if param_name not in DEEPGRAM_STT_SUPPORTED_PARAMS:
supported = ", ".join(sorted(DEEPGRAM_STT_SUPPORTED_PARAMS))
logger.warning(
f"Ignoring unsupported Deepgram STT parameter: {param_name}. "
f"Supported parameters: {supported}"
)
continue
# Parse value type
value_lower = value.lower()
if value_lower in ("true", "false"):
# Boolean values
stt_kwargs[param_name] = value_lower == "true"
elif value.isdigit():
# Integer values
stt_kwargs[param_name] = int(value)
else:
# String values
stt_kwargs[param_name] = value
logger.info(f"Deepgram STT configuration: {stt_kwargs}")
return stt_kwargs
return _stt_instance
class Transcriber(Agent):
@@ -89,12 +55,11 @@ class Transcriber(Agent):
def __init__(self, *, participant_identity: str):
"""Init transcription agent."""
# Build STT configuration from environment variables
stt_kwargs = _build_deepgram_stt_kwargs()
stt = create_stt_provider()
super().__init__(
instructions="not-needed",
stt=deepgram.STT(**stt_kwargs),
stt=stt,
)
self.participant_identity = participant_identity