♻️(agent) simplify Deepgram config and support Kyutai
The previous attempt to make the Deepgram configuration extensible introduced unnecessary complexity for a very limited use case and made it harder to add new STT backends. Refactor to a deliberately simple and explicit design with minimal cognitive overhead. Configuration is now fully driven by environment variables: `STT_PROVIDER` selects the backend (`deepgram` by default, or `kyutai`), while `DEEPGRAM_STT_MODEL`, `DEEPGRAM_STT_LANGUAGE` and `KYUTAI_STT_BASE_URL` parameterize it. This gives ops enough flexibility to select and parameterize the STT backend.
This commit is contained in:
committed by
aleb_the_flash
parent
b466515306
commit
cff1dbf39e
@@ -8,6 +8,8 @@ and this project adheres to
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
- ✨(agent) support Kyutai client for subtitles
|
||||
|
||||
## [1.1.0] - 2025-12-22
|
||||
|
||||
### Added
|
||||
|
||||
@@ -5,6 +5,7 @@ import logging
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from lasuite.plugins import kyutai
|
||||
from livekit import api, rtc
|
||||
from livekit.agents import (
|
||||
Agent,
|
||||
@@ -28,60 +29,25 @@ load_dotenv()
|
||||
logger = logging.getLogger("transcriber")
|
||||
|
||||
TRANSCRIBER_AGENT_NAME = os.getenv("TRANSCRIBER_AGENT_NAME", "multi-user-transcriber")
|
||||
|
||||
# Default Deepgram STT configuration
|
||||
DEEPGRAM_STT_DEFAULTS = {
|
||||
"model": "nova-3",
|
||||
"language": "multi",
|
||||
}
|
||||
|
||||
# Supported parameters for LiveKit's deepgram.STT() in streaming mode
|
||||
# Note: Not all Deepgram API parameters are supported by the LiveKit plugin
|
||||
# detect_language is NOT supported for real-time streaming
|
||||
# Use language="multi" instead for automatic multilingual support
|
||||
DEEPGRAM_STT_SUPPORTED_PARAMS = {
|
||||
"model",
|
||||
"language",
|
||||
}
|
||||
STT_PROVIDER = os.getenv("STT_PROVIDER", "deepgram")
|
||||
|
||||
|
||||
def _build_deepgram_stt_kwargs():
|
||||
"""Build Deepgram STT kwargs from DEEPGRAM_STT_* environment variables.
|
||||
def create_stt_provider():
|
||||
"""Create STT provider based on environment configuration."""
|
||||
if STT_PROVIDER == "deepgram":
|
||||
# Note: Not all Deepgram API parameters are supported by the LiveKit plugin
|
||||
# detect_language is NOT supported for real-time streaming
|
||||
# Use language="multi" instead for automatic multilingual support
|
||||
_stt_instance = deepgram.STT(
|
||||
model=os.getenv("DEEPGRAM_STT_MODEL", "nova-3"),
|
||||
language=os.getenv("DEEPGRAM_STT_LANGUAGE", "multi"),
|
||||
)
|
||||
elif STT_PROVIDER == "kyutai":
|
||||
_stt_instance = kyutai.STT(base_url=os.getenv("KYUTAI_STT_BASE_URL"))
|
||||
else:
|
||||
raise ValueError(f"Unknown STT_PROVIDER: {STT_PROVIDER}")
|
||||
|
||||
Only parameters supported by LiveKit's deepgram.STT() are included.
|
||||
Unsupported parameters are logged as warnings.
|
||||
"""
|
||||
stt_kwargs = DEEPGRAM_STT_DEFAULTS.copy()
|
||||
|
||||
# Scan environment variables for DEEPGRAM_STT_* pattern
|
||||
for key, value in os.environ.items():
|
||||
if key.startswith("DEEPGRAM_STT_"):
|
||||
# Extract parameter name and convert to lowercase
|
||||
param_name = key.replace("DEEPGRAM_STT_", "", 1).lower()
|
||||
|
||||
# Check if parameter is supported by LiveKit plugin
|
||||
if param_name not in DEEPGRAM_STT_SUPPORTED_PARAMS:
|
||||
supported = ", ".join(sorted(DEEPGRAM_STT_SUPPORTED_PARAMS))
|
||||
logger.warning(
|
||||
f"Ignoring unsupported Deepgram STT parameter: {param_name}. "
|
||||
f"Supported parameters: {supported}"
|
||||
)
|
||||
continue
|
||||
|
||||
# Parse value type
|
||||
value_lower = value.lower()
|
||||
if value_lower in ("true", "false"):
|
||||
# Boolean values
|
||||
stt_kwargs[param_name] = value_lower == "true"
|
||||
elif value.isdigit():
|
||||
# Integer values
|
||||
stt_kwargs[param_name] = int(value)
|
||||
else:
|
||||
# String values
|
||||
stt_kwargs[param_name] = value
|
||||
|
||||
logger.info(f"Deepgram STT configuration: {stt_kwargs}")
|
||||
return stt_kwargs
|
||||
return _stt_instance
|
||||
|
||||
|
||||
class Transcriber(Agent):
|
||||
@@ -89,12 +55,11 @@ class Transcriber(Agent):
|
||||
|
||||
def __init__(self, *, participant_identity: str):
|
||||
"""Init transcription agent."""
|
||||
# Build STT configuration from environment variables
|
||||
stt_kwargs = _build_deepgram_stt_kwargs()
|
||||
stt = create_stt_provider()
|
||||
|
||||
super().__init__(
|
||||
instructions="not-needed",
|
||||
stt=deepgram.STT(**stt_kwargs),
|
||||
stt=stt,
|
||||
)
|
||||
self.participant_identity = participant_identity
|
||||
|
||||
|
||||
Reference in New Issue
Block a user