From cff1dbf39eb5372f197331a43e146554b921f2e2 Mon Sep 17 00:00:00 2001 From: lebaudantoine Date: Fri, 26 Dec 2025 17:37:05 +0100 Subject: [PATCH] =?UTF-8?q?=E2=99=BB=EF=B8=8F(agent)=20simplify=20Deepgram?= =?UTF-8?q?=20config=20and=20support=20Kyutai?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous attempt to make the Deepgram configuration extensible introduced unnecessary complexity for a very limited use case and made it harder to add new STT backends. Refactor to a deliberately simple and explicit design with minimal cognitive overhead. Configuration is now fully driven by environment variables and provides enough flexibility for ops to select and parameterize the STT backend. --- CHANGELOG.md | 2 + src/agents/multi-user-transcriber.py | 73 ++++++++-------------------- 2 files changed, 21 insertions(+), 54 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 010d11a0..32bd83ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ and this project adheres to ## [Unreleased] +- ✨(agent) support Kyutai client for subtitle + ## [1.1.0] - 2025-12-22 ### Added diff --git a/src/agents/multi-user-transcriber.py b/src/agents/multi-user-transcriber.py index 9b6c3521..d13e562b 100644 --- a/src/agents/multi-user-transcriber.py +++ b/src/agents/multi-user-transcriber.py @@ -5,6 +5,7 @@ import logging import os from dotenv import load_dotenv +from lasuite.plugins import kyutai from livekit import api, rtc from livekit.agents import ( Agent, @@ -28,60 +29,25 @@ load_dotenv() logger = logging.getLogger("transcriber") TRANSCRIBER_AGENT_NAME = os.getenv("TRANSCRIBER_AGENT_NAME", "multi-user-transcriber") - -# Default Deepgram STT configuration -DEEPGRAM_STT_DEFAULTS = { - "model": "nova-3", - "language": "multi", -} - -# Supported parameters for LiveKit's deepgram.STT() in streaming mode -# Note: Not all Deepgram API parameters are supported by the LiveKit plugin -# detect_language is NOT supported for real-time streaming -# Use language="multi" instead for automatic multilingual support -DEEPGRAM_STT_SUPPORTED_PARAMS = { - "model", - "language", -} +STT_PROVIDER = os.getenv("STT_PROVIDER", "deepgram") -def _build_deepgram_stt_kwargs(): - """Build Deepgram STT kwargs from DEEPGRAM_STT_* environment variables. +def create_stt_provider(): + """Create STT provider based on environment configuration.""" + if STT_PROVIDER == "deepgram": + # Note: Not all Deepgram API parameters are supported by the LiveKit plugin + # detect_language is NOT supported for real-time streaming + # Use language="multi" instead for automatic multilingual support + _stt_instance = deepgram.STT( + model=os.getenv("DEEPGRAM_STT_MODEL", "nova-3"), + language=os.getenv("DEEPGRAM_STT_LANGUAGE", "multi"), + ) + elif STT_PROVIDER == "kyutai": + _stt_instance = kyutai.STT(base_url=os.getenv("KYUTAI_STT_BASE_URL")) + else: + raise ValueError(f"Unknown STT_PROVIDER: {STT_PROVIDER}") - Only parameters supported by LiveKit's deepgram.STT() are included. - Unsupported parameters are logged as warnings. - """ - stt_kwargs = DEEPGRAM_STT_DEFAULTS.copy() - - # Scan environment variables for DEEPGRAM_STT_* pattern - for key, value in os.environ.items(): - if key.startswith("DEEPGRAM_STT_"): - # Extract parameter name and convert to lowercase - param_name = key.replace("DEEPGRAM_STT_", "", 1).lower() - - # Check if parameter is supported by LiveKit plugin - if param_name not in DEEPGRAM_STT_SUPPORTED_PARAMS: - supported = ", ".join(sorted(DEEPGRAM_STT_SUPPORTED_PARAMS)) - logger.warning( - f"Ignoring unsupported Deepgram STT parameter: {param_name}. " - f"Supported parameters: {supported}" - ) - continue - - # Parse value type - value_lower = value.lower() - if value_lower in ("true", "false"): - # Boolean values - stt_kwargs[param_name] = value_lower == "true" - elif value.isdigit(): - # Integer values - stt_kwargs[param_name] = int(value) - else: - # String values - stt_kwargs[param_name] = value - - logger.info(f"Deepgram STT configuration: {stt_kwargs}") - return stt_kwargs + return _stt_instance class Transcriber(Agent): @@ -89,12 +55,11 @@ class Transcriber(Agent): def __init__(self, *, participant_identity: str): """Init transcription agent.""" - # Build STT configuration from environment variables - stt_kwargs = _build_deepgram_stt_kwargs() + stt = create_stt_provider() super().__init__( instructions="not-needed", - stt=deepgram.STT(**stt_kwargs), + stt=stt, ) self.participant_identity = participant_identity