🔧(agents) make Silero VAD optional

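Allow configuring whether the Silero VAD (voice activity detection) model runs
before calling an external ASR (automatic speech recognition) API. Running VAD
can save API calls (and costs) when no audible sound is detected, at the price
of additional computational overhead. The behavior is controlled by the
ENABLE_SILERO_VAD environment variable, which defaults to "true".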
This commit is contained in:
lebaudantoine
2026-01-08 17:50:12 +01:00
committed by aleb_the_flash
parent 137a2c7f6f
commit 35b3bcad63
3 changed files with 7 additions and 4 deletions

View File

@@ -11,6 +11,7 @@ and this project adheres to
### Added ### Added
- ✨(summary) add dutch and german languages - ✨(summary) add dutch and german languages
- 🔧(agents) make Silero VAD optional
### Changed ### Changed

View File

@@ -31,6 +31,7 @@ logger = logging.getLogger("transcriber")
TRANSCRIBER_AGENT_NAME = os.getenv("TRANSCRIBER_AGENT_NAME", "multi-user-transcriber") TRANSCRIBER_AGENT_NAME = os.getenv("TRANSCRIBER_AGENT_NAME", "multi-user-transcriber")
STT_PROVIDER = os.getenv("STT_PROVIDER", "deepgram") STT_PROVIDER = os.getenv("STT_PROVIDER", "deepgram")
ENABLE_SILERO_VAD = os.getenv("ENABLE_SILERO_VAD", "true").lower() == "true"
def create_stt_provider(): def create_stt_provider():
@@ -122,9 +123,8 @@ class MultiUserTranscriber:
if participant.identity in self._sessions: if participant.identity in self._sessions:
return self._sessions[participant.identity] return self._sessions[participant.identity]
session = AgentSession( vad = self.ctx.proc.userdata.get("vad", None)
vad=self.ctx.proc.userdata["vad"], session = AgentSession(vad=vad)
)
room_io = RoomIO( room_io = RoomIO(
agent_session=session, agent_session=session,
room=self.ctx.room, room=self.ctx.room,
@@ -193,7 +193,8 @@ async def handle_transcriber_job_request(job_req: JobRequest) -> None:
def prewarm(proc: JobProcess): def prewarm(proc: JobProcess):
"""Preload voice activity detection model.""" """Preload voice activity detection model."""
proc.userdata["vad"] = silero.VAD.load() if ENABLE_SILERO_VAD:
proc.userdata["vad"] = silero.VAD.load()
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -267,6 +267,7 @@ agents:
LIVEKIT_API_KEY: {{ $key }} LIVEKIT_API_KEY: {{ $key }}
{{- end }} {{- end }}
{{- end }} {{- end }}
ENABLE_SILERO_VAD: "false"
image: image:
repository: localhost:5001/meet-agents repository: localhost:5001/meet-agents