🔧(agents) make Silero VAD optional

Allow configuring whether a VAD model runs before calling an external ASR API.
Running VAD can save API calls (and costs) when no audible sound is detected,
but comes with the trade-off of additional computational overhead.
This commit is contained in:
lebaudantoine
2026-01-08 17:50:12 +01:00
committed by aleb_the_flash
parent 137a2c7f6f
commit 35b3bcad63
3 changed files with 7 additions and 4 deletions

View File

@@ -11,6 +11,7 @@ and this project adheres to
### Added
- ✨(summary) add Dutch and German languages
- 🔧(agents) make Silero VAD optional
### Changed

View File

@@ -31,6 +31,7 @@ logger = logging.getLogger("transcriber")
TRANSCRIBER_AGENT_NAME = os.getenv("TRANSCRIBER_AGENT_NAME", "multi-user-transcriber")
STT_PROVIDER = os.getenv("STT_PROVIDER", "deepgram")
ENABLE_SILERO_VAD = os.getenv("ENABLE_SILERO_VAD", "true").lower() == "true"
def create_stt_provider():
@@ -122,9 +123,8 @@ class MultiUserTranscriber:
if participant.identity in self._sessions:
return self._sessions[participant.identity]
session = AgentSession(
vad=self.ctx.proc.userdata["vad"],
)
vad = self.ctx.proc.userdata.get("vad", None)
session = AgentSession(vad=vad)
room_io = RoomIO(
agent_session=session,
room=self.ctx.room,
@@ -193,7 +193,8 @@ async def handle_transcriber_job_request(job_req: JobRequest) -> None:
def prewarm(proc: JobProcess):
"""Preload voice activity detection model."""
proc.userdata["vad"] = silero.VAD.load()
if ENABLE_SILERO_VAD:
proc.userdata["vad"] = silero.VAD.load()
if __name__ == "__main__":

View File

@@ -267,6 +267,7 @@ agents:
LIVEKIT_API_KEY: {{ $key }}
{{- end }}
{{- end }}
ENABLE_SILERO_VAD: "false"
image:
repository: localhost:5001/meet-agents