🔧(agents) make Silero VAD optional
Allow configuring whether a VAD (voice activity detection) model runs before calling an external ASR API. Running VAD can save API calls (and costs) when no audible sound is detected, at the cost of additional computational overhead.
This commit is contained in:
committed by
aleb_the_flash
parent
137a2c7f6f
commit
35b3bcad63
@@ -11,6 +11,7 @@ and this project adheres to
|
||||
### Added
|
||||
|
||||
- ✨(summary) add Dutch and German languages
|
||||
- 🔧(agents) make Silero VAD optional
|
||||
|
||||
### Changed
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@ logger = logging.getLogger("transcriber")
|
||||
|
||||
TRANSCRIBER_AGENT_NAME = os.getenv("TRANSCRIBER_AGENT_NAME", "multi-user-transcriber")
|
||||
STT_PROVIDER = os.getenv("STT_PROVIDER", "deepgram")
|
||||
ENABLE_SILERO_VAD = os.getenv("ENABLE_SILERO_VAD", "true").lower() == "true"
|
||||
|
||||
|
||||
def create_stt_provider():
|
||||
@@ -122,9 +123,8 @@ class MultiUserTranscriber:
|
||||
if participant.identity in self._sessions:
|
||||
return self._sessions[participant.identity]
|
||||
|
||||
session = AgentSession(
|
||||
vad=self.ctx.proc.userdata["vad"],
|
||||
)
|
||||
vad = self.ctx.proc.userdata.get("vad", None)
|
||||
session = AgentSession(vad=vad)
|
||||
room_io = RoomIO(
|
||||
agent_session=session,
|
||||
room=self.ctx.room,
|
||||
@@ -193,7 +193,8 @@ async def handle_transcriber_job_request(job_req: JobRequest) -> None:
|
||||
|
||||
def prewarm(proc: JobProcess):
|
||||
"""Preload voice activity detection model."""
|
||||
proc.userdata["vad"] = silero.VAD.load()
|
||||
if ENABLE_SILERO_VAD:
|
||||
proc.userdata["vad"] = silero.VAD.load()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -267,6 +267,7 @@ agents:
|
||||
LIVEKIT_API_KEY: {{ $key }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
ENABLE_SILERO_VAD: "false"
|
||||
|
||||
image:
|
||||
repository: localhost:5001/meet-agents
|
||||
|
||||
Reference in New Issue
Block a user