🔧(agents) make Silero VAD optional
Allow configuring whether a voice activity detection (VAD) model runs before calling an external automatic speech recognition (ASR) API. Running VAD can save API calls (and costs) when no audible sound is detected, but at the cost of additional computational overhead.
This commit is contained in:
committed by
aleb_the_flash
parent
137a2c7f6f
commit
35b3bcad63
@@ -11,6 +11,7 @@ and this project adheres to
|
|||||||
### Added
|
### Added
|
||||||
|
|
||||||
- ✨(summary) add dutch and german languages
|
- ✨(summary) add dutch and german languages
|
||||||
|
- 🔧(agents) make Silero VAD optional
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
|
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ logger = logging.getLogger("transcriber")
|
|||||||
|
|
||||||
TRANSCRIBER_AGENT_NAME = os.getenv("TRANSCRIBER_AGENT_NAME", "multi-user-transcriber")
|
TRANSCRIBER_AGENT_NAME = os.getenv("TRANSCRIBER_AGENT_NAME", "multi-user-transcriber")
|
||||||
STT_PROVIDER = os.getenv("STT_PROVIDER", "deepgram")
|
STT_PROVIDER = os.getenv("STT_PROVIDER", "deepgram")
|
||||||
|
ENABLE_SILERO_VAD = os.getenv("ENABLE_SILERO_VAD", "true").lower() == "true"
|
||||||
|
|
||||||
|
|
||||||
def create_stt_provider():
|
def create_stt_provider():
|
||||||
@@ -122,9 +123,8 @@ class MultiUserTranscriber:
|
|||||||
if participant.identity in self._sessions:
|
if participant.identity in self._sessions:
|
||||||
return self._sessions[participant.identity]
|
return self._sessions[participant.identity]
|
||||||
|
|
||||||
session = AgentSession(
|
vad = self.ctx.proc.userdata.get("vad", None)
|
||||||
vad=self.ctx.proc.userdata["vad"],
|
session = AgentSession(vad=vad)
|
||||||
)
|
|
||||||
room_io = RoomIO(
|
room_io = RoomIO(
|
||||||
agent_session=session,
|
agent_session=session,
|
||||||
room=self.ctx.room,
|
room=self.ctx.room,
|
||||||
@@ -193,7 +193,8 @@ async def handle_transcriber_job_request(job_req: JobRequest) -> None:
|
|||||||
|
|
||||||
def prewarm(proc: JobProcess):
|
def prewarm(proc: JobProcess):
|
||||||
"""Preload voice activity detection model."""
|
"""Preload voice activity detection model."""
|
||||||
proc.userdata["vad"] = silero.VAD.load()
|
if ENABLE_SILERO_VAD:
|
||||||
|
proc.userdata["vad"] = silero.VAD.load()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -267,6 +267,7 @@ agents:
|
|||||||
LIVEKIT_API_KEY: {{ $key }}
|
LIVEKIT_API_KEY: {{ $key }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
ENABLE_SILERO_VAD: "false"
|
||||||
|
|
||||||
image:
|
image:
|
||||||
repository: localhost:5001/meet-agents
|
repository: localhost:5001/meet-agents
|
||||||
|
|||||||
Reference in New Issue
Block a user