(backend) add start-subtitle endpoint

Allow any user, anonymous or authenticated, to start subtitling
in a room, provided they are an active participant in it.

Subtitling a room consists of starting the multi-user transcriber agent.
This agent forwards all participants' audio to an STT server and returns
transcription segments for any active voice to the room.

User roles in the backend room system cannot be used
to determine subtitle permissions.

The transcriber agent can be triggered multiple times but will only join a
room once. Uniqueness is enforced by the agent itself.
Any user with a valid LiveKit token can initiate subtitles. Feature flag
logic is implemented on the frontend. The frontend ensures the "start
subtitle" action is only available to users who should see it. The backend
does not enforce feature flags in this version.

Authentication in our system does not imply access to a room. The only
valid proof of access is the LiveKit API token issued by the backend.
Security consideration: A LiveKit API token is valid for 6 hours and
cannot be revoked at the end of a meeting. It is important to verify
that the token was issued for the correct room.

Calls to the agent dispatch endpoint must be server-initiated. The backend
proxies these calls, as clients cannot securely contact the agent dispatch
endpoint directly (per LiveKit documentation).

Room ID is passed in the URL path. There is currently no validation
ensuring that the room exists prior to agent dispatch.
TODO: implement validation or error handling for non-existent rooms.

The backend does not forward LiveKit tokens to the agent. Default API
rate limiting is applied to prevent abuse.
This commit is contained in:
lebaudantoine
2025-08-19 15:29:19 +02:00
committed by aleb_the_flash
parent 49ee46438b
commit f48dd5cea1
9 changed files with 422 additions and 0 deletions

View File

@@ -50,6 +50,7 @@ def get_frontend_configuration(request):
else None,
"default_country": settings.ROOM_TELEPHONY_DEFAULT_COUNTRY,
},
"subtitle": {"enabled": settings.ROOM_SUBTITLE_ENABLED},
"livekit": {
"url": settings.LIVEKIT_CONFIGURATION["url"],
"force_wss_protocol": settings.LIVEKIT_FORCE_WSS_PROTOCOL,

View File

@@ -119,3 +119,22 @@ class IsStorageEventEnabled(permissions.BasePermission):
def has_permission(self, request, view):
    """Return the RECORDING_STORAGE_EVENT_ENABLE setting as the decision."""
    storage_event_enabled = settings.RECORDING_STORAGE_EVENT_ENABLE
    return storage_event_enabled
class IsSubtitleEnabled(permissions.BasePermission):
    """Permission that is granted only while the subtitle feature is enabled."""

    # Text attached to the denial when this permission fails.
    message = "Access denied, subtitles are disabled."

    def has_permission(self, request, view):
        """Return the ROOM_SUBTITLE_ENABLED setting as the decision."""
        subtitle_enabled = settings.ROOM_SUBTITLE_ENABLED
        return subtitle_enabled
class HasLiveKitRoomAccess(permissions.BasePermission):
    """Check if authenticated user's LiveKit token is for the specific room.

    The check reads ``request.auth``, which is presumably the verified
    LiveKit claims object produced by the authentication class configured
    on the view (verify against the view's ``authentication_classes``).
    """

    def has_object_permission(self, request, view, obj):
        """Return True only when the token's video grant targets ``obj``.

        Args:
            request: The current request; ``request.auth`` carries the claims.
            view: The view being accessed (unused).
            obj: The room object; its ``id`` is compared as a string.

        Returns:
            False when there are no claims, when the claims have no usable
            ``video`` grant, or when the granted room differs from ``obj.id``.
        """
        claims = request.auth
        if not claims:
            return False

        # ``video`` can be missing *or* present but None; both mean the token
        # grants no room access, so deny instead of failing on ``.room``.
        video_grants = getattr(claims, "video", None)
        if video_grants is None:
            return False

        return video_grants.room == str(obj.id)

View File

@@ -51,7 +51,9 @@ from core.services.lobby import (
LobbyService,
)
from core.services.room_creation import RoomCreation
from core.services.subtitle import SubtitleException, SubtitleService
from ..authentication.livekit import LiveKitTokenAuthentication
from . import permissions, serializers
# pylint: disable=too-many-ancestors
@@ -530,6 +532,37 @@ class RoomViewSet(
status=drf_status.HTTP_200_OK,
)
@decorators.action(
    detail=True,
    methods=["post"],
    url_path="start-subtitle",
    permission_classes=[
        permissions.IsSubtitleEnabled,
        permissions.HasLiveKitRoomAccess,
    ],
    authentication_classes=[LiveKitTokenAuthentication],
)
def start_subtitle(self, request, pk=None):  # pylint: disable=unused-argument
    """Start realtime transcription for the room.

    Authorization relies on a valid LiveKit token for this room, so
    anonymous users holding such a token can start subtitles too.

    Returns:
        200 with ``{"status": "success"}`` once the subtitle service call
        returns, or 500 with an ``error`` payload when the service raises
        ``SubtitleException``.
    """
    room = self.get_object()

    try:
        SubtitleService().start_subtitle(room)
    except SubtitleException:
        failure_payload = {
            "error": f"Subtitles failed to start for room {room.slug}"
        }
        return drf_response.Response(
            failure_payload,
            status=drf_status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
    else:
        success_payload = {"status": "success"}
        return drf_response.Response(
            success_payload, status=drf_status.HTTP_200_OK
        )
class ResourceAccessViewSet(
mixins.CreateModelMixin,

View File

@@ -0,0 +1,42 @@
"""Authentication using LiveKit token for the Meet core app."""
from django.conf import settings
from django.contrib.auth import get_user_model
from django.contrib.auth.models import AnonymousUser
from livekit.api import TokenVerifier
from rest_framework import authentication, exceptions
UserModel = get_user_model()
class LiveKitTokenAuthentication(authentication.BaseAuthentication):
    """Authenticate using LiveKit token and load the associated Django user."""

    def authenticate(self, request):
        """Verify the LiveKit token sent in the request body.

        Args:
            request: The incoming request; the token is read from the
                ``token`` key of ``request.data``.

        Returns:
            None when no token is supplied (no authentication attempted).
            Otherwise a ``(user, claims)`` tuple, where ``user`` is the
            Django user whose id equals the token identity, or an
            ``AnonymousUser`` when no such user exists, and ``claims`` is
            the verified token payload.

        Raises:
            AuthenticationFailed: If the token has no identity, or if
                verification or the user lookup raises any other error.
        """
        token = request.data.get("token")
        if not token:
            return None  # No authentication attempted

        try:
            verifier = TokenVerifier(
                api_key=settings.LIVEKIT_CONFIGURATION["api_key"],
                api_secret=settings.LIVEKIT_CONFIGURATION["api_secret"],
            )
            claims = verifier.verify(token)

            user_id = claims.identity
            if not user_id:
                raise exceptions.AuthenticationFailed("Token missing user identity")

            try:
                user = UserModel.objects.get(id=user_id)
            except UserModel.DoesNotExist:
                # The identity does not map to a stored user: keep the
                # verified claims but treat the caller as anonymous.
                user = AnonymousUser()

            return (user, claims)
        except exceptions.AuthenticationFailed:
            # Already a well-formed authentication error; re-raise it as-is
            # rather than wrapping it in a second "Invalid LiveKit token".
            raise
        except Exception as e:
            raise exceptions.AuthenticationFailed(
                f"Invalid LiveKit token: {e!s}"
            ) from e

View File

@@ -0,0 +1,47 @@
"""Service for managing subtitle agents in LiveKit rooms."""
from logging import getLogger
from django.conf import settings
from asgiref.sync import async_to_sync
from livekit.protocol.agent_dispatch import CreateAgentDispatchRequest
from core import utils
logger = getLogger(__name__)
class SubtitleException(Exception):
    """Exception raised when subtitle operations fail.

    SubtitleService.start_subtitle raises it when the agent dispatch call
    fails, chaining the underlying error as its cause.
    """
class SubtitleService:
    """Start and stop the subtitle agent attached to LiveKit rooms."""

    @async_to_sync
    async def start_subtitle(self, room):
        """Request a dispatch of the configured subtitle agent to ``room``.

        The agent name comes from ``settings.ROOM_SUBTITLE_AGENT_NAME`` and
        the room is identified by ``str(room.id)``. The LiveKit client is
        always closed, whether or not the dispatch succeeds.

        Raises:
            SubtitleException: If building or sending the dispatch request
                raises any exception; the original error is chained.
        """
        client = utils.create_livekit_client()

        try:
            # Calling this again for the same room is expected to be safe:
            # the transcriber agent itself prevents duplicates and does not
            # raise when one is already present.
            dispatch_request = CreateAgentDispatchRequest(
                agent_name=settings.ROOM_SUBTITLE_AGENT_NAME,
                room=str(room.id),
            )
            await client.agent_dispatch.create_dispatch(dispatch_request)
        except Exception as error:
            logger.exception("Failed to create agent dispatch for room %s", room.id)
            raise SubtitleException("Failed to create subtitle agent") from error
        finally:
            await client.aclose()

    @async_to_sync
    async def stop_subtitle(self, room) -> None:
        """Placeholder for stopping the subtitle agent; always raises.

        Raises:
            NotImplementedError: On every call.
        """
        raise NotImplementedError("Subtitle agent stopping not yet implemented")

View File

@@ -0,0 +1,221 @@
"""
Test rooms API endpoints in the Meet core app: start subtitle.
"""
# pylint: disable=W0621
import uuid
from unittest import mock
from django.conf import settings
import pytest
from livekit.api import AccessToken, TwirpError, VideoGrants
from rest_framework.test import APIClient
from ...factories import RoomFactory, UserFactory
pytestmark = pytest.mark.django_db
@pytest.fixture
def mock_room_id() -> str:
    """Provide the fixed room id used by the token fixture and the tests."""
    room_uuid = "d2aeb774-1ecd-4d73-a3ac-3d3530cad7ff"
    return room_uuid
@pytest.fixture
def mock_livekit_token(mock_room_id):
    """Build a LiveKit JWT for ``mock_room_id`` with a random identity.

    The token is signed with the API key and secret from
    ``settings.LIVEKIT_CONFIGURATION`` as they are when the fixture runs.
    """
    publish_sources = [
        "camera",
        "microphone",
        "screen_share",
        "screen_share_audio",
    ]
    grants = VideoGrants(
        room=mock_room_id,
        room_join=True,
        room_admin=True,
        can_update_own_metadata=True,
        can_publish_sources=publish_sources,
    )

    livekit_config = settings.LIVEKIT_CONFIGURATION
    access_token = AccessToken(
        api_key=livekit_config["api_key"],
        api_secret=livekit_config["api_secret"],
    )
    access_token = access_token.with_grants(grants)
    access_token = access_token.with_identity(str(uuid.uuid4()))

    return access_token.to_jwt()
@pytest.fixture
def mock_livekit_client():
    """Patch the LiveKit client factory and yield the AsyncMock it returns."""
    fake_client = mock.AsyncMock()
    with mock.patch(
        "core.utils.create_livekit_client", return_value=fake_client
    ):
        yield fake_client
def test_start_subtitle_missing_token_anonymous(settings):
    """An anonymous request without a LiveKit token is rejected with 403."""
    settings.ROOM_SUBTITLE_ENABLED = True
    room = RoomFactory()
    api_client = APIClient()
    url = f"/api/v1.0/rooms/{room.id}/start-subtitle/"

    response = api_client.post(url)

    expected_body = {"detail": "Authentication credentials were not provided."}
    assert response.status_code == 403
    assert response.json() == expected_body
def test_start_subtitle_missing_token_authenticated(settings):
    """A logged-in user without a LiveKit token is still rejected with 403."""
    settings.ROOM_SUBTITLE_ENABLED = True
    room = RoomFactory()
    logged_in_user = UserFactory()
    api_client = APIClient()
    api_client.force_login(logged_in_user)
    url = f"/api/v1.0/rooms/{room.id}/start-subtitle/"

    response = api_client.post(url)

    expected_body = {"detail": "Authentication credentials were not provided."}
    assert response.status_code == 403
    assert response.json() == expected_body
def test_start_subtitle_invalid_token():
    """A string that is not a well-formed JWT is rejected with 403."""
    room = RoomFactory()
    logged_in_user = UserFactory()
    api_client = APIClient()
    api_client.force_login(logged_in_user)
    url = f"/api/v1.0/rooms/{room.id}/start-subtitle/"
    payload = {"token": "invalid-token"}

    response = api_client.post(url, payload)

    expected_body = {"detail": "Invalid LiveKit token: Not enough segments"}
    assert response.status_code == 403
    assert response.json() == expected_body
def test_start_subtitle_disabled_by_default(mock_livekit_token):
    """With ROOM_SUBTITLE_ENABLED left untouched, the endpoint answers 403."""
    room = RoomFactory()
    logged_in_user = UserFactory()
    api_client = APIClient()
    api_client.force_login(logged_in_user)
    url = f"/api/v1.0/rooms/{room.id}/start-subtitle/"
    payload = {"token": mock_livekit_token}

    response = api_client.post(url, payload)

    expected_body = {"detail": "Access denied, subtitles are disabled."}
    assert response.status_code == 403
    assert response.json() == expected_body
def test_start_subtitle_valid_token(
    settings, mock_livekit_client, mock_livekit_token, mock_room_id
):
    """A valid token for the room triggers exactly one agent dispatch."""
    settings.ROOM_SUBTITLE_ENABLED = True
    # The room id matches the one the token fixture was issued for.
    room = RoomFactory(id=mock_room_id)
    api_client = APIClient()
    url = f"/api/v1.0/rooms/{room.id}/start-subtitle/"
    payload = {"token": mock_livekit_token}

    response = api_client.post(url, payload)

    assert response.status_code == 200
    assert response.json() == {"status": "success"}

    create_dispatch = mock_livekit_client.agent_dispatch.create_dispatch
    create_dispatch.assert_called_once()

    # The dispatch request is the first positional argument of the call.
    dispatch_request = create_dispatch.call_args.args[0]
    assert dispatch_request.agent_name == "multi-user-transcriber"
    assert dispatch_request.room == "d2aeb774-1ecd-4d73-a3ac-3d3530cad7ff"
def test_start_subtitle_twirp_error(
    settings, mock_livekit_client, mock_livekit_token, mock_room_id
):
    """A TwirpError raised by the dispatch call yields a 500 response."""
    settings.ROOM_SUBTITLE_ENABLED = True
    room = RoomFactory(id=mock_room_id)
    api_client = APIClient()

    dispatch_failure = TwirpError(
        msg="Internal server error", code=500, status=500
    )
    create_dispatch = mock_livekit_client.agent_dispatch.create_dispatch
    create_dispatch.side_effect = dispatch_failure

    url = f"/api/v1.0/rooms/{room.id}/start-subtitle/"
    payload = {"token": mock_livekit_token}
    response = api_client.post(url, payload)

    expected_body = {"error": f"Subtitles failed to start for room {room.slug}"}
    assert response.status_code == 500
    assert response.json() == expected_body
def test_start_subtitle_wrong_room(settings, mock_livekit_token):
    """A token issued for another room does not unlock this room."""
    settings.ROOM_SUBTITLE_ENABLED = True
    # Created without id=mock_room_id, so the room is presumably given an id
    # that differs from the one embedded in the token.
    other_room = RoomFactory()
    api_client = APIClient()
    url = f"/api/v1.0/rooms/{other_room.id}/start-subtitle/"
    payload = {"token": mock_livekit_token}

    response = api_client.post(url, payload)

    expected_body = {"detail": "You do not have permission to perform this action."}
    assert response.status_code == 403
    assert response.json() == expected_body
def test_start_subtitle_wrong_signature(settings, mock_livekit_token):
    """Test that tokens signed with incorrect signature are rejected.

    The token fixture is resolved before this body runs, so the token is
    signed with the original secret; the secret is swapped only afterwards.
    """
    settings.ROOM_SUBTITLE_ENABLED = True
    # Assign a new dict instead of mutating the existing one in place: the
    # settings fixture restores overridden attributes on teardown, whereas an
    # in-place change to the shared dict would leak "wrong-secret" into
    # whichever tests run next.
    settings.LIVEKIT_CONFIGURATION = {
        **settings.LIVEKIT_CONFIGURATION,
        "api_secret": "wrong-secret",
    }

    room = RoomFactory()
    client = APIClient()

    response = client.post(
        f"/api/v1.0/rooms/{room.id}/start-subtitle/",
        {"token": mock_livekit_token},
    )

    assert response.status_code == 403
    assert response.json() == {
        "detail": "Invalid LiveKit token: Signature verification failed"
    }

View File

@@ -0,0 +1,48 @@
"""
Test subtitle service.
"""
# pylint: disable=W0621
from unittest import mock
import pytest
from core.factories import RoomFactory
from core.services.subtitle import SubtitleService
pytestmark = pytest.mark.django_db
@pytest.fixture
def mock_livekit_client():
    """Patch the LiveKit client factory and yield the AsyncMock it returns."""
    fake_client = mock.AsyncMock()
    with mock.patch(
        "core.utils.create_livekit_client", return_value=fake_client
    ):
        yield fake_client
def test_start_subtitle_settings(mock_livekit_client, settings):
    """The dispatch request carries the agent name taken from settings."""
    settings.ROOM_SUBTITLE_AGENT_NAME = "fake-subtitle-agent-name"
    room = RoomFactory(name="my room")

    SubtitleService().start_subtitle(room)

    create_dispatch = mock_livekit_client.agent_dispatch.create_dispatch
    create_dispatch.assert_called_once()

    # The dispatch request is the first positional argument of the call.
    dispatch_request = create_dispatch.call_args.args[0]
    assert dispatch_request.agent_name == "fake-subtitle-agent-name"
    assert dispatch_request.room == str(room.id)
def test_stop_subtitle_not_implemented():
    """Calling stop_subtitle raises NotImplementedError with its message."""
    room = RoomFactory(name="my room")
    expected_message = "Subtitle agent stopping not yet implemented"

    with pytest.raises(NotImplementedError, match=expected_message):
        SubtitleService().stop_subtitle(room)

View File

@@ -644,6 +644,16 @@ class Base(Configuration):
environ_prefix=None,
)
# Subtitles settings
# Feature switch for subtitles. Defaults to False and is read from the
# ROOM_SUBTITLE_ENABLED environment variable (no prefix applied).
ROOM_SUBTITLE_ENABLED = values.BooleanValue(
    False, environ_name="ROOM_SUBTITLE_ENABLED", environ_prefix=None
)
# Name of the agent requested when subtitles are started. Defaults to
# "multi-user-transcriber" and is read from the ROOM_SUBTITLE_AGENT_NAME
# environment variable (no prefix applied).
ROOM_SUBTITLE_AGENT_NAME = values.Value(
    "multi-user-transcriber",
    environ_name="ROOM_SUBTITLE_AGENT_NAME",
    environ_prefix=None,
)
# pylint: disable=invalid-name
@property
def ENVIRONMENT(self):

View File

@@ -72,6 +72,7 @@ backend:
ROOM_TELEPHONY_DEFAULT_COUNTRY: 'FR'
ROOM_TELEPHONY_PHONE_NUMBER: '+33901020304'
SSL_CERT_FILE: /usr/local/lib/python3.13/site-packages/certifi/cacert.pem
ROOM_SUBTITLE_ENABLED: True
migrate: