(backend) introduce general recording worker concepts

Introducing a new worker service architecture. Sorry for the long commit.
This design adheres to several key principles, primarily the  Single
Responsibility Principle. Dependency Injection and composition are
prioritized over inheritance, enhancing modularity and maintainability.

Interactions between the backend and external workers are encapsulated in
classes implementing a common `WorkerService` interface. I chose Protocol
over an abstract class for agility, aligning closely with static typing
without requiring inheritance. Each `WorkerService` implementation can
independently manage recordings according to its specific requirements.
This flexibility ensures that adding a new worker service, such as for
LiveKit, can be done without any change to existing components.

Configuration management is centralized in a single `WorkerServiceConfig`
class, which loads and provides all settings for different worker
implementations, keeping configurations organized and extensible. The worker
service class itself handles accessing relevant configurations as needed,
simplifying the configuration process.

A basic dictionary in Django settings acts as a factory, responsible for
instantiating the correct worker service based on the client's request mode.
This approach aligns with Django development conventions, emphasizing
simplicity. While a full factory class with a builder pattern could provide
future flexibility, YAGNI (You Aren't Gonna Need It) suggests deferring
such complexity until it’s necessary.

At the core of this design is the worker mediator, which decouples worker
service implementations from the Django ORM and manages database state
according to worker state. The mediator is purposefully limited in
responsibility, handling only what’s essential. It doesn’t instantiate
worker services directly; instead, services are injected via composition,
allowing the mediator to manage any object conforming to the `WorkerService`
interface. This setup preserves flexibility and maintains a clear
separation of responsibilities. The factory create worker services,
the mediator runs it.

(sorry for this long commit)
This commit is contained in:
lebaudantoine
2024-11-07 18:56:27 +01:00
committed by aleb_the_flash
parent 7278613b20
commit f6f1222f47
12 changed files with 1046 additions and 0 deletions

View File

View File

@@ -0,0 +1 @@
"""Meet worker services classes and exceptions."""

View File

@@ -0,0 +1,21 @@
"""Recording and worker services specific exceptions."""
class WorkerRequestError(Exception):
"""Raised when there is an issue with the worker request"""
class WorkerConnectionError(Exception):
"""Raised when there is an issue connecting to the worker."""
class WorkerResponseError(Exception):
"""Raised when the worker's response is not as expected."""
class RecordingStartError(Exception):
"""Raised when there is an error starting the recording."""
class RecordingStopError(Exception):
"""Raised when there is an error stopping the recording."""

View File

@@ -0,0 +1,75 @@
"""Factory, configurations and Protocol to create worker services"""
import logging
from dataclasses import dataclass
from functools import lru_cache
from typing import Any, ClassVar, Dict, Optional, Protocol, Type
from django.conf import settings
from django.utils.module_loading import import_string
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class WorkerServiceConfig:
"""Declare Worker Service common configurations"""
output_folder: str
server_configurations: Dict[str, Any]
verify_ssl: Optional[bool]
bucket_args: Optional[dict]
@classmethod
@lru_cache
def from_settings(cls) -> "WorkerServiceConfig":
"""Load configuration from Django settings with caching for efficiency."""
logger.debug("Loading WorkerServiceConfig from settings.")
return cls(
output_folder=settings.RECORDING_OUTPUT_FOLDER,
server_configurations=settings.LIVEKIT_CONFIGURATION,
verify_ssl=settings.RECORDING_VERIFY_SSL,
bucket_args={
"endpoint": settings.AWS_S3_ENDPOINT_URL,
"access_key": settings.AWS_S3_ACCESS_KEY_ID,
"secret": settings.AWS_S3_SECRET_ACCESS_KEY,
"region": settings.AWS_S3_REGION_NAME,
"bucket": settings.AWS_STORAGE_BUCKET_NAME,
"force_path_style": True,
},
)
class WorkerService(Protocol):
"""Define the interface for interacting with a worker service."""
hrid: ClassVar[str]
def __init__(self, config: WorkerServiceConfig):
"""Initialize the service with the given configuration."""
def start(self, room_id: str, recording_id: str) -> str:
"""Start a recording for a specified room."""
def stop(self, worker_id: str) -> str:
"""Stop recording for a specified worker."""
def get_worker_service(mode: str) -> WorkerService:
"""Instantiate a worker service by its mode."""
worker_registry: Dict[str, str] = settings.RECORDING_WORKER_CLASSES
try:
worker_class_path = worker_registry[mode]
except KeyError as e:
raise ValueError(
f"Recording mode '{mode}' not found in RECORDING_WORKER_CLASSES. "
f"Available modes: {list(worker_registry.keys())}"
) from e
worker_class: Type[WorkerService] = import_string(worker_class_path)
config = WorkerServiceConfig.from_settings()
return worker_class(config=config)

View File

@@ -0,0 +1,100 @@
"""Mediator between the worker service and recording instances in the Django ORM."""
import logging
from core.models import Recording, RecordingStatusChoices
from .exceptions import (
RecordingStartError,
RecordingStopError,
WorkerConnectionError,
WorkerRequestError,
WorkerResponseError,
)
from .factories import WorkerService
logger = logging.getLogger(__name__)
class WorkerServiceMediator:
"""Mediate interactions between a worker service and a recording instance.
A mediator class that decouples the worker from Django ORM, handles recording updates
based on worker status, and transforms worker errors into user-friendly exceptions.
Implements Mediator pattern.
"""
def __init__(self, worker_service: WorkerService):
"""Initialize the WorkerServiceMediator with the provided worker service."""
self._worker_service = worker_service
def start(self, recording: Recording):
"""Start the recording process using the worker service.
If the operation is successful, the recording's status will
transition from INITIATED to ACTIVE, else to FAILED_TO_START to keep track of errors.
Args:
recording (Recording): The recording instance to start.
Raises:
RecordingStartError: If there is an error starting the recording.
"""
# FIXME - no manipulations of room_name should be required
room_name = f"{recording.room.id!s}".replace("-", "")
if recording.status != RecordingStatusChoices.INITIATED:
logger.error("Cannot start recording in %s status.", recording.status)
raise RecordingStartError()
try:
worker_id = self._worker_service.start(room_name, recording.id)
except (WorkerRequestError, WorkerConnectionError, WorkerResponseError) as e:
logger.exception(
"Failed to start recording for room %s: %s", recording.room.slug, e
)
recording.status = RecordingStatusChoices.FAILED_TO_START
raise RecordingStartError() from e
else:
recording.worker_id = worker_id
recording.status = RecordingStatusChoices.ACTIVE
finally:
recording.save()
logger.info(
"Worker started for room %s (worker ID: %s)",
recording.room,
recording.worker_id,
)
def stop(self, recording: Recording):
"""Stop the recording process using the worker service.
If the operation is successful, the recording's status will transition
from ACTIVE to STOPPED, else to FAILED_TO_STOP to keep track of errors.
Args:
recording (Recording): The recording instance to stop.
Raises:
RecordingStopError: If there is an error stopping the recording.
"""
if recording.status != RecordingStatusChoices.ACTIVE:
logger.error("Cannot stop recording in %s status.", recording.status)
raise RecordingStopError()
try:
response = self._worker_service.stop(worker_id=recording.worker_id)
except (WorkerConnectionError, WorkerResponseError) as e:
logger.exception(
"Failed to stop recording for room %s: %s", recording.room.slug, e
)
recording.status = RecordingStatusChoices.FAILED_TO_STOP
raise RecordingStopError() from e
else:
recording.status = RecordingStatusChoices[response]
finally:
recording.save()
logger.info("Worker stopped for room %s", recording.room)

View File

@@ -0,0 +1,140 @@
"""Worker services in charge of recording a room."""
# pylint: disable=no-member
import aiohttp
from asgiref.sync import async_to_sync
from livekit import api as livekit_api
from livekit.api.egress_service import EgressService
from .exceptions import WorkerConnectionError, WorkerResponseError
from .factories import WorkerServiceConfig
class BaseEgressService:
"""Base egress defining common methods to manage and interact with LiveKit egress processes."""
def __init__(self, config: WorkerServiceConfig):
self._config = config
self._s3 = livekit_api.S3Upload(**config.bucket_args)
def _get_filepath(self, filename: str, extension: str) -> str:
"""Construct the file path for a given filename and extension.
Unsecure method, doesn't handle paths robustly and securely.
"""
return f"{self._config.output_folder}/{filename}.{extension}"
@async_to_sync
async def _handle_request(self, request, method_name: str):
"""Handle making a request to the LiveKit API and returns the response."""
# Use HTTP connector for local development with Tilt,
# where cluster communications are unsecure
connector = aiohttp.TCPConnector(ssl=self._config.verify_ssl)
async with aiohttp.ClientSession(connector=connector) as session:
client = EgressService(session, **self._config.server_configurations)
method = getattr(client, method_name)
try:
response = await method(request)
except livekit_api.TwirpError as e:
raise WorkerConnectionError(
f"LiveKit client connection error, {e.message}."
) from e
return response
def stop(self, worker_id: str) -> str:
"""Stop an ongoing egress worker.
The StopEgressRequest is shared among all types of egress,
so a single implementation in the base class should be sufficient.
"""
request = livekit_api.StopEgressRequest(
egress_id=worker_id,
)
response = self._handle_request(request, "stop_egress")
if not response.status:
raise WorkerResponseError(
"LiveKit response is missing the recording status."
)
# To avoid exposing EgressStatus values and coupling with LiveKit outside of this class,
# the response status is mapped to simpler "ABORTED", "STOPPED" or "FAILED_TO_STOP" strings.
if response.status == livekit_api.EgressStatus.EGRESS_ABORTED:
return "ABORTED"
if response.status == livekit_api.EgressStatus.EGRESS_ENDING:
return "STOPPED"
return "FAILED_TO_STOP"
def start(self, room_name, recording_id):
"""Start the egress process for a recording (not implemented in the base class).
Each derived class must implement this method, providing the necessary parameters for
its specific egress type (e.g. audio_only, streaming output).
"""
raise NotImplementedError("Subclass must implement this method.")
class VideoCompositeEgressService(BaseEgressService):
"""Record multiple participant video and audio tracks into a single output '.mp4' file."""
hrid = "video-recording-composite-livekit-egress"
def start(self, room_name, recording_id):
"""Start the video composite egress process for a recording."""
# Save room's recording as a mp4 video file.
file_type = livekit_api.EncodedFileType.MP4
filepath = self._get_filepath(filename=recording_id, extension="mp4")
file_output = livekit_api.EncodedFileOutput(
file_type=file_type,
filepath=filepath,
s3=self._s3,
)
request = livekit_api.RoomCompositeEgressRequest(
room_name=room_name,
file_outputs=[file_output],
)
response = self._handle_request(request, "start_room_composite_egress")
if not response.egress_id:
raise WorkerResponseError("Egress ID not found in the response.")
return response.egress_id
class AudioCompositeEgressService(BaseEgressService):
"""Record multiple participant audio tracks into a single output '.ogg' file."""
hrid = "audio-recording-composite-livekit-egress"
def start(self, room_name, recording_id):
"""Start the audio composite egress process for a recording."""
# Save room's recording as an ogg audio file.
file_type = livekit_api.EncodedFileType.OGG
filepath = self._get_filepath(filename=recording_id, extension="ogg")
file_output = livekit_api.EncodedFileOutput(
file_type=file_type,
filepath=filepath,
s3=self._s3,
)
request = livekit_api.RoomCompositeEgressRequest(
room_name=room_name, file_outputs=[file_output], audio_only=True
)
response = self._handle_request(request, "start_room_composite_egress")
if not response.egress_id:
raise WorkerResponseError("Egress ID not found in the response.")
return response.egress_id