✨(backend) introduce general recording worker concepts

Introducing a new worker service architecture. Sorry for the long commit. This design adheres to several key principles, primarily the Single Responsibility Principle. Dependency Injection and composition are prioritized over inheritance, enhancing modularity and maintainability. Interactions between the backend and external workers are encapsulated in classes implementing a common `WorkerService` interface. I chose Protocol over an abstract class for agility, aligning closely with static typing without requiring inheritance. Each `WorkerService` implementation can independently manage recordings according to its specific requirements. This flexibility ensures that adding a new worker service, such as for LiveKit, can be done without any change to existing components. Configuration management is centralized in a single `WorkerServiceConfig` class, which loads and provides all settings for different worker implementations, keeping configurations organized and extensible. The worker service class itself handles accessing relevant configurations as needed, simplifying the configuration process. A basic dictionary in Django settings acts as a factory, responsible for instantiating the correct worker service based on the client's request mode. This approach aligns with Django development conventions, emphasizing simplicity. While a full factory class with a builder pattern could provide future flexibility, YAGNI (You Aren't Gonna Need It) suggests deferring such complexity until it’s necessary. At the core of this design is the worker mediator, which decouples worker service implementations from the Django ORM and manages database state according to worker state. The mediator is purposefully limited in responsibility, handling only what’s essential. It doesn’t instantiate worker services directly; instead, services are injected via composition, allowing the mediator to manage any object conforming to the `WorkerService` interface. This setup preserves flexibility and maintains a clear separation of responsibilities. The factory create worker services, the mediator runs it. (sorry for this long commit)
2024-11-07 18:56:27 +01:00
parent 7278613b20
commit f6f1222f47
12 changed files with 1046 additions and 0 deletions
--- a/src/backend/core/recording/init.py
+++ b/src/backend/core/recording/init.py
--- a/src/backend/core/recording/worker/init.py
+++ b/src/backend/core/recording/worker/init.py
@@ -0,0 +1 @@
+"""Meet worker services classes and exceptions."""
--- a/src/backend/core/recording/worker/exceptions.py
+++ b/src/backend/core/recording/worker/exceptions.py
@@ -0,0 +1,21 @@
+"""Recording and worker services specific exceptions."""
+
+
+class WorkerRequestError(Exception):
+    """Raised when there is an issue with the worker request"""
+
+
+class WorkerConnectionError(Exception):
+    """Raised when there is an issue connecting to the worker."""
+
+
+class WorkerResponseError(Exception):
+    """Raised when the worker's response is not as expected."""
+
+
+class RecordingStartError(Exception):
+    """Raised when there is an error starting the recording."""
+
+
+class RecordingStopError(Exception):
+    """Raised when there is an error stopping the recording."""
--- a/src/backend/core/recording/worker/factories.py
+++ b/src/backend/core/recording/worker/factories.py
@@ -0,0 +1,75 @@
+"""Factory, configurations and Protocol to create worker services"""
+
+import logging
+from dataclasses import dataclass
+from functools import lru_cache
+from typing import Any, ClassVar, Dict, Optional, Protocol, Type
+
+from django.conf import settings
+from django.utils.module_loading import import_string
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass(frozen=True)
+class WorkerServiceConfig:
+    """Declare Worker Service common configurations"""
+
+    output_folder: str
+    server_configurations: Dict[str, Any]
+    verify_ssl: Optional[bool]
+    bucket_args: Optional[dict]
+
+    @classmethod
+    @lru_cache
+    def from_settings(cls) -> "WorkerServiceConfig":
+        """Load configuration from Django settings with caching for efficiency."""
+
+        logger.debug("Loading WorkerServiceConfig from settings.")
+        return cls(
+            output_folder=settings.RECORDING_OUTPUT_FOLDER,
+            server_configurations=settings.LIVEKIT_CONFIGURATION,
+            verify_ssl=settings.RECORDING_VERIFY_SSL,
+            bucket_args={
+                "endpoint": settings.AWS_S3_ENDPOINT_URL,
+                "access_key": settings.AWS_S3_ACCESS_KEY_ID,
+                "secret": settings.AWS_S3_SECRET_ACCESS_KEY,
+                "region": settings.AWS_S3_REGION_NAME,
+                "bucket": settings.AWS_STORAGE_BUCKET_NAME,
+                "force_path_style": True,
+            },
+        )
+
+
+class WorkerService(Protocol):
+    """Define the interface for interacting with a worker service."""
+
+    hrid: ClassVar[str]
+
+    def __init__(self, config: WorkerServiceConfig):
+        """Initialize the service with the given configuration."""
+
+    def start(self, room_id: str, recording_id: str) -> str:
+        """Start a recording for a specified room."""
+
+    def stop(self, worker_id: str) -> str:
+        """Stop recording for a specified worker."""
+
+
+def get_worker_service(mode: str) -> WorkerService:
+    """Instantiate a worker service by its mode."""
+
+    worker_registry: Dict[str, str] = settings.RECORDING_WORKER_CLASSES
+
+    try:
+        worker_class_path = worker_registry[mode]
+    except KeyError as e:
+        raise ValueError(
+            f"Recording mode '{mode}' not found in RECORDING_WORKER_CLASSES. "
+            f"Available modes: {list(worker_registry.keys())}"
+        ) from e
+
+    worker_class: Type[WorkerService] = import_string(worker_class_path)
+
+    config = WorkerServiceConfig.from_settings()
+    return worker_class(config=config)
--- a/src/backend/core/recording/worker/mediator.py
+++ b/src/backend/core/recording/worker/mediator.py
@@ -0,0 +1,100 @@
+"""Mediator between the worker service and recording instances in the Django ORM."""
+
+import logging
+
+from core.models import Recording, RecordingStatusChoices
+
+from .exceptions import (
+    RecordingStartError,
+    RecordingStopError,
+    WorkerConnectionError,
+    WorkerRequestError,
+    WorkerResponseError,
+)
+from .factories import WorkerService
+
+logger = logging.getLogger(__name__)
+
+
+class WorkerServiceMediator:
+    """Mediate interactions between a worker service and a recording instance.
+
+    A mediator class that decouples the worker from Django ORM, handles recording updates
+    based on worker status, and transforms worker errors into user-friendly exceptions.
+    Implements Mediator pattern.
+    """
+
+    def __init__(self, worker_service: WorkerService):
+        """Initialize the WorkerServiceMediator with the provided worker service."""
+
+        self._worker_service = worker_service
+
+    def start(self, recording: Recording):
+        """Start the recording process using the worker service.
+
+        If the operation is successful, the recording's status will
+        transition from INITIATED to ACTIVE, else to FAILED_TO_START to keep track of errors.
+
+        Args:
+            recording (Recording): The recording instance to start.
+        Raises:
+            RecordingStartError: If there is an error starting the recording.
+        """
+
+        # FIXME - no manipulations of room_name should be required
+        room_name = f"{recording.room.id!s}".replace("-", "")
+
+        if recording.status != RecordingStatusChoices.INITIATED:
+            logger.error("Cannot start recording in %s status.", recording.status)
+            raise RecordingStartError()
+
+        try:
+            worker_id = self._worker_service.start(room_name, recording.id)
+        except (WorkerRequestError, WorkerConnectionError, WorkerResponseError) as e:
+            logger.exception(
+                "Failed to start recording for room %s: %s", recording.room.slug, e
+            )
+            recording.status = RecordingStatusChoices.FAILED_TO_START
+            raise RecordingStartError() from e
+        else:
+            recording.worker_id = worker_id
+            recording.status = RecordingStatusChoices.ACTIVE
+        finally:
+            recording.save()
+
+        logger.info(
+            "Worker started for room %s (worker ID: %s)",
+            recording.room,
+            recording.worker_id,
+        )
+
+    def stop(self, recording: Recording):
+        """Stop the recording process using the worker service.
+
+        If the operation is successful, the recording's status will transition
+        from ACTIVE to STOPPED, else to FAILED_TO_STOP to keep track of errors.
+
+        Args:
+            recording (Recording): The recording instance to stop.
+        Raises:
+            RecordingStopError: If there is an error stopping the recording.
+        """
+
+        if recording.status != RecordingStatusChoices.ACTIVE:
+            logger.error("Cannot stop recording in %s status.", recording.status)
+            raise RecordingStopError()
+
+        try:
+            response = self._worker_service.stop(worker_id=recording.worker_id)
+        except (WorkerConnectionError, WorkerResponseError) as e:
+            logger.exception(
+                "Failed to stop recording for room %s: %s", recording.room.slug, e
+            )
+            recording.status = RecordingStatusChoices.FAILED_TO_STOP
+            raise RecordingStopError() from e
+        else:
+            recording.status = RecordingStatusChoices[response]
+        finally:
+            recording.save()
+
+        logger.info("Worker stopped for room %s", recording.room)
--- a/src/backend/core/recording/worker/services.py
+++ b/src/backend/core/recording/worker/services.py
@@ -0,0 +1,140 @@
+"""Worker services in charge of recording a room."""
+
+# pylint: disable=no-member
+
+import aiohttp
+from asgiref.sync import async_to_sync
+from livekit import api as livekit_api
+from livekit.api.egress_service import EgressService
+
+from .exceptions import WorkerConnectionError, WorkerResponseError
+from .factories import WorkerServiceConfig
+
+
+class BaseEgressService:
+    """Base egress defining common methods to manage and interact with LiveKit egress processes."""
+
+    def __init__(self, config: WorkerServiceConfig):
+        self._config = config
+        self._s3 = livekit_api.S3Upload(**config.bucket_args)
+
+    def _get_filepath(self, filename: str, extension: str) -> str:
+        """Construct the file path for a given filename and extension.
+        Unsecure method, doesn't handle paths robustly and securely.
+        """
+        return f"{self._config.output_folder}/{filename}.{extension}"
+
+    @async_to_sync
+    async def _handle_request(self, request, method_name: str):
+        """Handle making a request to the LiveKit API and returns the response."""
+
+        # Use HTTP connector for local development with Tilt,
+        # where cluster communications are unsecure
+        connector = aiohttp.TCPConnector(ssl=self._config.verify_ssl)
+
+        async with aiohttp.ClientSession(connector=connector) as session:
+            client = EgressService(session, **self._config.server_configurations)
+            method = getattr(client, method_name)
+            try:
+                response = await method(request)
+            except livekit_api.TwirpError as e:
+                raise WorkerConnectionError(
+                    f"LiveKit client connection error, {e.message}."
+                ) from e
+
+            return response
+
+    def stop(self, worker_id: str) -> str:
+        """Stop an ongoing egress worker.
+        The StopEgressRequest is shared among all types of egress,
+        so a single implementation in the base class should be sufficient.
+        """
+
+        request = livekit_api.StopEgressRequest(
+            egress_id=worker_id,
+        )
+
+        response = self._handle_request(request, "stop_egress")
+
+        if not response.status:
+            raise WorkerResponseError(
+                "LiveKit response is missing the recording status."
+            )
+
+        # To avoid exposing EgressStatus values and coupling with LiveKit outside of this class,
+        # the response status is mapped to simpler "ABORTED", "STOPPED" or "FAILED_TO_STOP" strings.
+        if response.status == livekit_api.EgressStatus.EGRESS_ABORTED:
+            return "ABORTED"
+
+        if response.status == livekit_api.EgressStatus.EGRESS_ENDING:
+            return "STOPPED"
+
+        return "FAILED_TO_STOP"
+
+    def start(self, room_name, recording_id):
+        """Start the egress process for a recording (not implemented in the base class).
+        Each derived class must implement this method, providing the necessary parameters for
+        its specific egress type (e.g. audio_only, streaming output).
+        """
+        raise NotImplementedError("Subclass must implement this method.")
+
+
+class VideoCompositeEgressService(BaseEgressService):
+    """Record multiple participant video and audio tracks into a single output '.mp4' file."""
+
+    hrid = "video-recording-composite-livekit-egress"
+
+    def start(self, room_name, recording_id):
+        """Start the video composite egress process for a recording."""
+
+        # Save room's recording as a mp4 video file.
+        file_type = livekit_api.EncodedFileType.MP4
+        filepath = self._get_filepath(filename=recording_id, extension="mp4")
+
+        file_output = livekit_api.EncodedFileOutput(
+            file_type=file_type,
+            filepath=filepath,
+            s3=self._s3,
+        )
+
+        request = livekit_api.RoomCompositeEgressRequest(
+            room_name=room_name,
+            file_outputs=[file_output],
+        )
+
+        response = self._handle_request(request, "start_room_composite_egress")
+
+        if not response.egress_id:
+            raise WorkerResponseError("Egress ID not found in the response.")
+
+        return response.egress_id
+
+
+class AudioCompositeEgressService(BaseEgressService):
+    """Record multiple participant audio tracks into a single output '.ogg' file."""
+
+    hrid = "audio-recording-composite-livekit-egress"
+
+    def start(self, room_name, recording_id):
+        """Start the audio composite egress process for a recording."""
+
+        # Save room's recording as an ogg audio file.
+        file_type = livekit_api.EncodedFileType.OGG
+        filepath = self._get_filepath(filename=recording_id, extension="ogg")
+
+        file_output = livekit_api.EncodedFileOutput(
+            file_type=file_type,
+            filepath=filepath,
+            s3=self._s3,
+        )
+
+        request = livekit_api.RoomCompositeEgressRequest(
+            room_name=room_name, file_outputs=[file_output], audio_only=True
+        )
+
+        response = self._handle_request(request, "start_room_composite_egress")
+
+        if not response.egress_id:
+            raise WorkerResponseError("Egress ID not found in the response.")
+
+        return response.egress_id
				`@@ -0,0 +1 @@`
				`"""Meet worker services classes and exceptions."""`