class StorageEventAuthentication(BaseAuthentication):
    """Authenticate requests using a Bearer token for storage event integration.

    This class validates Bearer tokens for storage events that don't map to
    database users. It's designed for S3-compatible storage integrations and
    similar use cases. Events are submitted when a webhook is configured on
    some bucket's events.
    """

    AUTH_HEADER = "Authorization"
    TOKEN_TYPE = "Bearer"  # noqa S105

    def authenticate(self, request):
        """Validate the Bearer token from the Authorization header.

        Returns:
            A ``(MachineUser, token)`` tuple. The token element is ``None``
            when ``RECORDING_ENABLE_STORAGE_EVENT_AUTH`` is falsy, in which
            case the request is accepted without inspecting any header.

        Raises:
            AuthenticationFailed: if authentication is enabled but
                ``RECORDING_STORAGE_EVENT_TOKEN`` is empty, if the header is
                missing or is not exactly ``Bearer <token>``, or if the token
                does not match the configured one.
        """
        if not settings.RECORDING_ENABLE_STORAGE_EVENT_AUTH:
            return MachineUser(), None

        required_token = settings.RECORDING_STORAGE_EVENT_TOKEN
        if not required_token:
            # Authentication is necessarily enabled here (the disabled case
            # returned above), so a missing token is a configuration error and
            # must never fall through to an authenticated user.
            raise AuthenticationFailed(
                _("Authentication is enabled but token is not configured.")
            )

        client_ip = request.META.get("REMOTE_ADDR")
        auth_header = request.headers.get(self.AUTH_HEADER)

        if not auth_header:
            logger.warning(
                "Authentication failed: Missing Authorization header (ip: %s)",
                client_ip,
            )
            raise AuthenticationFailed(_("Authorization header is required"))

        # Split on a single space on purpose: consecutive spaces produce empty
        # parts, so such headers are rejected as malformed.
        auth_parts = auth_header.split(" ")
        if len(auth_parts) != 2 or auth_parts[0] != self.TOKEN_TYPE:
            logger.warning(
                "Authentication failed: Invalid authorization header (ip: %s)",
                client_ip,
            )
            raise AuthenticationFailed(_("Invalid authorization header."))

        token = auth_parts[1]

        # Use constant-time comparison to prevent timing attacks
        if not secrets.compare_digest(token.encode(), required_token.encode()):
            logger.warning(
                "Authentication failed: Invalid token (ip: %s)",
                client_ip,
            )
            raise AuthenticationFailed(_("Invalid token"))

        return MachineUser(), token

    def authenticate_header(self, request):
        """Return the WWW-Authenticate header value."""
        return f"{self.TOKEN_TYPE} realm='Storage event API'"
@dataclass
class StorageEvent:
    """Represents a storage event with relevant metadata.

    Attributes:
        filepath: Key (path) of the file the event refers to.
        filetype: Content type reported for the file.
        bucket_name: Name of the bucket that holds the file.
        metadata: Additional event data, or None when there is none.
    """

    filepath: str
    filetype: str
    bucket_name: str
    metadata: Optional[Dict[str, Any]] = None

    def __post_init__(self):
        """Reject events whose mandatory fields are None.

        Raises:
            TypeError: if filepath, filetype or bucket_name is None; the
                first offending field (in that order) is named in the message.
        """
        for field_name in ("filepath", "filetype", "bucket_name"):
            if getattr(self, field_name) is None:
                raise TypeError(f"{field_name} cannot be None")
class MinioParser:
    """Handle parsing and validation of Minio storage events."""

    def __init__(self, bucket_name: str, allowed_filetypes=None):
        """Initialize parser with target bucket name and accepted filetypes.

        Args:
            bucket_name: Only events for this bucket pass validation.
            allowed_filetypes: Accepted content types. A falsy value (None or
                an empty collection) selects the defaults
                ``{"audio/ogg", "video/mp4"}``.

        Raises:
            ValueError: if bucket_name is None or empty.
        """
        if not bucket_name:
            raise ValueError("Bucket name cannot be None or empty")

        self._bucket_name = bucket_name
        self._allowed_filetypes = allowed_filetypes or {"audio/ogg", "video/mp4"}

        # Zero or more "%2F"-terminated folder segments, then a 36-character
        # identifier made of hex digits and dashes, a dot, and an alphanumeric
        # extension. Applied with match(), i.e. anchored at the start only.
        self._filepath_regex = re.compile(
            r"(?P<folder>(?:[^%]+%2F)*)?"
            r"(?P<recording_id>[0-9a-fA-F\-]{36})"
            r"\.(?P<extension>[a-zA-Z0-9]+)"
        )

    @staticmethod
    def parse(data) -> "StorageEvent":
        """Convert raw Minio event dictionary to StorageEvent object.

        Only the first entry of ``data["Records"]`` is read.

        Raises:
            ParsingEventDataError: if data is empty, if an expected key or
                level is missing or not subscriptable, or if an essential
                field is None.
        """
        if not data:
            raise ParsingEventDataError("Received empty data.")

        try:
            record = data["Records"][0]
            s3 = record["s3"]
            bucket_name = s3["bucket"]["name"]
            file_object = s3["object"]
            filepath = file_object["key"]
            filetype = file_object["contentType"]
        except (KeyError, IndexError, TypeError) as e:
            # TypeError covers payloads where an intermediate level is not
            # subscriptable (for instance "Records": None).
            raise ParsingEventDataError(f"Missing or malformed key: {e}.") from e

        try:
            return StorageEvent(
                filepath=filepath,
                filetype=filetype,
                bucket_name=bucket_name,
                metadata=None,
            )
        except TypeError as e:
            raise ParsingEventDataError(f"Missing essential data fields: {e}") from e

    def validate(self, event_data: "StorageEvent") -> str:
        """Verify StorageEvent matches bucket, filetype and filepath requirements.

        Returns:
            The recording identifier captured from the filepath.

        Raises:
            InvalidBucketError: if the event targets another bucket.
            InvalidFileTypeError: if the filetype is not in the allowed set.
            InvalidFilepathError: if the filepath does not match the expected
                structure.
        """
        if event_data.bucket_name != self._bucket_name:
            raise InvalidBucketError(
                f"Invalid bucket: expected {self._bucket_name}, got {event_data.bucket_name}"
            )

        if event_data.filetype not in self._allowed_filetypes:
            raise InvalidFileTypeError(
                f"Invalid file type, expected {self._allowed_filetypes}, "
                f"got '{event_data.filetype}'"
            )

        match = self._filepath_regex.match(event_data.filepath)
        if not match:
            raise InvalidFilepathError(
                f"Invalid filepath structure: {event_data.filepath}"
            )

        return match.group("recording_id")

    def get_recording_id(self, data):
        """Extract recording ID from Minio event through parsing and validation."""
        return self.validate(self.parse(data))
+ +def test_disabled_authentication_without_header(settings): + """Authentication should pass when no auth is configured, and no header is present.""" + settings.RECORDING_STORAGE_EVENT_TOKEN = None + settings.RECORDING_ENABLE_STORAGE_EVENT_AUTH = False + + request = RequestFactory().get("/") + + user, token = StorageEventAuthentication().authenticate(request) + assert token is None + assert isinstance(user, MachineUser) + + +def test_authentication_when_disabled(settings): + """Authentication should pass when disabled, regardless of token configuration.""" + settings.RECORDING_STORAGE_EVENT_TOKEN = "some-token" + settings.RECORDING_ENABLE_STORAGE_EVENT_AUTH = False + + request = RequestFactory().get("/") + + user, token = StorageEventAuthentication().authenticate(request) + assert token is None + assert isinstance(user, MachineUser) + + +def test_authentication_fails_when_token_not_configured(settings): + """Authentication should fail when authentication is enabled but no token is configured.""" + + # By default RECORDING_ENABLE_STORAGE_EVENT_AUTH should be True + settings.RECORDING_STORAGE_EVENT_TOKEN = None + + request = RequestFactory().get("/") + + with pytest.raises( + AuthenticationFailed, + match="Authentication is enabled but token is not configured", + ): + StorageEventAuthentication().authenticate(request) + + +def test_missing_auth_header(settings): + """Test failure when Authorization header is missing.""" + settings.RECORDING_STORAGE_EVENT_TOKEN = "valid-test-token" + request = RequestFactory().get("/") + request.headers = {} + + with pytest.raises(AuthenticationFailed, match="Authorization header is required"): + StorageEventAuthentication().authenticate(request) + + +def test_invalid_auth_header_format(settings): + """Test failure when Authorization header has invalid format.""" + settings.RECORDING_STORAGE_EVENT_TOKEN = "valid-test-token" + request = RequestFactory().get("/") + request.headers = {"Authorization": "InvalidFormat"} + + with 
def test_multiple_spaces_in_auth_header(settings):
    """Test failure when Authorization header contains multiple spaces."""
    settings.RECORDING_STORAGE_EVENT_TOKEN = "valid-test-token"
    request = RequestFactory().get("/")
    # The scheme and the token are separated by TWO spaces. With a single
    # space the header would be well-formed and the request would be rejected
    # with "Invalid token" instead of the malformed-header error checked below.
    request.headers = {"Authorization": "Bearer  extra-spaces-token"}

    with pytest.raises(AuthenticationFailed, match="Invalid authorization header"):
        StorageEventAuthentication().authenticate(request)
a/src/backend/core/tests/recording/event/test_parsers.py b/src/backend/core/tests/recording/event/test_parsers.py new file mode 100644 index 00000000..080d98c3 --- /dev/null +++ b/src/backend/core/tests/recording/event/test_parsers.py @@ -0,0 +1,310 @@ +""" +Test event parsers. +""" + +# pylint: disable=W0212,W0621,W0613 + +from unittest import mock + +from django.conf import settings + +import pytest + +from core.recording.event.exceptions import ( + InvalidBucketError, + InvalidFilepathError, + InvalidFileTypeError, + ParsingEventDataError, +) +from core.recording.event.parsers import ( + MinioParser, + StorageEvent, + get_parser, +) + + +@pytest.fixture +def valid_minio_event(): + """Mock a valid Minio event.""" + return { + "Records": [ + { + "s3": { + "bucket": {"name": "test-bucket"}, + "object": { + "key": "recording%2F46d1a121-2426-484d-8fb3-09b5d886f7a8.ogg", + "contentType": "audio/ogg", + }, + } + } + ] + } + + +@pytest.fixture +def minio_parser(): + """Mock a Minio parser.""" + return MinioParser(bucket_name="test-bucket") + + +def test_parse_valid_event(minio_parser, valid_minio_event): + """Test parsing a valid Minio event.""" + event = minio_parser.parse(valid_minio_event) + assert isinstance(event, StorageEvent) + assert event.filepath == "recording%2F46d1a121-2426-484d-8fb3-09b5d886f7a8.ogg" + assert event.filetype == "audio/ogg" + assert event.bucket_name == "test-bucket" + assert event.metadata is None + + +def test_parse_empty_data(minio_parser): + """Test parsing empty event data raises error.""" + with pytest.raises(ParsingEventDataError, match="Received empty data."): + minio_parser.parse({}) + + +def test_parse_missing_keys(minio_parser): + """Test parsing event with missing key.""" + + invalid_minio_event = { + "Records": [ + { + "s3": { + "bucket": {"name": None}, + # Missing 'object' key + } + } + ] + } + + with pytest.raises(ParsingEventDataError, match="Missing or malformed key"): + minio_parser.parse(invalid_minio_event) + + +def 
test_parse_none_key(minio_parser): + """Test parsing event with None field.""" + + invalid_minio_event = { + "Records": [ + { + "s3": { + "bucket": {"name": "test-bucket"}, + "object": { + "key": "recording%2F46d1a121-2426-484d-8fb3-09b5d886f7a8.ogg", + "contentType": None, # 'contentType' should not be None + }, + } + } + ] + } + + with pytest.raises(ParsingEventDataError, match="Missing essential data fields"): + minio_parser.parse(invalid_minio_event) + + +def test_validate_invalid_bucket(minio_parser): + """Test validation with wrong bucket name.""" + event = StorageEvent( + filepath="recording%2F46d1a121-2426-484d-8fb3-09b5d886f7a8.ogg", + filetype="audio/ogg", + bucket_name="wrong-bucket", + metadata=None, + ) + with pytest.raises(InvalidBucketError): + minio_parser.validate(event) + + +def test_validate_invalid_filetype(minio_parser): + """Test validation with unsupported file type.""" + event = StorageEvent( + filepath="recording%2F46d1a121-2426-484d-8fb3-09b5d886f7a8.txt", + filetype="text/plain", # Not included in the default allowed filetypes + bucket_name="test-bucket", + metadata=None, + ) + with pytest.raises(InvalidFileTypeError): + minio_parser.validate(event) + + +@pytest.mark.parametrize( + "invalid_filepath", + [ + "invalid_filepath", + "recording/46d1a121-2426-484d-8fb3-09b5d886f7a8.ogg", + "recording%2F46d1a1212426484d8fb309b5d886f7a8.ogg", + ], +) +def test_validate_invalid_filepath(invalid_filepath, minio_parser): + """Test validation with malformed filepath.""" + event = StorageEvent( + filepath=invalid_filepath, + filetype="audio/ogg", + bucket_name="test-bucket", + metadata=None, + ) + with pytest.raises(InvalidFilepathError): + minio_parser.validate(event) + + +def test_validate_valid_event(minio_parser): + """Test validation with valid event data.""" + event = StorageEvent( + filepath="recording%2F46d1a121-2426-484d-8fb3-09b5d886f7a8.ogg", + filetype="audio/ogg", + bucket_name="test-bucket", + metadata=None, + ) + recording_id = 
minio_parser.validate(event) + assert recording_id == "46d1a121-2426-484d-8fb3-09b5d886f7a8" + + +def test_get_recording_id_success(minio_parser, valid_minio_event): + """Test successful extraction of recording ID.""" + recording_id = minio_parser.get_recording_id(valid_minio_event) + assert recording_id == "46d1a121-2426-484d-8fb3-09b5d886f7a8" + + +def test_validate_filepath_with_folder(minio_parser): + """Test validation of filepath with folder structure.""" + event = StorageEvent( + filepath="parent_folder%2Ffolder%2F46d1a121-2426-484d-8fb3-09b5d886f7a8.ogg", + filetype="audio/ogg", + bucket_name="test-bucket", + metadata=None, + ) + recording_id = minio_parser.validate(event) + assert recording_id == "46d1a121-2426-484d-8fb3-09b5d886f7a8" + + +def test_parse_with_video_type(minio_parser): + """Test parsing event with video file type.""" + video_event = { + "Records": [ + { + "s3": { + "bucket": {"name": "test-bucket"}, + "object": { + "key": "46d1a121-2426-484d-8fb3-09b5d886f7a8.mp4", + "contentType": "video/mp4", + }, + } + } + ] + } + event = minio_parser.parse(video_event) + assert event.filetype == "video/mp4" + assert event.filepath.endswith(".mp4") + + +def test_empty_allowed_filetypes(): + """Test MinioParser with empty allowed_filetypes.""" + empty_types = set() + parser = MinioParser(bucket_name="test-bucket", allowed_filetypes=empty_types) + assert parser._allowed_filetypes == {"audio/ogg", "video/mp4"} + + +def test_custom_allowed_filetypes(): + """Test MinioParser with empty allowed_filetypes.""" + custom_types = {"audio/mp3", "video/mov"} + parser = MinioParser(bucket_name="test-bucket", allowed_filetypes=custom_types) + assert parser._allowed_filetypes == {"audio/mp3", "video/mov"} + + +def test_validate_custom_filetypes(): + """Test validation of filepath with folder structure.""" + + parser = MinioParser(bucket_name="test-bucket", allowed_filetypes={"audio/mp3"}) + + event = StorageEvent( + 
def test_different_settings_new_instance(clear_lru_cache, settings):
    """Test that a freshly built parser uses the configured bucket name.

    The cache is cleared around the test so an already cached parser cannot
    mask the setting, and the ``settings`` fixture is used so the override
    does not leak into other tests.
    """
    settings.AWS_STORAGE_BUCKET_NAME = "different-bucket"
    parser = get_parser()
    assert parser._bucket_name == "different-bucket"
caching.""" + mock_parser_cls = type( + "MockParser", + (), + { + "__init__": lambda self, bucket_name: setattr( + self, "_bucket_name", bucket_name + ) + }, + ) + mock_import_string.return_value = mock_parser_cls + + # First call + parser1 = get_parser() + # Second call + parser2 = get_parser() + + # Verify import_string was called only once + mock_import_string.assert_called_once_with(settings.RECORDING_EVENT_PARSER_CLASS) + assert parser1 is parser2 + + +def test_cache_clear_behavior(clear_lru_cache, settings): + """Test that cache clearing creates new instance.""" + + settings.RECORDING_EVENT_PARSER_CLASS = "core.recording.event.parsers.MinioParser" + + parser1 = get_parser() + get_parser.cache_clear() + parser2 = get_parser() + + assert parser1 is not parser2 # Should be different instances after cache clear diff --git a/src/backend/meet/settings.py b/src/backend/meet/settings.py index a288ccff..fef21a63 100755 --- a/src/backend/meet/settings.py +++ b/src/backend/meet/settings.py @@ -424,6 +424,14 @@ class Base(Configuration): environ_name="RECORDING_WORKER_CLASSES", environ_prefix=None, ) + RECORDING_EVENT_PARSER_CLASS = values.Value( + "core.recording.event.parsers.MinioParser", + environ_name="RECORDING_EVENT_PARSER_CLASS", + environ_prefix=None, + ) + RECORDING_ENABLE_STORAGE_EVENT_AUTH = values.BooleanValue( + True, environ_name="RECORDING_ENABLE_STORAGE_EVENT_AUTH", environ_prefix=None + ) # pylint: disable=invalid-name @property