(backend) add minio event parser

When a new file is uploaded to a Minio Bucket, a webhook can be
configured to notify third parties about the event. Basically,
it's a POST call with a payload providing information on the
event that just happened.

When a recording worker stops, it will upload its data to a Minio
bucket, which will trigger the webhook.

Try to introduce the minimal code needed to parse these events, discard
them whenever it's relevant, and extract the recording ID, so that we
know which recording was successfully saved to the Minio bucket.

In the longer run, it will trigger a callback.
This commit is contained in:
lebaudantoine
2024-11-08 11:27:37 +01:00
committed by aleb_the_flash
parent 840033fcbc
commit 8309545ec6
8 changed files with 721 additions and 0 deletions

View File

@@ -0,0 +1 @@
"""Meet event parser classes, authentication and exceptions."""

View File

@@ -0,0 +1,93 @@
"""Authentication class for storage event token validation."""
import logging
import secrets
from django.conf import settings
from django.utils.translation import gettext_lazy as _
from rest_framework.authentication import BaseAuthentication
from rest_framework.exceptions import AuthenticationFailed
logger = logging.getLogger(__name__)
class MachineUser:
    """Non-interactive system user standing in for automated storage callers."""

    def __init__(self) -> None:
        # Minimal user-like surface expected by DRF's authentication layer.
        self.pk = None
        self.username = "storage_event_user"
        self.is_active = True

    @property
    def is_authenticated(self) -> bool:
        """Machine users always count as authenticated."""
        return True

    @property
    def is_anonymous(self) -> bool:
        """Machine users are never anonymous."""
        return False

    def get_username(self) -> str:
        """Return the fixed identifier used for this machine account."""
        return self.username
class StorageEventAuthentication(BaseAuthentication):
    """Authenticate requests using a Bearer token for storage event integration.

    This class validates Bearer tokens for storage events that don't map to
    database users. It's designed for S3-compatible storage integrations and
    similar use cases. Events are submitted when a webhook is configured on
    some bucket's events.
    """

    AUTH_HEADER = "Authorization"
    TOKEN_TYPE = "Bearer"  # noqa S105

    def authenticate(self, request):
        """Validate the Bearer token from the Authorization header.

        Returns:
            Tuple of (MachineUser, token); token is None when authentication
            is disabled by settings.

        Raises:
            AuthenticationFailed: when the header is missing/malformed, the
                token does not match, or auth is enabled without a token
                configured.
        """
        if not settings.RECORDING_ENABLE_STORAGE_EVENT_AUTH:
            return MachineUser(), None

        required_token = settings.RECORDING_STORAGE_EVENT_TOKEN
        if not required_token:
            # Auth is enabled here (we returned above otherwise), so a missing
            # token can only be a configuration error. The previous nested
            # re-check of the flag was redundant and made the fallback
            # "return MachineUser(), None" unreachable dead code.
            raise AuthenticationFailed(
                _("Authentication is enabled but token is not configured.")
            )

        auth_header = request.headers.get(self.AUTH_HEADER)
        if not auth_header:
            logger.warning(
                "Authentication failed: Missing Authorization header (ip: %s)",
                request.META.get("REMOTE_ADDR"),
            )
            raise AuthenticationFailed(_("Authorization header is required"))

        auth_parts = auth_header.split(" ")
        if len(auth_parts) != 2 or auth_parts[0] != self.TOKEN_TYPE:
            logger.warning(
                "Authentication failed: Invalid authorization header (ip: %s)",
                request.META.get("REMOTE_ADDR"),
            )
            raise AuthenticationFailed(_("Invalid authorization header."))

        token = auth_parts[1]
        # Use constant-time comparison to prevent timing attacks
        if not secrets.compare_digest(token.encode(), required_token.encode()):
            logger.warning(
                "Authentication failed: Invalid token (ip: %s)",
                request.META.get("REMOTE_ADDR"),
            )
            raise AuthenticationFailed(_("Invalid token"))
        return MachineUser(), token

    def authenticate_header(self, request):
        """Return the WWW-Authenticate header value."""
        return f"{self.TOKEN_TYPE} realm='Storage event API'"

View File

@@ -0,0 +1,17 @@
"""Storage parsers specific exceptions."""
class StorageEventError(Exception):
    """Base class for all storage event parsing and validation errors.

    A shared root lets callers catch any parser failure with a single
    except clause while remaining backward compatible: every concrete
    subclass still derives from Exception.
    """


class ParsingEventDataError(StorageEventError):
    """Raised when the request data is malformed, incomplete, or missing."""


class InvalidBucketError(StorageEventError):
    """Raised when the bucket name in the request does not match the expected one."""


class InvalidFileTypeError(StorageEventError):
    """Raised when the file type in the request is not supported."""


class InvalidFilepathError(StorageEventError):
    """Raised when the filepath in the request is invalid."""

View File

@@ -0,0 +1,147 @@
"""Meet storage event parser classes."""
import logging
import re
from dataclasses import dataclass
from functools import lru_cache
from typing import Any, Dict, Optional, Protocol
from django.conf import settings
from django.utils.module_loading import import_string
from .exceptions import (
InvalidBucketError,
InvalidFilepathError,
InvalidFileTypeError,
ParsingEventDataError,
)
logger = logging.getLogger(__name__)
@dataclass
class StorageEvent:
    """Represents a storage event with relevant metadata.

    Attributes:
        filepath: Object key of the affected file within the bucket
        filetype: MIME content type of the uploaded object
        bucket_name: Name of the bucket that emitted the event
        metadata: Additional event data, if any
    """

    filepath: str
    filetype: str
    bucket_name: str
    metadata: Optional[Dict[str, Any]]

    def __post_init__(self):
        """Reject None in required fields (dataclasses don't enforce the hints)."""
        for required in ("filepath", "filetype", "bucket_name"):
            if getattr(self, required) is None:
                # Same message per field as the original explicit checks.
                raise TypeError(f"{required} cannot be None")
class EventParser(Protocol):
    """Structural interface that storage event parsers must satisfy.

    Implementations (e.g. MinioParser) are selected via settings and
    instantiated by get_parser(); any class matching this shape works.
    """

    def __init__(self, bucket_name, allowed_filetypes=None):
        """Initialize parser with bucket name and optional allowed filetypes."""

    def parse(self, data: Dict) -> StorageEvent:
        """Extract storage event data from raw dictionary input."""

    def validate(self, data: StorageEvent) -> None:
        """Verify storage event data meets all requirements."""

    def get_recording_id(self, data: Dict) -> str:
        """Extract recording ID from event dictionary."""
@lru_cache(maxsize=1)
def get_parser() -> EventParser:
    """Return the configured event parser, instantiated once and cached.

    Memoizing the function gives singleton behavior without a factory
    class: the parser class comes from settings and its only constructor
    argument is the storage bucket name, so there is nothing to vary
    between calls.
    """
    parser_class = import_string(settings.RECORDING_EVENT_PARSER_CLASS)
    return parser_class(bucket_name=settings.AWS_STORAGE_BUCKET_NAME)
class MinioParser:
    """Handle parsing and validation of Minio storage events."""

    def __init__(self, bucket_name: str, allowed_filetypes=None):
        """Initialize parser with target bucket name and accepted filetypes.

        Args:
            bucket_name: Bucket the events must originate from.
            allowed_filetypes: Accepted MIME types; a None or empty value
                falls back to the defaults (audio/ogg, video/mp4).

        Raises:
            ValueError: if bucket_name is None or empty.
        """
        if not bucket_name:
            raise ValueError("Bucket name cannot be None or empty")
        self._bucket_name = bucket_name
        self._allowed_filetypes = allowed_filetypes or {"audio/ogg", "video/mp4"}
        # Matches "<folders>%2F<uuid>.<ext>" where folder separators are
        # percent-encoded slashes (%2F), as Minio URL-encodes object keys.
        # pylint: disable=line-too-long
        self._filepath_regex = re.compile(
            r"(?P<url_encoded_folder_path>(?:[^%]+%2F)*)?(?P<recording_id>[0-9a-fA-F\-]{36})\.(?P<extension>[a-zA-Z0-9]+)"
        )

    @staticmethod
    def parse(data):
        """Convert raw Minio event dictionary to StorageEvent object.

        Raises:
            ParsingEventDataError: if the payload is empty or lacks the
                expected Minio "Records" structure, or a required field
                is None.
        """
        if not data:
            raise ParsingEventDataError("Received empty data.")
        try:
            record = data["Records"][0]
            s3 = record["s3"]
            bucket_name = s3["bucket"]["name"]
            file_object = s3["object"]
            filepath = file_object["key"]
            filetype = file_object["contentType"]
        except (KeyError, IndexError) as e:
            raise ParsingEventDataError(f"Missing or malformed key: {e}.") from e
        try:
            # StorageEvent.__post_init__ raises TypeError on None fields.
            return StorageEvent(
                filepath=filepath,
                filetype=filetype,
                bucket_name=bucket_name,
                metadata=None,
            )
        except TypeError as e:
            raise ParsingEventDataError(f"Missing essential data fields: {e}") from e

    def validate(self, event_data: StorageEvent) -> str:
        """Verify StorageEvent matches bucket, filetype and filepath requirements.

        Returns:
            The recording ID extracted from the filepath.

        Raises:
            InvalidBucketError, InvalidFileTypeError, InvalidFilepathError.
        """
        if event_data.bucket_name != self._bucket_name:
            raise InvalidBucketError(
                f"Invalid bucket: expected {self._bucket_name}, got {event_data.bucket_name}"
            )
        # PEP 8 idiom: "x not in y" instead of "not x in y". Also insert the
        # space that was missing between the two message fragments
        # ("...},got" -> "..., got").
        if event_data.filetype not in self._allowed_filetypes:
            raise InvalidFileTypeError(
                f"Invalid file type, expected {self._allowed_filetypes}, "
                f"got '{event_data.filetype}'"
            )
        match = self._filepath_regex.match(event_data.filepath)
        if not match:
            raise InvalidFilepathError(
                f"Invalid filepath structure: {event_data.filepath}"
            )
        return match.group("recording_id")

    def get_recording_id(self, data):
        """Extract recording ID from Minio event through parsing and validation."""
        event_data = self.parse(data)
        return self.validate(event_data)

View File

@@ -0,0 +1,145 @@
"""
Test event authentication.
"""
# pylint: disable=E1128
from django.test import RequestFactory
import pytest
from rest_framework.exceptions import AuthenticationFailed
from core.recording.event.authentication import (
MachineUser,
StorageEventAuthentication,
)
def test_successful_authentication(settings):
    """Test successful authentication with valid token."""
    settings.RECORDING_STORAGE_EVENT_TOKEN = "valid-test-token"
    req = RequestFactory().get("/")
    req.headers = {"Authorization": "Bearer valid-test-token"}
    authenticated_user, returned_token = StorageEventAuthentication().authenticate(req)
    assert isinstance(authenticated_user, MachineUser)
    assert returned_token == "valid-test-token"
def test_disabled_authentication_with_header(settings):
    """Authentication should pass when no auth is configured, and header is present."""
    settings.RECORDING_STORAGE_EVENT_TOKEN = None
    settings.RECORDING_ENABLE_STORAGE_EVENT_AUTH = False
    req = RequestFactory().get("/")
    req.headers = {"Authorization": "Bearer some-token"}
    user, token = StorageEventAuthentication().authenticate(req)
    assert isinstance(user, MachineUser)
    assert token is None


def test_disabled_authentication_without_header(settings):
    """Authentication should pass when no auth is configured, and no header is present."""
    settings.RECORDING_STORAGE_EVENT_TOKEN = None
    settings.RECORDING_ENABLE_STORAGE_EVENT_AUTH = False
    user, token = StorageEventAuthentication().authenticate(RequestFactory().get("/"))
    assert isinstance(user, MachineUser)
    assert token is None


def test_authentication_when_disabled(settings):
    """Authentication should pass when disabled, regardless of token configuration."""
    settings.RECORDING_STORAGE_EVENT_TOKEN = "some-token"
    settings.RECORDING_ENABLE_STORAGE_EVENT_AUTH = False
    user, token = StorageEventAuthentication().authenticate(RequestFactory().get("/"))
    assert isinstance(user, MachineUser)
    assert token is None
def test_authentication_fails_when_token_not_configured(settings):
    """Authentication should fail when authentication is enabled but no token is configured."""
    # By default RECORDING_ENABLE_STORAGE_EVENT_AUTH should be True
    settings.RECORDING_STORAGE_EVENT_TOKEN = None
    with pytest.raises(
        AuthenticationFailed,
        match="Authentication is enabled but token is not configured",
    ):
        StorageEventAuthentication().authenticate(RequestFactory().get("/"))
def _request_with_auth(header_value=None):
    """Build a GET request whose headers carry only the given Authorization value."""
    request = RequestFactory().get("/")
    request.headers = {} if header_value is None else {"Authorization": header_value}
    return request


def test_missing_auth_header(settings):
    """Test failure when Authorization header is missing."""
    settings.RECORDING_STORAGE_EVENT_TOKEN = "valid-test-token"
    with pytest.raises(AuthenticationFailed, match="Authorization header is required"):
        StorageEventAuthentication().authenticate(_request_with_auth())


def test_invalid_auth_header_format(settings):
    """Test failure when Authorization header has invalid format."""
    settings.RECORDING_STORAGE_EVENT_TOKEN = "valid-test-token"
    with pytest.raises(AuthenticationFailed, match="Invalid authorization header"):
        StorageEventAuthentication().authenticate(_request_with_auth("InvalidFormat"))


def test_invalid_token_type(settings):
    """Test failure when token type is not Bearer."""
    settings.RECORDING_STORAGE_EVENT_TOKEN = "valid-test-token"
    with pytest.raises(AuthenticationFailed, match="Invalid authorization header"):
        StorageEventAuthentication().authenticate(_request_with_auth("Basic some-token"))


def test_invalid_token(settings):
    """Test failure when token is invalid."""
    settings.RECORDING_STORAGE_EVENT_TOKEN = "valid-test-token"
    with pytest.raises(AuthenticationFailed, match="Invalid token"):
        StorageEventAuthentication().authenticate(_request_with_auth("Bearer wrong-token"))


def test_malformed_auth_header(settings):
    """Test failure when Authorization header is malformed."""
    settings.RECORDING_STORAGE_EVENT_TOKEN = "valid-test-token"
    # "Bearer" with no token part splits into a single element.
    with pytest.raises(AuthenticationFailed, match="Invalid authorization header"):
        StorageEventAuthentication().authenticate(_request_with_auth("Bearer"))
def test_authenticate_header():
    """Test the WWW-Authenticate header value."""
    value = StorageEventAuthentication().authenticate_header(RequestFactory().get("/"))
    assert value == "Bearer realm='Storage event API'"
def test_multiple_spaces_in_auth_header(settings):
    """Test failure when Authorization header contains multiple spaces."""
    settings.RECORDING_STORAGE_EVENT_TOKEN = "valid-test-token"
    request = RequestFactory().get("/")
    # The header must actually contain several spaces so split(" ") yields
    # more than two parts; the previous single-space value would have hit
    # the "Invalid token" branch instead of the header-format check.
    request.headers = {"Authorization": "Bearer extra spaces token"}
    with pytest.raises(AuthenticationFailed, match="Invalid authorization header"):
        StorageEventAuthentication().authenticate(request)

View File

@@ -0,0 +1,310 @@
"""
Test event parsers.
"""
# pylint: disable=W0212,W0621,W0613
from unittest import mock
from django.conf import settings
import pytest
from core.recording.event.exceptions import (
InvalidBucketError,
InvalidFilepathError,
InvalidFileTypeError,
ParsingEventDataError,
)
from core.recording.event.parsers import (
MinioParser,
StorageEvent,
get_parser,
)
@pytest.fixture
def valid_minio_event():
    """Mock a valid Minio event."""
    s3_payload = {
        "bucket": {"name": "test-bucket"},
        "object": {
            "key": "recording%2F46d1a121-2426-484d-8fb3-09b5d886f7a8.ogg",
            "contentType": "audio/ogg",
        },
    }
    return {"Records": [{"s3": s3_payload}]}


@pytest.fixture
def minio_parser():
    """Mock a Minio parser."""
    return MinioParser(bucket_name="test-bucket")
def test_parse_valid_event(minio_parser, valid_minio_event):
    """Test parsing a valid Minio event."""
    parsed = minio_parser.parse(valid_minio_event)
    assert isinstance(parsed, StorageEvent)
    assert parsed.bucket_name == "test-bucket"
    assert parsed.filepath == "recording%2F46d1a121-2426-484d-8fb3-09b5d886f7a8.ogg"
    assert parsed.filetype == "audio/ogg"
    assert parsed.metadata is None


def test_parse_empty_data(minio_parser):
    """Test parsing empty event data raises error."""
    with pytest.raises(ParsingEventDataError, match="Received empty data."):
        minio_parser.parse({})
def test_parse_missing_keys(minio_parser):
    """Test parsing event with missing key."""
    # The 's3' record lacks the mandatory 'object' entry.
    event_without_object = {"Records": [{"s3": {"bucket": {"name": None}}}]}
    with pytest.raises(ParsingEventDataError, match="Missing or malformed key"):
        minio_parser.parse(event_without_object)


def test_parse_none_key(minio_parser):
    """Test parsing event with None field."""
    event_with_none_field = {
        "Records": [
            {
                "s3": {
                    "bucket": {"name": "test-bucket"},
                    "object": {
                        "key": "recording%2F46d1a121-2426-484d-8fb3-09b5d886f7a8.ogg",
                        # 'contentType' should not be None
                        "contentType": None,
                    },
                }
            }
        ]
    }
    with pytest.raises(ParsingEventDataError, match="Missing essential data fields"):
        minio_parser.parse(event_with_none_field)
def test_validate_invalid_bucket(minio_parser):
    """Test validation with wrong bucket name."""
    mismatched_event = StorageEvent(
        filepath="recording%2F46d1a121-2426-484d-8fb3-09b5d886f7a8.ogg",
        filetype="audio/ogg",
        bucket_name="wrong-bucket",
        metadata=None,
    )
    with pytest.raises(InvalidBucketError):
        minio_parser.validate(mismatched_event)


def test_validate_invalid_filetype(minio_parser):
    """Test validation with unsupported file type."""
    # text/plain is not included in the default allowed filetypes
    unsupported_event = StorageEvent(
        filepath="recording%2F46d1a121-2426-484d-8fb3-09b5d886f7a8.txt",
        filetype="text/plain",
        bucket_name="test-bucket",
        metadata=None,
    )
    with pytest.raises(InvalidFileTypeError):
        minio_parser.validate(unsupported_event)
@pytest.mark.parametrize(
    "invalid_filepath",
    [
        "invalid_filepath",
        "recording/46d1a121-2426-484d-8fb3-09b5d886f7a8.ogg",
        "recording%2F46d1a1212426484d8fb309b5d886f7a8.ogg",
    ],
)
def test_validate_invalid_filepath(invalid_filepath, minio_parser):
    """Test validation with malformed filepath."""
    malformed_event = StorageEvent(
        filepath=invalid_filepath,
        filetype="audio/ogg",
        bucket_name="test-bucket",
        metadata=None,
    )
    with pytest.raises(InvalidFilepathError):
        minio_parser.validate(malformed_event)
def test_validate_valid_event(minio_parser):
    """Test validation with valid event data."""
    well_formed = StorageEvent(
        filepath="recording%2F46d1a121-2426-484d-8fb3-09b5d886f7a8.ogg",
        filetype="audio/ogg",
        bucket_name="test-bucket",
        metadata=None,
    )
    assert minio_parser.validate(well_formed) == "46d1a121-2426-484d-8fb3-09b5d886f7a8"


def test_get_recording_id_success(minio_parser, valid_minio_event):
    """Test successful extraction of recording ID."""
    assert (
        minio_parser.get_recording_id(valid_minio_event)
        == "46d1a121-2426-484d-8fb3-09b5d886f7a8"
    )


def test_validate_filepath_with_folder(minio_parser):
    """Test validation of filepath with folder structure."""
    nested = StorageEvent(
        filepath="parent_folder%2Ffolder%2F46d1a121-2426-484d-8fb3-09b5d886f7a8.ogg",
        filetype="audio/ogg",
        bucket_name="test-bucket",
        metadata=None,
    )
    assert minio_parser.validate(nested) == "46d1a121-2426-484d-8fb3-09b5d886f7a8"
def test_parse_with_video_type(minio_parser):
    """Test parsing event with video file type."""
    video_payload = {
        "bucket": {"name": "test-bucket"},
        "object": {
            "key": "46d1a121-2426-484d-8fb3-09b5d886f7a8.mp4",
            "contentType": "video/mp4",
        },
    }
    parsed = minio_parser.parse({"Records": [{"s3": video_payload}]})
    assert parsed.filetype == "video/mp4"
    assert parsed.filepath.endswith(".mp4")
def test_empty_allowed_filetypes():
    """Test MinioParser falls back to defaults when allowed_filetypes is empty."""
    parser = MinioParser(bucket_name="test-bucket", allowed_filetypes=set())
    # An empty set is falsy, so the constructor substitutes the defaults.
    assert parser._allowed_filetypes == {"audio/ogg", "video/mp4"}


def test_custom_allowed_filetypes():
    """Test MinioParser with custom allowed_filetypes."""
    custom_types = {"audio/mp3", "video/mov"}
    parser = MinioParser(bucket_name="test-bucket", allowed_filetypes=custom_types)
    assert parser._allowed_filetypes == {"audio/mp3", "video/mov"}


def test_validate_custom_filetypes():
    """Test validation succeeds for a filetype allowed by a custom set."""
    parser = MinioParser(bucket_name="test-bucket", allowed_filetypes={"audio/mp3"})
    event = StorageEvent(
        filepath="parent_folder%2Ffolder%2F46d1a121-2426-484d-8fb3-09b5d886f7a8.ogg",
        filetype="audio/mp3",
        bucket_name="test-bucket",
        metadata=None,
    )
    parser.validate(event)
def test_constructor_none_bucket():
    """Test MinioParser constructor with None bucket name."""
    with pytest.raises(ValueError, match="Bucket name cannot be None or empty"):
        MinioParser(bucket_name=None)


def test_constructor_empty_bucket():
    """Test MinioParser constructor with empty bucket name."""
    # An empty string is falsy and must be rejected just like None.
    with pytest.raises(ValueError, match="Bucket name cannot be None or empty"):
        MinioParser(bucket_name="")
@pytest.fixture
def clear_lru_cache():
    """Fixture to clear the LRU cache between tests.

    get_parser is memoized with lru_cache, so a stale parser instance
    would leak from one test into the next without clearing both before
    and after each test.
    """
    get_parser.cache_clear()
    yield
    get_parser.cache_clear()
def test_returns_correct_instance(clear_lru_cache):
    """Test if get_parser returns the correct parser instance."""
    settings.AWS_STORAGE_BUCKET_NAME = "test-bucket"
    configured_parser = get_parser()
    assert isinstance(configured_parser, MinioParser)
    assert configured_parser._bucket_name == "test-bucket"


def test_caching_behavior(clear_lru_cache):
    """Test if the function properly caches the parser instance."""
    settings.AWS_STORAGE_BUCKET_NAME = "test-bucket"
    first_result = get_parser()
    second_result = get_parser()
    assert first_result is second_result  # Check object identity
def test_different_settings_new_instance(clear_lru_cache, settings):
    """Test if changing settings creates a new instance.

    Uses the clear_lru_cache fixture (missing before) so the test does not
    depend on which test ran first, and the pytest-django ``settings``
    fixture so the mutated value is restored afterwards.
    """
    settings.AWS_STORAGE_BUCKET_NAME = "different-bucket"
    parser = get_parser()
    assert parser._bucket_name == "different-bucket"


def test_import_error_handling(clear_lru_cache, settings):
    """Test handling of import errors for invalid parser class.

    Mutating django.conf.settings directly (as before) leaked the invalid
    parser path into every later test; the ``settings`` fixture reverts it.
    """
    settings.RECORDING_EVENT_PARSER_CLASS = "invalid.parser.path"
    with pytest.raises(ImportError):
        get_parser()
@mock.patch("core.recording.event.parsers.import_string")
def test_parser_instantiation_called_once(mock_import_string, clear_lru_cache):
    """Test that parser class is instantiated only once due to caching."""

    class FakeParser:
        """Stand-in parser that records the bucket name it was built with."""

        def __init__(self, bucket_name):
            self._bucket_name = bucket_name

    mock_import_string.return_value = FakeParser

    first_call = get_parser()
    second_call = get_parser()

    # Verify import_string was called only once
    mock_import_string.assert_called_once_with(settings.RECORDING_EVENT_PARSER_CLASS)
    assert first_call is second_call
def test_cache_clear_behavior(clear_lru_cache, settings):
    """Test that cache clearing creates new instance."""
    settings.RECORDING_EVENT_PARSER_CLASS = "core.recording.event.parsers.MinioParser"
    before_clear = get_parser()
    get_parser.cache_clear()
    after_clear = get_parser()
    # Should be different instances after cache clear
    assert before_clear is not after_clear

View File

@@ -424,6 +424,14 @@ class Base(Configuration):
environ_name="RECORDING_WORKER_CLASSES",
environ_prefix=None,
)
# Dotted import path of the class parsing storage webhook events;
# defaults to the Minio implementation.
RECORDING_EVENT_PARSER_CLASS = values.Value(
    "core.recording.event.parsers.MinioParser",
    environ_name="RECORDING_EVENT_PARSER_CLASS",
    environ_prefix=None,
)
# Toggle Bearer-token authentication on the storage event webhook endpoint.
# NOTE(review): RECORDING_STORAGE_EVENT_TOKEN (defined elsewhere) must be set
# whenever this flag is enabled, or authentication always fails — confirm.
RECORDING_ENABLE_STORAGE_EVENT_AUTH = values.BooleanValue(
    True, environ_name="RECORDING_ENABLE_STORAGE_EVENT_AUTH", environ_prefix=None
)
# pylint: disable=invalid-name
@property