✨(backend) add url to download media attachments with access rights
We make use of nginx subrequests to block media file downloads while we check for access rights. The request is then proxied to the object storage engine and authorization is added via the "Authorization" header. This way the media urls are static and can be stored in the document's json content without compromising on security: access control is done on all requests based on the user cookie session.
This commit is contained in:
committed by
Samuel Paccoud
parent
c9f1356d3e
commit
67a20f249e
33
src/backend/core/api/utils.py
Normal file
33
src/backend/core/api/utils.py
Normal file
@@ -0,0 +1,33 @@
|
||||
"""Util to generate S3 authorization headers for object storage access control"""
|
||||
|
||||
from django.core.files.storage import default_storage
|
||||
|
||||
import botocore
|
||||
|
||||
|
||||
def generate_s3_authorization_headers(key):
|
||||
"""
|
||||
Generate authorization headers for an s3 object.
|
||||
These headers can be used as an alternative to signed urls with many benefits:
|
||||
- the urls of our files never expire and can be stored in our documents' content
|
||||
- we don't leak authorized urls that could be shared (file access can only be done
|
||||
with cookies)
|
||||
- access control is truly realtime
|
||||
- the object storage service does not need to be exposed on internet
|
||||
"""
|
||||
url = default_storage.unsigned_connection.meta.client.generate_presigned_url(
|
||||
"get_object",
|
||||
ExpiresIn=0,
|
||||
Params={"Bucket": default_storage.bucket_name, "Key": key},
|
||||
)
|
||||
request = botocore.awsrequest.AWSRequest(method="get", url=url)
|
||||
|
||||
s3_client = default_storage.connection.meta.client
|
||||
# pylint: disable=protected-access
|
||||
credentials = s3_client._request_signer._credentials # noqa: SLF001
|
||||
frozen_credentials = credentials.get_frozen_credentials()
|
||||
region = s3_client.meta.region_name
|
||||
auth = botocore.auth.S3SigV4Auth(frozen_credentials, "s3", region)
|
||||
auth.add_auth(request)
|
||||
|
||||
return request
|
||||
@@ -1,7 +1,9 @@
|
||||
"""API endpoints"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.postgres.aggregates import ArrayAgg
|
||||
@@ -30,7 +32,17 @@ from rest_framework import (
|
||||
from core import models
|
||||
from core.utils import email_invitation
|
||||
|
||||
from . import permissions, serializers
|
||||
from . import permissions, serializers, utils
|
||||
|
||||
ATTACHMENTS_FOLDER = "attachments"
|
||||
UUID_REGEX = (
|
||||
r"[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}"
|
||||
)
|
||||
FILE_EXT_REGEX = r"\.[a-zA-Z]{3,4}"
|
||||
MEDIA_URL_PATTERN = re.compile(
|
||||
f"{settings.MEDIA_URL:s}({UUID_REGEX:s})/"
|
||||
f"({ATTACHMENTS_FOLDER:s}/{UUID_REGEX:s}{FILE_EXT_REGEX:s})$"
|
||||
)
|
||||
|
||||
# pylint: disable=too-many-ancestors
|
||||
|
||||
@@ -422,6 +434,48 @@ class DocumentViewSet(
|
||||
{"file": f"{settings.MEDIA_URL:s}{key:s}"}, status=status.HTTP_201_CREATED
|
||||
)
|
||||
|
||||
@decorators.action(detail=False, methods=["get"], url_path="retrieve-auth")
|
||||
def retrieve_auth(self, request, *args, **kwargs):
|
||||
"""
|
||||
This view is used by an Nginx subrequest to control access to a document's
|
||||
attachment file.
|
||||
|
||||
The original url is passed by nginx in the "HTTP_X_ORIGINAL_URL" header.
|
||||
See corresponding ingress configuration in Helm chart and read about the
|
||||
nginx.ingress.kubernetes.io/auth-url annotation to understand how the Nginx ingress
|
||||
is configured to do this.
|
||||
|
||||
Based on the original url and the logged in user, we must decide if we authorize Nginx
|
||||
to let this request go through (by returning a 200 code) or if we block it (by returning
|
||||
a 403 error). Note that we return 403 errors without any further details for security
|
||||
reasons.
|
||||
|
||||
When we let the request go through, we compute authorization headers that will be added to
|
||||
the request going through thanks to the nginx.ingress.kubernetes.io/auth-response-headers
|
||||
annotation. The request will then be proxied to the object storage backend who will
|
||||
respond with the file after checking the signature included in headers.
|
||||
"""
|
||||
original_url = urlparse(request.META.get("HTTP_X_ORIGINAL_URL"))
|
||||
match = MEDIA_URL_PATTERN.search(original_url.path)
|
||||
|
||||
try:
|
||||
pk, attachment_key = match.groups()
|
||||
except AttributeError as excpt:
|
||||
raise exceptions.PermissionDenied() from excpt
|
||||
|
||||
# Check permission
|
||||
try:
|
||||
document = models.Document.objects.get(pk=pk)
|
||||
except models.Document.DoesNotExist as excpt:
|
||||
raise exceptions.PermissionDenied() from excpt
|
||||
|
||||
if not document.get_abilities(request.user).get("retrieve", False):
|
||||
raise exceptions.PermissionDenied()
|
||||
|
||||
# Generate authorization headers and return an authorization to proceed with the request
|
||||
request = utils.generate_s3_authorization_headers(f"{pk:s}/{attachment_key:s}")
|
||||
return drf_response.Response("authorized", headers=request.headers, status=200)
|
||||
|
||||
|
||||
class DocumentAccessViewSet(
|
||||
ResourceAccessViewsetMixin,
|
||||
|
||||
@@ -103,9 +103,7 @@ def test_api_documents_attachment_upload_reader(via, mock_user_get_teams):
|
||||
|
||||
@pytest.mark.parametrize("role", ["editor", "administrator", "owner"])
|
||||
@pytest.mark.parametrize("via", VIA)
|
||||
def test_api_documents_attachment_upload_success(
|
||||
via, role, mock_user_get_teams, settings
|
||||
):
|
||||
def test_api_documents_attachment_upload_success(via, role, mock_user_get_teams):
|
||||
"""
|
||||
Editors, administrators and owners of a document should be able to upload an attachment.
|
||||
"""
|
||||
@@ -130,7 +128,7 @@ def test_api_documents_attachment_upload_success(
|
||||
|
||||
assert response.status_code == 201
|
||||
|
||||
pattern = re.compile(rf"^{settings.MEDIA_URL}{document.id!s}/attachments/(.*)\.jpg")
|
||||
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.jpg")
|
||||
match = pattern.search(response.json()["file"])
|
||||
file_id = match.group(1)
|
||||
|
||||
|
||||
@@ -0,0 +1,213 @@
|
||||
"""
|
||||
Test file uploads API endpoint for users in impress's core app.
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from io import BytesIO
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.files.storage import default_storage
|
||||
from django.utils import timezone
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
from rest_framework.test import APIClient
|
||||
|
||||
from core import factories
|
||||
from core.tests.conftest import TEAM, USER, VIA
|
||||
|
||||
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
def test_api_documents_retrieve_auth_anonymous_public():
|
||||
"""Anonymous users should be able to retrieve attachments linked to a public document"""
|
||||
document = factories.DocumentFactory(is_public=True)
|
||||
|
||||
filename = f"{uuid.uuid4()!s}.jpg"
|
||||
key = f"{document.pk!s}/attachments/{filename:s}"
|
||||
|
||||
default_storage.connection.meta.client.put_object(
|
||||
Bucket=default_storage.bucket_name,
|
||||
Key=key,
|
||||
Body=BytesIO(b"my prose"),
|
||||
ContentType="text/plain",
|
||||
)
|
||||
|
||||
original_url = f"http://localhost/media/{key:s}"
|
||||
response = APIClient().get(
|
||||
"/api/v1.0/documents/retrieve-auth/", HTTP_X_ORIGINAL_URL=original_url
|
||||
)
|
||||
|
||||
assert response.status_code == 200
|
||||
|
||||
authorization = response["Authorization"]
|
||||
assert "AWS4-HMAC-SHA256 Credential=" in authorization
|
||||
assert (
|
||||
"SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature="
|
||||
in authorization
|
||||
)
|
||||
assert response["X-Amz-Date"] == timezone.now().strftime("%Y%m%dT%H%M%SZ")
|
||||
|
||||
s3_url = urlparse(settings.AWS_S3_ENDPOINT_URL)
|
||||
file_url = f"{settings.AWS_S3_ENDPOINT_URL:s}/impress-media-storage/{key:s}"
|
||||
response = requests.get(
|
||||
file_url,
|
||||
headers={
|
||||
"authorization": authorization,
|
||||
"x-amz-date": response["x-amz-date"],
|
||||
"x-amz-content-sha256": response["x-amz-content-sha256"],
|
||||
"Host": f"{s3_url.hostname:s}:{s3_url.port:d}",
|
||||
},
|
||||
timeout=1,
|
||||
)
|
||||
assert response.content.decode("utf-8") == "my prose"
|
||||
|
||||
|
||||
def test_api_documents_retrieve_auth_anonymous_not_public():
|
||||
"""
|
||||
Anonymous users should not be allowed to retrieve attachments linked to a document
|
||||
that is not public.
|
||||
"""
|
||||
document = factories.DocumentFactory(is_public=False)
|
||||
|
||||
filename = f"{uuid.uuid4()!s}.jpg"
|
||||
media_url = f"http://localhost/media/{document.pk!s}/attachments/{filename:s}"
|
||||
|
||||
response = APIClient().get(
|
||||
"/api/v1.0/documents/retrieve-auth/", HTTP_X_ORIGINAL_URL=media_url
|
||||
)
|
||||
|
||||
assert response.status_code == 403
|
||||
assert "Authorization" not in response
|
||||
|
||||
|
||||
def test_api_documents_retrieve_auth_authenticated_public():
|
||||
"""
|
||||
Authenticated users who are not related to a document should be able to
|
||||
retrieve attachments linked to a public document.
|
||||
"""
|
||||
document = factories.DocumentFactory(is_public=True)
|
||||
|
||||
user = factories.UserFactory()
|
||||
client = APIClient()
|
||||
client.force_login(user)
|
||||
|
||||
filename = f"{uuid.uuid4()!s}.jpg"
|
||||
key = f"{document.pk!s}/attachments/{filename:s}"
|
||||
|
||||
default_storage.connection.meta.client.put_object(
|
||||
Bucket=default_storage.bucket_name,
|
||||
Key=key,
|
||||
Body=BytesIO(b"my prose"),
|
||||
ContentType="text/plain",
|
||||
)
|
||||
|
||||
original_url = f"http://localhost/media/{key:s}"
|
||||
response = APIClient().get(
|
||||
"/api/v1.0/documents/retrieve-auth/", HTTP_X_ORIGINAL_URL=original_url
|
||||
)
|
||||
|
||||
assert response.status_code == 200
|
||||
|
||||
authorization = response["Authorization"]
|
||||
assert "AWS4-HMAC-SHA256 Credential=" in authorization
|
||||
assert (
|
||||
"SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature="
|
||||
in authorization
|
||||
)
|
||||
assert response["X-Amz-Date"] == timezone.now().strftime("%Y%m%dT%H%M%SZ")
|
||||
|
||||
s3_url = urlparse(settings.AWS_S3_ENDPOINT_URL)
|
||||
file_url = f"{settings.AWS_S3_ENDPOINT_URL:s}/impress-media-storage/{key:s}"
|
||||
response = requests.get(
|
||||
file_url,
|
||||
headers={
|
||||
"authorization": authorization,
|
||||
"x-amz-date": response["x-amz-date"],
|
||||
"x-amz-content-sha256": response["x-amz-content-sha256"],
|
||||
"Host": f"{s3_url.hostname:s}:{s3_url.port:d}",
|
||||
},
|
||||
timeout=1,
|
||||
)
|
||||
assert response.content.decode("utf-8") == "my prose"
|
||||
|
||||
|
||||
def test_api_documents_retrieve_auth_authenticated_not_public():
|
||||
"""
|
||||
Authenticated users who are not related to a document should not be allowed to
|
||||
retrieve attachments linked to a document that is not public.
|
||||
"""
|
||||
document = factories.DocumentFactory(is_public=False)
|
||||
|
||||
user = factories.UserFactory()
|
||||
client = APIClient()
|
||||
client.force_login(user)
|
||||
|
||||
filename = f"{uuid.uuid4()!s}.jpg"
|
||||
media_url = f"http://localhost/media/{document.pk!s}/attachments/{filename:s}"
|
||||
|
||||
response = APIClient().get(
|
||||
"/api/v1.0/documents/retrieve-auth/", HTTP_X_ORIGINAL_URL=media_url
|
||||
)
|
||||
|
||||
assert response.status_code == 403
|
||||
assert "Authorization" not in response
|
||||
|
||||
|
||||
@pytest.mark.parametrize("is_public", [True, False])
|
||||
@pytest.mark.parametrize("via", VIA)
|
||||
def test_api_documents_retrieve_auth_related(via, is_public, mock_user_get_teams):
|
||||
"""
|
||||
Users who have a role on a document, whatever the role, should be able to
|
||||
retrieve related attachments.
|
||||
"""
|
||||
user = factories.UserFactory()
|
||||
client = APIClient()
|
||||
client.force_login(user)
|
||||
|
||||
document = factories.DocumentFactory(is_public=is_public)
|
||||
if via == USER:
|
||||
factories.UserDocumentAccessFactory(document=document, user=user)
|
||||
elif via == TEAM:
|
||||
mock_user_get_teams.return_value = ["lasuite", "unknown"]
|
||||
factories.TeamDocumentAccessFactory(document=document, team="lasuite")
|
||||
|
||||
filename = f"{uuid.uuid4()!s}.jpg"
|
||||
key = f"{document.pk!s}/attachments/{filename:s}"
|
||||
|
||||
default_storage.connection.meta.client.put_object(
|
||||
Bucket=default_storage.bucket_name,
|
||||
Key=key,
|
||||
Body=BytesIO(b"my prose"),
|
||||
ContentType="text/plain",
|
||||
)
|
||||
|
||||
original_url = f"http://localhost/media/{key:s}"
|
||||
response = client.get(
|
||||
"/api/v1.0/documents/retrieve-auth/", HTTP_X_ORIGINAL_URL=original_url
|
||||
)
|
||||
|
||||
assert response.status_code == 200
|
||||
|
||||
authorization = response["Authorization"]
|
||||
assert "AWS4-HMAC-SHA256 Credential=" in authorization
|
||||
assert (
|
||||
"SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature="
|
||||
in authorization
|
||||
)
|
||||
assert response["X-Amz-Date"] == timezone.now().strftime("%Y%m%dT%H%M%SZ")
|
||||
|
||||
s3_url = urlparse(settings.AWS_S3_ENDPOINT_URL)
|
||||
file_url = f"{settings.AWS_S3_ENDPOINT_URL:s}/impress-media-storage/{key:s}"
|
||||
response = requests.get(
|
||||
file_url,
|
||||
headers={
|
||||
"authorization": authorization,
|
||||
"x-amz-date": response["x-amz-date"],
|
||||
"x-amz-content-sha256": response["x-amz-content-sha256"],
|
||||
"Host": f"{s3_url.hostname:s}:{s3_url.port:d}",
|
||||
},
|
||||
timeout=1,
|
||||
)
|
||||
assert response.content.decode("utf-8") == "my prose"
|
||||
Reference in New Issue
Block a user