(back) create a cors proxy fetching docs external resources

When exporting a document to PDF, if the doc contains external
resources, we want to fetch them through a proxy that bypasses CORS
restrictions. To ensure this endpoint is not used for anything other
than fetching URLs contained in the doc, we use access control and check
that the URL really exists in the document.
This commit is contained in:
Anthony LC
2025-03-10 10:11:38 +01:00
committed by Anthony LC
parent 1c02b0ad8e
commit 6efc2377fe
6 changed files with 158 additions and 2 deletions

View File

@@ -4,7 +4,7 @@
import logging
import re
import uuid
from urllib.parse import urlparse
from urllib.parse import unquote, urlparse
from django.conf import settings
from django.contrib.postgres.aggregates import ArrayAgg
@@ -16,8 +16,9 @@ from django.db import models as db
from django.db import transaction
from django.db.models.expressions import RawSQL
from django.db.models.functions import Left, Length
from django.http import Http404
from django.http import Http404, StreamingHttpResponse
import requests
import rest_framework as drf
from botocore.exceptions import ClientError
from rest_framework import filters, status, viewsets
@@ -1237,6 +1238,58 @@ class DocumentViewSet(
return drf.response.Response(response, status=drf.status.HTTP_200_OK)
@drf.decorators.action(
    detail=True,
    methods=["get"],
    name="",
    url_path="cors-proxy",
)
def cors_proxy(self, request, *args, **kwargs):
    """
    GET /api/v1.0/documents/<resource_id>/cors-proxy

    Act like a proxy to fetch external resources and bypass CORS restrictions.

    The target is read from the ``url`` query parameter. The caller's
    ``User-Agent`` and ``Accept`` headers are forwarded upstream, and the
    upstream body, status code and ``Content-Type`` are streamed back.

    Responses:
        400: ``url`` is missing, or is not an absolute http(s) URL.
        500: the upstream request raised a ``requests.RequestException``.
        otherwise: the upstream status code with the streamed upstream body.
    """
    url = request.query_params.get("url")
    if not url:
        return drf.response.Response(
            {"detail": "Missing 'url' query parameter"},
            status=drf.status.HTTP_400_BAD_REQUEST,
        )

    # Check for permissions.
    self.get_object()

    url = unquote(url)

    # Only absolute web URLs make sense for this proxy; reject anything else
    # as a client error instead of letting `requests` fail with a 500.
    try:
        parsed_url = urlparse(url)
    except ValueError:
        # urlparse raises ValueError on some malformed netlocs.
        parsed_url = None
    if (
        parsed_url is None
        or parsed_url.scheme not in ("http", "https")
        or not parsed_url.netloc
    ):
        return drf.response.Response(
            {"detail": "Invalid 'url' query parameter"},
            status=drf.status.HTTP_400_BAD_REQUEST,
        )

    # NOTE(review): nothing in this method verifies that `url` is actually
    # referenced by the document, nor that it resolves to a public host, so
    # any reader of the document can make the server fetch arbitrary URLs
    # (SSRF). Confirm whether that check is enforced elsewhere.

    def _stream_and_close(upstream_response):
        """Yield the upstream body and release the connection afterwards."""
        try:
            yield from upstream_response.iter_content(chunk_size=8192)
        finally:
            # With stream=True the connection is only released once the body
            # is fully consumed or the response is explicitly closed. This
            # also runs when the generator is closed early, provided
            # iteration has already started.
            upstream_response.close()

    try:
        upstream = requests.get(
            url,
            stream=True,
            headers={
                "User-Agent": request.headers.get("User-Agent", ""),
                "Accept": request.headers.get("Accept", ""),
            },
            timeout=10,
        )
    except requests.RequestException as e:
        logger.error("Proxy request failed: %s", e)
        return drf.response.Response(
            {"error": f"Failed to fetch resource: {e!s}"},
            status=drf.status.HTTP_500_INTERNAL_SERVER_ERROR,
        )

    # Stream the upstream body chunk by chunk rather than buffering it.
    return StreamingHttpResponse(
        streaming_content=_stream_and_close(upstream),
        content_type=upstream.headers.get(
            "Content-Type", "application/octet-stream"
        ),
        status=upstream.status_code,
    )
class DocumentAccessViewSet(
ResourceAccessViewsetMixin,

View File

@@ -793,6 +793,7 @@ class Document(MP_Node, BaseModel):
"children_list": can_get,
"children_create": can_update and user.is_authenticated,
"collaboration_auth": can_get,
"cors_proxy": can_get,
"descendants": can_get,
"destroy": is_owner,
"favorite": can_get and user.is_authenticated,

View File

@@ -0,0 +1,88 @@
"""Test on the CORS proxy API for documents."""
import pytest
from rest_framework.test import APIClient
from core import factories
# Apply the `django_db` marker to every test in this module.
pytestmark = pytest.mark.django_db
def test_api_docs_cors_proxy_valid_url():
    """
    Test the CORS proxy API for documents with a valid URL.

    The outbound HTTP call is stubbed so the test does not depend on network
    access or on the availability of the remote host.
    """
    # Imported locally so this test is self-contained.
    from unittest import mock

    document = factories.DocumentFactory(link_reach="public")

    client = APIClient()
    url_to_fetch = "https://docs.numerique.gouv.fr/assets/logo-gouv.png"

    # Minimal stand-in exposing what the view reads from the upstream response.
    upstream = mock.Mock(status_code=200, headers={"Content-Type": "image/png"})
    upstream.iter_content.return_value = iter([b"fake-png-bytes"])

    with mock.patch("requests.get", return_value=upstream) as mocked_get:
        response = client.get(
            f"/api/v1.0/documents/{document.id!s}/cors-proxy/?url={url_to_fetch}"
        )
        # Consume the stream while the stub is still in place.
        body = b"".join(response.streaming_content)

    assert response.status_code == 200
    assert response.headers["Content-Type"] == "image/png"
    assert body == b"fake-png-bytes"
    mocked_get.assert_called_once()
    assert mocked_get.call_args.args[0] == url_to_fetch
def test_api_docs_cors_proxy_without_url_query_string():
    """A request without the `url` query parameter is rejected with a 400."""
    public_doc = factories.DocumentFactory(link_reach="public")
    endpoint = f"/api/v1.0/documents/{public_doc.id!s}/cors-proxy/"

    response = APIClient().get(endpoint)

    assert response.status_code == 400
    expected_payload = {"detail": "Missing 'url' query parameter"}
    assert response.json() == expected_payload
def test_api_docs_cors_proxy_anonymous_document_not_public():
    """An anonymous user gets a 401 from the CORS proxy of a non-public document."""
    protected_doc = factories.DocumentFactory(link_reach="authenticated")
    target = "https://docs.numerique.gouv.fr/assets/logo-gouv.png"
    endpoint = f"/api/v1.0/documents/{protected_doc.id!s}/cors-proxy/?url={target}"

    anonymous_client = APIClient()
    response = anonymous_client.get(endpoint)

    assert response.status_code == 401
    expected_payload = {"detail": "Authentication credentials were not provided."}
    assert response.json() == expected_payload
def test_api_docs_cors_proxy_authenticated_user_accessing_protected_doc():
    """
    Test the CORS proxy API for documents with an authenticated user accessing a protected
    document.

    The outbound HTTP call is stubbed so the test does not depend on network
    access or on the availability of the remote host.
    """
    # Imported locally so this test is self-contained.
    from unittest import mock

    document = factories.DocumentFactory(link_reach="authenticated")

    user = factories.UserFactory()

    client = APIClient()
    client.force_login(user)
    url_to_fetch = "https://docs.numerique.gouv.fr/assets/logo-gouv.png"

    # Minimal stand-in exposing what the view reads from the upstream response.
    upstream = mock.Mock(status_code=200, headers={"Content-Type": "image/png"})
    upstream.iter_content.return_value = iter([b"fake-png-bytes"])

    with mock.patch("requests.get", return_value=upstream) as mocked_get:
        response = client.get(
            f"/api/v1.0/documents/{document.id!s}/cors-proxy/?url={url_to_fetch}"
        )
        # Consume the stream while the stub is still in place.
        body = b"".join(response.streaming_content)

    assert response.status_code == 200
    assert response.headers["Content-Type"] == "image/png"
    assert body == b"fake-png-bytes"
    mocked_get.assert_called_once()
    assert mocked_get.call_args.args[0] == url_to_fetch
def test_api_docs_cors_proxy_authenticated_not_accessing_restricted_doc():
    """
    A logged-in user gets a 403 from the CORS proxy of a restricted document
    created without any access for that user.
    """
    restricted_doc = factories.DocumentFactory(link_reach="restricted")
    outsider = factories.UserFactory()

    api_client = APIClient()
    api_client.force_login(outsider)

    target = "https://docs.numerique.gouv.fr/assets/logo-gouv.png"
    endpoint = f"/api/v1.0/documents/{restricted_doc.id!s}/cors-proxy/?url={target}"
    response = api_client.get(endpoint)

    assert response.status_code == 403
    expected_payload = {"detail": "You do not have permission to perform this action."}
    assert response.json() == expected_payload

View File

@@ -34,6 +34,7 @@ def test_api_documents_retrieve_anonymous_public_standalone():
"children_create": False,
"children_list": True,
"collaboration_auth": True,
"cors_proxy": True,
"descendants": True,
"destroy": False,
# Anonymous user can't favorite a document even with read access
@@ -100,6 +101,7 @@ def test_api_documents_retrieve_anonymous_public_parent():
"children_list": True,
"collaboration_auth": True,
"descendants": True,
"cors_proxy": True,
"destroy": False,
# Anonymous user can't favorite a document even with read access
"favorite": False,
@@ -194,6 +196,7 @@ def test_api_documents_retrieve_authenticated_unrelated_public_or_authenticated(
"children_list": True,
"collaboration_auth": True,
"descendants": True,
"cors_proxy": True,
"destroy": False,
"favorite": True,
"invite_owner": False,
@@ -266,6 +269,7 @@ def test_api_documents_retrieve_authenticated_public_or_authenticated_parent(rea
"children_list": True,
"collaboration_auth": True,
"descendants": True,
"cors_proxy": True,
"destroy": False,
"favorite": True,
"invite_owner": False,
@@ -444,6 +448,7 @@ def test_api_documents_retrieve_authenticated_related_parent():
"children_list": True,
"collaboration_auth": True,
"descendants": True,
"cors_proxy": True,
"destroy": access.role == "owner",
"favorite": True,
"invite_owner": access.role == "owner",

View File

@@ -79,6 +79,7 @@ def test_api_documents_trashbin_format():
"children_list": True,
"collaboration_auth": True,
"descendants": True,
"cors_proxy": True,
"destroy": True,
"favorite": True,
"invite_owner": True,

View File

@@ -159,6 +159,7 @@ def test_models_documents_get_abilities_forbidden(
"children_list": False,
"collaboration_auth": False,
"descendants": False,
"cors_proxy": False,
"destroy": False,
"favorite": False,
"invite_owner": False,
@@ -217,6 +218,7 @@ def test_models_documents_get_abilities_reader(
"children_list": True,
"collaboration_auth": True,
"descendants": True,
"cors_proxy": True,
"destroy": False,
"favorite": is_authenticated,
"invite_owner": False,
@@ -277,6 +279,7 @@ def test_models_documents_get_abilities_editor(
"children_list": True,
"collaboration_auth": True,
"descendants": True,
"cors_proxy": True,
"destroy": False,
"favorite": is_authenticated,
"invite_owner": False,
@@ -326,6 +329,7 @@ def test_models_documents_get_abilities_owner(django_assert_num_queries):
"children_list": True,
"collaboration_auth": True,
"descendants": True,
"cors_proxy": True,
"destroy": True,
"favorite": True,
"invite_owner": True,
@@ -372,6 +376,7 @@ def test_models_documents_get_abilities_administrator(django_assert_num_queries)
"children_list": True,
"collaboration_auth": True,
"descendants": True,
"cors_proxy": True,
"destroy": False,
"favorite": True,
"invite_owner": False,
@@ -421,6 +426,7 @@ def test_models_documents_get_abilities_editor_user(django_assert_num_queries):
"children_list": True,
"collaboration_auth": True,
"descendants": True,
"cors_proxy": True,
"destroy": False,
"favorite": True,
"invite_owner": False,
@@ -477,6 +483,7 @@ def test_models_documents_get_abilities_reader_user(
"children_list": True,
"collaboration_auth": True,
"descendants": True,
"cors_proxy": True,
"destroy": False,
"favorite": True,
"invite_owner": False,
@@ -531,6 +538,7 @@ def test_models_documents_get_abilities_preset_role(django_assert_num_queries):
"children_list": True,
"collaboration_auth": True,
"descendants": True,
"cors_proxy": True,
"destroy": False,
"favorite": True,
"invite_owner": False,