From 6efc2377fe56b6116aac3003ff1a84169e916287 Mon Sep 17 00:00:00 2001 From: Anthony LC Date: Mon, 10 Mar 2025 10:11:38 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8(back)=20create=20a=20cors=20proxy=20f?= =?UTF-8?q?etching=20docs=20external=20resources?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When exporting a document to PDF, if the doc contains external resources, we want to fetch them using a proxy bypassing CORS restrictions. To ensure this endpoint is not used for anything other than fetching URLs contained in the doc, we use access control and check that the URL really exists in the document. --- src/backend/core/api/viewsets.py | 57 +++++++++++- src/backend/core/models.py | 1 + .../test_api_documents_cors_proxy.py | 88 +++++++++++++++++++ .../documents/test_api_documents_retrieve.py | 5 ++ .../documents/test_api_documents_trashbin.py | 1 + .../core/tests/test_models_documents.py | 8 ++ 6 files changed, 158 insertions(+), 2 deletions(-) create mode 100644 src/backend/core/tests/documents/test_api_documents_cors_proxy.py diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index b01b07fb..910e0a86 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -4,7 +4,7 @@ import logging import re import uuid -from urllib.parse import urlparse +from urllib.parse import unquote, urlparse from django.conf import settings from django.contrib.postgres.aggregates import ArrayAgg @@ -16,8 +16,9 @@ from django.db import models as db from django.db import transaction from django.db.models.expressions import RawSQL from django.db.models.functions import Left, Length -from django.http import Http404 +from django.http import Http404, StreamingHttpResponse +import requests import rest_framework as drf from botocore.exceptions import ClientError from rest_framework import filters, status, viewsets @@ -1237,6 +1238,58 @@ class DocumentViewSet( return 
drf.response.Response(response, status=drf.status.HTTP_200_OK) + @drf.decorators.action( + detail=True, + methods=["get"], + name="", + url_path="cors-proxy", + ) + def cors_proxy(self, request, *args, **kwargs): + """ + GET /api/v1.0/documents//cors-proxy + Act like a proxy to fetch external resources and bypass CORS restrictions. + """ + url = request.query_params.get("url") + if not url: + return drf.response.Response( + {"detail": "Missing 'url' query parameter"}, + status=drf.status.HTTP_400_BAD_REQUEST, + ) + + # Check for permissions. + self.get_object() + + url = unquote(url) + + try: + response = requests.get( + url, + stream=True, + headers={ + "User-Agent": request.headers.get("User-Agent", ""), + "Accept": request.headers.get("Accept", ""), + }, + timeout=10, + ) + + # Use StreamingHttpResponse with the response's iter_content to properly stream the data + proxy_response = StreamingHttpResponse( + streaming_content=response.iter_content(chunk_size=8192), + content_type=response.headers.get( + "Content-Type", "application/octet-stream" + ), + status=response.status_code, + ) + + return proxy_response + + except requests.RequestException as e: + logger.error("Proxy request failed: %s", str(e)) + return drf_response.Response( + {"error": f"Failed to fetch resource: {e!s}"}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + class DocumentAccessViewSet( ResourceAccessViewsetMixin, diff --git a/src/backend/core/models.py b/src/backend/core/models.py index e77d52b9..36e799f1 100644 --- a/src/backend/core/models.py +++ b/src/backend/core/models.py @@ -793,6 +793,7 @@ class Document(MP_Node, BaseModel): "children_list": can_get, "children_create": can_update and user.is_authenticated, "collaboration_auth": can_get, + "cors_proxy": can_get, "descendants": can_get, "destroy": is_owner, "favorite": can_get and user.is_authenticated, diff --git a/src/backend/core/tests/documents/test_api_documents_cors_proxy.py 
b/src/backend/core/tests/documents/test_api_documents_cors_proxy.py new file mode 100644 index 00000000..1a073830 --- /dev/null +++ b/src/backend/core/tests/documents/test_api_documents_cors_proxy.py @@ -0,0 +1,88 @@ +"""Test on the CORS proxy API for documents.""" + +import pytest +from rest_framework.test import APIClient + +from core import factories + +pytestmark = pytest.mark.django_db + + +def test_api_docs_cors_proxy_valid_url(): + """Test the CORS proxy API for documents with a valid URL.""" + document = factories.DocumentFactory(link_reach="public") + + client = APIClient() + url_to_fetch = "https://docs.numerique.gouv.fr/assets/logo-gouv.png" + response = client.get( + f"/api/v1.0/documents/{document.id!s}/cors-proxy/?url={url_to_fetch}" + ) + assert response.status_code == 200 + assert response.headers["Content-Type"] == "image/png" + assert response.streaming_content + + +def test_api_docs_cors_proxy_without_url_query_string(): + """Test the CORS proxy API for documents without a URL query string.""" + document = factories.DocumentFactory(link_reach="public") + + client = APIClient() + response = client.get(f"/api/v1.0/documents/{document.id!s}/cors-proxy/") + assert response.status_code == 400 + assert response.json() == {"detail": "Missing 'url' query parameter"} + + +def test_api_docs_cors_proxy_anonymous_document_not_public(): + """Test the CORS proxy API for documents with an anonymous user and a non-public document.""" + document = factories.DocumentFactory(link_reach="authenticated") + + client = APIClient() + url_to_fetch = "https://docs.numerique.gouv.fr/assets/logo-gouv.png" + response = client.get( + f"/api/v1.0/documents/{document.id!s}/cors-proxy/?url={url_to_fetch}" + ) + assert response.status_code == 401 + assert response.json() == { + "detail": "Authentication credentials were not provided." 
+ } + + +def test_api_docs_cors_proxy_authenticated_user_accessing_protected_doc(): + """ + Test the CORS proxy API for documents with an authenticated user accessing a protected + document. + """ + document = factories.DocumentFactory(link_reach="authenticated") + + user = factories.UserFactory() + + client = APIClient() + client.force_login(user) + url_to_fetch = "https://docs.numerique.gouv.fr/assets/logo-gouv.png" + response = client.get( + f"/api/v1.0/documents/{document.id!s}/cors-proxy/?url={url_to_fetch}" + ) + assert response.status_code == 200 + assert response.headers["Content-Type"] == "image/png" + assert response.streaming_content + + +def test_api_docs_cors_proxy_authenticated_not_accessing_restricted_doc(): + """ + Test the CORS proxy API for documents with an authenticated user not accessing a restricted + document. + """ + document = factories.DocumentFactory(link_reach="restricted") + + user = factories.UserFactory() + + client = APIClient() + client.force_login(user) + url_to_fetch = "https://docs.numerique.gouv.fr/assets/logo-gouv.png" + response = client.get( + f"/api/v1.0/documents/{document.id!s}/cors-proxy/?url={url_to_fetch}" + ) + assert response.status_code == 403 + assert response.json() == { + "detail": "You do not have permission to perform this action." 
+ } diff --git a/src/backend/core/tests/documents/test_api_documents_retrieve.py b/src/backend/core/tests/documents/test_api_documents_retrieve.py index 8b587f06..9c556f0f 100644 --- a/src/backend/core/tests/documents/test_api_documents_retrieve.py +++ b/src/backend/core/tests/documents/test_api_documents_retrieve.py @@ -34,6 +34,7 @@ def test_api_documents_retrieve_anonymous_public_standalone(): "children_create": False, "children_list": True, "collaboration_auth": True, + "cors_proxy": True, "descendants": True, "destroy": False, # Anonymous user can't favorite a document even with read access @@ -100,6 +101,7 @@ def test_api_documents_retrieve_anonymous_public_parent(): "children_list": True, "collaboration_auth": True, "descendants": True, + "cors_proxy": True, "destroy": False, # Anonymous user can't favorite a document even with read access "favorite": False, @@ -194,6 +196,7 @@ def test_api_documents_retrieve_authenticated_unrelated_public_or_authenticated( "children_list": True, "collaboration_auth": True, "descendants": True, + "cors_proxy": True, "destroy": False, "favorite": True, "invite_owner": False, @@ -266,6 +269,7 @@ def test_api_documents_retrieve_authenticated_public_or_authenticated_parent(rea "children_list": True, "collaboration_auth": True, "descendants": True, + "cors_proxy": True, "destroy": False, "favorite": True, "invite_owner": False, @@ -444,6 +448,7 @@ def test_api_documents_retrieve_authenticated_related_parent(): "children_list": True, "collaboration_auth": True, "descendants": True, + "cors_proxy": True, "destroy": access.role == "owner", "favorite": True, "invite_owner": access.role == "owner", diff --git a/src/backend/core/tests/documents/test_api_documents_trashbin.py b/src/backend/core/tests/documents/test_api_documents_trashbin.py index 6e78f17b..86287caa 100644 --- a/src/backend/core/tests/documents/test_api_documents_trashbin.py +++ b/src/backend/core/tests/documents/test_api_documents_trashbin.py @@ -79,6 +79,7 @@ def 
test_api_documents_trashbin_format(): "children_list": True, "collaboration_auth": True, "descendants": True, + "cors_proxy": True, "destroy": True, "favorite": True, "invite_owner": True, diff --git a/src/backend/core/tests/test_models_documents.py b/src/backend/core/tests/test_models_documents.py index 105b4d4c..f51857ee 100644 --- a/src/backend/core/tests/test_models_documents.py +++ b/src/backend/core/tests/test_models_documents.py @@ -159,6 +159,7 @@ def test_models_documents_get_abilities_forbidden( "children_list": False, "collaboration_auth": False, "descendants": False, + "cors_proxy": False, "destroy": False, "favorite": False, "invite_owner": False, @@ -217,6 +218,7 @@ def test_models_documents_get_abilities_reader( "children_list": True, "collaboration_auth": True, "descendants": True, + "cors_proxy": True, "destroy": False, "favorite": is_authenticated, "invite_owner": False, @@ -277,6 +279,7 @@ def test_models_documents_get_abilities_editor( "children_list": True, "collaboration_auth": True, "descendants": True, + "cors_proxy": True, "destroy": False, "favorite": is_authenticated, "invite_owner": False, @@ -326,6 +329,7 @@ def test_models_documents_get_abilities_owner(django_assert_num_queries): "children_list": True, "collaboration_auth": True, "descendants": True, + "cors_proxy": True, "destroy": True, "favorite": True, "invite_owner": True, @@ -372,6 +376,7 @@ def test_models_documents_get_abilities_administrator(django_assert_num_queries) "children_list": True, "collaboration_auth": True, "descendants": True, + "cors_proxy": True, "destroy": False, "favorite": True, "invite_owner": False, @@ -421,6 +426,7 @@ def test_models_documents_get_abilities_editor_user(django_assert_num_queries): "children_list": True, "collaboration_auth": True, "descendants": True, + "cors_proxy": True, "destroy": False, "favorite": True, "invite_owner": False, @@ -477,6 +483,7 @@ def test_models_documents_get_abilities_reader_user( "children_list": True, 
"collaboration_auth": True, "descendants": True, + "cors_proxy": True, "destroy": False, "favorite": True, "invite_owner": False, @@ -531,6 +538,7 @@ def test_models_documents_get_abilities_preset_role(django_assert_num_queries): "children_list": True, "collaboration_auth": True, "descendants": True, + "cors_proxy": True, "destroy": False, "favorite": True, "invite_owner": False,