From 03d4b2afbe3866df2bdcc8e16b2caebcb6401cd2 Mon Sep 17 00:00:00 2001 From: Manuel Raynaud Date: Tue, 9 Dec 2025 17:32:24 +0100 Subject: [PATCH] =?UTF-8?q?=E2=99=BB=EF=B8=8F(backend)=20stop=20allowing?= =?UTF-8?q?=20redirect=20in=20cors-proxy=20endpoint?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cors-proxy endpoint was allowing redirects when fetching the target URL. This can be useful if an image URL has changed, but it is also dangerous if an attacker wants to hide an SSRF behind a redirect. --- src/backend/core/api/viewsets.py | 8 +-- .../test_api_documents_cors_proxy.py | 51 +++++++++++++++++-- 2 files changed, 52 insertions(+), 7 deletions(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 9a4c2154..c4a137ee 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -1715,7 +1715,6 @@ class DocumentViewSet( if not hostname: raise drf.exceptions.ValidationError("Invalid hostname") - # Resolve hostname to IP address(es) # Check all resolved IPs to prevent DNS rebinding attacks try: @@ -1804,14 +1803,15 @@ class DocumentViewSet( "User-Agent": request.headers.get("User-Agent", ""), "Accept": request.headers.get("Accept", ""), }, + allow_redirects=False, timeout=10, ) + response.raise_for_status() content_type = response.headers.get("Content-Type", "") if not content_type.startswith("image/"): return drf.response.Response( - {"detail": "Invalid URL used."}, - status=status.HTTP_400_BAD_REQUEST + {"detail": "Invalid URL used."}, status=status.HTTP_400_BAD_REQUEST ) # Use StreamingHttpResponse with the response's iter_content to properly stream the data @@ -1829,7 +1829,7 @@ class DocumentViewSet( except requests.RequestException as e: logger.exception(e) return drf.response.Response( - {"error": f"Failed to fetch resource from {url}"}, + {"detail": "Invalid URL used."}, status=status.HTTP_400_BAD_REQUEST, ) diff --git 
a/src/backend/core/tests/documents/test_api_documents_cors_proxy.py b/src/backend/core/tests/documents/test_api_documents_cors_proxy.py index 6f0d4316..b935b578 100644 --- a/src/backend/core/tests/documents/test_api_documents_cors_proxy.py +++ b/src/backend/core/tests/documents/test_api_documents_cors_proxy.py @@ -190,6 +190,53 @@ def test_api_docs_cors_proxy_unsupported_media_type(mock_getaddrinfo): assert response.json() == {"detail": "Invalid URL used."} +@unittest.mock.patch("core.api.viewsets.socket.getaddrinfo") +@responses.activate +def test_api_docs_cors_proxy_redirect(mock_getaddrinfo): + """Test the CORS proxy API for documents with a redirect.""" + document = factories.DocumentFactory(link_reach="public") + + # Mock DNS resolution to return a public IP address + mock_getaddrinfo.return_value = [ + (socket.AF_INET, socket.SOCK_STREAM, 0, "", ("8.8.8.8", 0)) + ] + + client = APIClient() + url_to_fetch = "https://external-url.com/assets/index.html" + responses.get( + url_to_fetch, + body=b"", + status=302, + headers={"Location": "https://external-url.com/other/assets/index.html"}, + ) + response = client.get( + f"/api/v1.0/documents/{document.id!s}/cors-proxy/?url={url_to_fetch}" + ) + assert response.status_code == 400 + assert response.json() == {"detail": "Invalid URL used."} + + +@unittest.mock.patch("core.api.viewsets.socket.getaddrinfo") +@responses.activate +def test_api_docs_cors_proxy_url_not_returning_200(mock_getaddrinfo): + """Test the CORS proxy API for documents with a URL that does not return 200.""" + document = factories.DocumentFactory(link_reach="public") + + # Mock DNS resolution to return a public IP address + mock_getaddrinfo.return_value = [ + (socket.AF_INET, socket.SOCK_STREAM, 0, "", ("8.8.8.8", 0)) + ] + + client = APIClient() + url_to_fetch = "https://external-url.com/assets/index.html" + responses.get(url_to_fetch, body=b"", status=404) + response = client.get( + 
f"/api/v1.0/documents/{document.id!s}/cors-proxy/?url={url_to_fetch}" + ) + assert response.status_code == 400 + assert response.json() == {"detail": "Invalid URL used."} + + @pytest.mark.parametrize( "url_to_fetch", [ @@ -229,9 +276,7 @@ def test_api_docs_cors_proxy_request_failed(mock_getaddrinfo): f"/api/v1.0/documents/{document.id!s}/cors-proxy/?url={url_to_fetch}" ) assert response.status_code == 400 - assert response.json() == { - "error": "Failed to fetch resource from https://external-url.com/assets/index.html" - } + assert response.json() == {"detail": "Invalid URL used."} @pytest.mark.parametrize(