From de3dfbb0c7aefd97166d08b590d3d9d39e2a12c9 Mon Sep 17 00:00:00 2001 From: Fabre Florian Date: Fri, 31 Oct 2025 11:20:46 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8(backend)=20keep=20ordering=20from=20f?= =?UTF-8?q?ulltext=20search=20in=20results?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep ordering by score from Find API on search/ results and fallback search still uses "-update_at" ordering as default Refactor pagination to work with a list instead of a queryset Signed-off-by: Fabre Florian --- src/backend/core/api/viewsets.py | 58 +++-- .../documents/test_api_documents_search.py | 232 ++++++++++++++++-- 2 files changed, 244 insertions(+), 46 deletions(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index b5965ca2..43a39f56 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -1085,12 +1085,10 @@ class DocumentViewSet( {"id": str(duplicated_document.id)}, status=status.HTTP_201_CREATED ) - def _simple_search_queryset(self, params): + def _search_simple(self, request, text): """ Returns a queryset filtered by the content of the document title """ - text = params.validated_data["q"] - # As the 'list' view we get a prefiltered queryset (deleted docs are excluded) queryset = self.get_queryset() filterset = DocumentFilter({"title": text}, queryset=queryset) @@ -1098,25 +1096,47 @@ class DocumentViewSet( if not filterset.is_valid(): raise drf.exceptions.ValidationError(filterset.errors) - return filterset.filter_queryset(queryset) + queryset = filterset.filter_queryset(queryset) - def _fulltext_search_queryset(self, indexer, token, user, params): + return self.get_response_for_queryset( + queryset.order_by("-updated_at"), + context={ + "request": request, + }, + ) + + def _search_fulltext(self, indexer, request, params): """ Returns a queryset from the results the fulltext search of Find """ + access_token = request.session.get("oidc_access_token") + user = request.user text = params.validated_data["q"] queryset = models.Document.objects.all() # Retrieve the documents ids from Find. results = indexer.search( text=text, - token=token, + token=access_token, visited=get_visited_document_ids_of(queryset, user), - page=params.validated_data.get("page", 1), - page_size=params.validated_data.get("page_size", 20), + page=1, + page_size=100, ) - return queryset.filter(pk__in=results) + docs_by_uuid = {str(d.pk): d for d in queryset.filter(pk__in=results)} + ordered_docs = [docs_by_uuid[id] for id in results] + + page = self.paginate_queryset(ordered_docs) + + serializer = self.get_serializer( + page if page else ordered_docs, + many=True, + context={ + "request": request, + }, + ) + + return self.get_paginated_response(serializer.data) @drf.decorators.action(detail=False, methods=["get"], url_path="search") @method_decorator(refresh_oidc_access_token) @@ -1132,29 +1152,17 @@ class DocumentViewSet( The ordering is always by the most recent first. """ - access_token = request.session.get("oidc_access_token") - user = request.user - params = serializers.SearchDocumentSerializer(data=request.query_params) params.is_valid(raise_exception=True) indexer = get_document_indexer() if indexer: - queryset = self._fulltext_search_queryset( - indexer, token=access_token, user=user, params=params - ) - else: - # The indexer is not configured, we fallback on a simple icontains filter by the - # model field 'title'. - queryset = self._simple_search_queryset(params) + return self._search_fulltext(indexer, request, params=params) - return self.get_response_for_queryset( - queryset.order_by("-updated_at"), - context={ - "request": request, - }, - ) + # The indexer is not configured, we fallback on a simple icontains filter by the + # model field 'title'. + return self._search_simple(request, text=params.validated_data["q"]) @drf.decorators.action(detail=True, methods=["get"], url_path="versions") def versions_list(self, request, *args, **kwargs): diff --git a/src/backend/core/tests/documents/test_api_documents_search.py b/src/backend/core/tests/documents/test_api_documents_search.py index 869a8d56..34766178 100644 --- a/src/backend/core/tests/documents/test_api_documents_search.py +++ b/src/backend/core/tests/documents/test_api_documents_search.py @@ -2,8 +2,11 @@ Tests for Documents API endpoint in impress's core app: list """ +import random from json import loads as json_loads +from django.test import RequestFactory + import pytest import responses from faker import Faker @@ -16,6 +19,15 @@ fake = Faker() pytestmark = pytest.mark.django_db +def build_search_url(**kwargs): + """Build absolute uri for search endpoint with ORDERED query arguments""" + return ( + RequestFactory() + .get("/api/v1.0/documents/search/", dict(sorted(kwargs.items()))) + .build_absolute_uri() + ) + + @pytest.mark.parametrize("role", models.LinkRoleChoices.values) @pytest.mark.parametrize("reach", models.LinkReachChoices.values) @responses.activate @@ -191,8 +203,62 @@ def test_api_documents_search_format(indexer_settings): @responses.activate -def test_api_documents_search_pagination(indexer_settings): - """Documents should be ordered by descending "updated_at" by default""" +@pytest.mark.parametrize( + "pagination, status, expected", + ( + ( + {"page": 1, "page_size": 10}, + 200, + { + "count": 10, + "previous": None, + "next": None, + "range": (0, None), + }, + ), + ( + {}, + 200, + { + "count": 10, + "previous": None, + "next": None, + "range": (0, None), + "api_page_size": 21, # default page_size is 20 + }, + ), + ( + {"page": 2, "page_size": 10}, + 404, + {}, + ), + ( + {"page": 1, "page_size": 5}, + 200, + { + "count": 10, + "previous": None, + "next": {"page": 2, "page_size": 5}, + "range": (0, 5), + }, + ), + ( + {"page": 2, "page_size": 5}, + 200, + { + "count": 10, + "previous": {"page_size": 5}, + "next": None, + "range": (5, None), + }, + ), + ({"page": 3, "page_size": 5}, 404, {}), + ), +) +def test_api_documents_search_pagination( + indexer_settings, pagination, status, expected +): + """Documents should be ordered by descending "score" by default""" indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search" assert get_document_indexer() is not None @@ -202,35 +268,159 @@ def test_api_documents_search_pagination(indexer_settings): client = APIClient() client.force_login(user) - docs = factories.DocumentFactory.create_batch(10) + docs = factories.DocumentFactory.create_batch(10, title="alpha", users=[user]) + + docs_by_uuid = {str(doc.pk): doc for doc in docs} + api_results = [{"_id": id} for id in docs_by_uuid.keys()] + + # reorder randomly to simulate score ordering + random.shuffle(api_results) # Find response # pylint: disable-next=assignment-from-none api_search = responses.add( responses.POST, "http://find/api/v1.0/search", - json=[{"_id": str(doc.pk)} for doc in docs], + json=api_results, status=200, ) response = client.get( - "/api/v1.0/documents/search/", data={"q": "alpha", "page": 2, "page_size": 5} + "/api/v1.0/documents/search/", + data={ + "q": "alpha", + **pagination, + }, ) - assert response.status_code == 200 - content = response.json() - results = content.pop("results") - assert len(results) == 5 + assert response.status_code == status - # Check the query parameters. - assert api_search.call_count == 1 - assert api_search.calls[0].response.status_code == 200 - assert json_loads(api_search.calls[0].request.body) == { - "q": "alpha", - "visited": [], - "services": ["docs"], - "page_number": 2, - "page_size": 5, - "order_by": "updated_at", - "order_direction": "desc", - } + if response.status_code < 300: + previous_url = ( + build_search_url(q="alpha", **expected["previous"]) + if expected["previous"] + else None + ) + next_url = ( + build_search_url(q="alpha", **expected["next"]) + if expected["next"] + else None + ) + start, end = expected["range"] + + content = response.json() + + assert content["count"] == expected["count"] + assert content["previous"] == previous_url + assert content["next"] == next_url + + results = content.pop("results") + + # The find api results ordering by score is kept + assert [r["id"] for r in results] == [r["_id"] for r in api_results[start:end]] + + # Check the query parameters. + assert api_search.call_count == 1 + assert api_search.calls[0].response.status_code == 200 + assert json_loads(api_search.calls[0].request.body) == { + "q": "alpha", + "visited": [], + "services": ["docs"], + "page_number": 1, + "page_size": 100, + "order_by": "updated_at", + "order_direction": "desc", + } + + +@responses.activate +@pytest.mark.parametrize( + "pagination, status, expected", + ( + ( + {"page": 1, "page_size": 10}, + 200, + {"count": 10, "previous": None, "next": None, "range": (0, None)}, + ), + ( + {}, + 200, + {"count": 10, "previous": None, "next": None, "range": (0, None)}, + ), + ( + {"page": 2, "page_size": 10}, + 404, + {}, + ), + ( + {"page": 1, "page_size": 5}, + 200, + { + "count": 10, + "previous": None, + "next": {"page": 2, "page_size": 5}, + "range": (0, 5), + }, + ), + ( + {"page": 2, "page_size": 5}, + 200, + { + "count": 10, + "previous": {"page_size": 5}, + "next": None, + "range": (5, None), + }, + ), + ({"page": 3, "page_size": 5}, 404, {}), + ), +) +def test_api_documents_search_pagination_endpoint_is_none( + indexer_settings, pagination, status, expected +): + """Documents should be ordered by descending "-updated_at" by default""" + indexer_settings.SEARCH_INDEXER_QUERY_URL = None + + assert get_document_indexer() is None + + user = factories.UserFactory() + + client = APIClient() + client.force_login(user) + + factories.DocumentFactory.create_batch(10, title="alpha", users=[user]) + + response = client.get( + "/api/v1.0/documents/search/", + data={ + "q": "alpha", + **pagination, + }, + ) + + assert response.status_code == status + + if response.status_code < 300: + previous_url = ( + build_search_url(q="alpha", **expected["previous"]) + if expected["previous"] + else None + ) + next_url = ( + build_search_url(q="alpha", **expected["next"]) + if expected["next"] + else None + ) + queryset = models.Document.objects.order_by("-updated_at") + start, end = expected["range"] + expected_results = [str(d.pk) for d in queryset[start:end]] + + content = response.json() + + assert content["count"] == expected["count"] + assert content["previous"] == previous_url + assert content["next"] == next_url + + results = content.pop("results") + + assert [r["id"] for r in results] == expected_results