(backend) keep ordering from fulltext search in results

Keep ordering by score from Find API on search/ results and
fallback search still uses "-updated_at" ordering as default

Refactor pagination to work with a list instead of a queryset

Signed-off-by: Fabre Florian <ffabre@hybird.org>
This commit is contained in:
Fabre Florian
2025-10-31 11:20:46 +01:00
committed by Quentin BEY
parent b0e7a511cb
commit de3dfbb0c7
2 changed files with 244 additions and 46 deletions

View File

@@ -1085,12 +1085,10 @@ class DocumentViewSet(
{"id": str(duplicated_document.id)}, status=status.HTTP_201_CREATED {"id": str(duplicated_document.id)}, status=status.HTTP_201_CREATED
) )
def _simple_search_queryset(self, params): def _search_simple(self, request, text):
""" """
Returns a queryset filtered by the content of the document title Returns a queryset filtered by the content of the document title
""" """
text = params.validated_data["q"]
# As the 'list' view we get a prefiltered queryset (deleted docs are excluded) # As the 'list' view we get a prefiltered queryset (deleted docs are excluded)
queryset = self.get_queryset() queryset = self.get_queryset()
filterset = DocumentFilter({"title": text}, queryset=queryset) filterset = DocumentFilter({"title": text}, queryset=queryset)
@@ -1098,25 +1096,47 @@ class DocumentViewSet(
if not filterset.is_valid(): if not filterset.is_valid():
raise drf.exceptions.ValidationError(filterset.errors) raise drf.exceptions.ValidationError(filterset.errors)
return filterset.filter_queryset(queryset) queryset = filterset.filter_queryset(queryset)
def _fulltext_search_queryset(self, indexer, token, user, params): return self.get_response_for_queryset(
queryset.order_by("-updated_at"),
context={
"request": request,
},
)
def _search_fulltext(self, indexer, request, params):
""" """
Returns a queryset from the results of the fulltext search of Find Returns a queryset from the results of the fulltext search of Find
""" """
access_token = request.session.get("oidc_access_token")
user = request.user
text = params.validated_data["q"] text = params.validated_data["q"]
queryset = models.Document.objects.all() queryset = models.Document.objects.all()
# Retrieve the documents ids from Find. # Retrieve the documents ids from Find.
results = indexer.search( results = indexer.search(
text=text, text=text,
token=token, token=access_token,
visited=get_visited_document_ids_of(queryset, user), visited=get_visited_document_ids_of(queryset, user),
page=params.validated_data.get("page", 1), page=1,
page_size=params.validated_data.get("page_size", 20), page_size=100,
) )
return queryset.filter(pk__in=results) docs_by_uuid = {str(d.pk): d for d in queryset.filter(pk__in=results)}
ordered_docs = [docs_by_uuid[id] for id in results]
page = self.paginate_queryset(ordered_docs)
serializer = self.get_serializer(
page if page else ordered_docs,
many=True,
context={
"request": request,
},
)
return self.get_paginated_response(serializer.data)
@drf.decorators.action(detail=False, methods=["get"], url_path="search") @drf.decorators.action(detail=False, methods=["get"], url_path="search")
@method_decorator(refresh_oidc_access_token) @method_decorator(refresh_oidc_access_token)
@@ -1132,29 +1152,17 @@ class DocumentViewSet(
The ordering is always by the most recent first. The ordering is always by the most recent first.
""" """
access_token = request.session.get("oidc_access_token")
user = request.user
params = serializers.SearchDocumentSerializer(data=request.query_params) params = serializers.SearchDocumentSerializer(data=request.query_params)
params.is_valid(raise_exception=True) params.is_valid(raise_exception=True)
indexer = get_document_indexer() indexer = get_document_indexer()
if indexer: if indexer:
queryset = self._fulltext_search_queryset( return self._search_fulltext(indexer, request, params=params)
indexer, token=access_token, user=user, params=params
)
else:
# The indexer is not configured, we fallback on a simple icontains filter by the
# model field 'title'.
queryset = self._simple_search_queryset(params)
return self.get_response_for_queryset( # The indexer is not configured, we fallback on a simple icontains filter by the
queryset.order_by("-updated_at"), # model field 'title'.
context={ return self._search_simple(request, text=params.validated_data["q"])
"request": request,
},
)
@drf.decorators.action(detail=True, methods=["get"], url_path="versions") @drf.decorators.action(detail=True, methods=["get"], url_path="versions")
def versions_list(self, request, *args, **kwargs): def versions_list(self, request, *args, **kwargs):

View File

@@ -2,8 +2,11 @@
Tests for Documents API endpoint in impress's core app: list Tests for Documents API endpoint in impress's core app: list
""" """
import random
from json import loads as json_loads from json import loads as json_loads
from django.test import RequestFactory
import pytest import pytest
import responses import responses
from faker import Faker from faker import Faker
@@ -16,6 +19,15 @@ fake = Faker()
pytestmark = pytest.mark.django_db pytestmark = pytest.mark.django_db
def build_search_url(**kwargs):
"""Build absolute uri for search endpoint with ORDERED query arguments"""
return (
RequestFactory()
.get("/api/v1.0/documents/search/", dict(sorted(kwargs.items())))
.build_absolute_uri()
)
@pytest.mark.parametrize("role", models.LinkRoleChoices.values) @pytest.mark.parametrize("role", models.LinkRoleChoices.values)
@pytest.mark.parametrize("reach", models.LinkReachChoices.values) @pytest.mark.parametrize("reach", models.LinkReachChoices.values)
@responses.activate @responses.activate
@@ -191,8 +203,62 @@ def test_api_documents_search_format(indexer_settings):
@responses.activate @responses.activate
def test_api_documents_search_pagination(indexer_settings): @pytest.mark.parametrize(
"""Documents should be ordered by descending "updated_at" by default""" "pagination, status, expected",
(
(
{"page": 1, "page_size": 10},
200,
{
"count": 10,
"previous": None,
"next": None,
"range": (0, None),
},
),
(
{},
200,
{
"count": 10,
"previous": None,
"next": None,
"range": (0, None),
"api_page_size": 21, # default page_size is 20
},
),
(
{"page": 2, "page_size": 10},
404,
{},
),
(
{"page": 1, "page_size": 5},
200,
{
"count": 10,
"previous": None,
"next": {"page": 2, "page_size": 5},
"range": (0, 5),
},
),
(
{"page": 2, "page_size": 5},
200,
{
"count": 10,
"previous": {"page_size": 5},
"next": None,
"range": (5, None),
},
),
({"page": 3, "page_size": 5}, 404, {}),
),
)
def test_api_documents_search_pagination(
indexer_settings, pagination, status, expected
):
"""Documents should be ordered by descending "score" by default"""
indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search" indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
assert get_document_indexer() is not None assert get_document_indexer() is not None
@@ -202,35 +268,159 @@ def test_api_documents_search_pagination(indexer_settings):
client = APIClient() client = APIClient()
client.force_login(user) client.force_login(user)
docs = factories.DocumentFactory.create_batch(10) docs = factories.DocumentFactory.create_batch(10, title="alpha", users=[user])
docs_by_uuid = {str(doc.pk): doc for doc in docs}
api_results = [{"_id": id} for id in docs_by_uuid.keys()]
# reorder randomly to simulate score ordering
random.shuffle(api_results)
# Find response # Find response
# pylint: disable-next=assignment-from-none # pylint: disable-next=assignment-from-none
api_search = responses.add( api_search = responses.add(
responses.POST, responses.POST,
"http://find/api/v1.0/search", "http://find/api/v1.0/search",
json=[{"_id": str(doc.pk)} for doc in docs], json=api_results,
status=200, status=200,
) )
response = client.get( response = client.get(
"/api/v1.0/documents/search/", data={"q": "alpha", "page": 2, "page_size": 5} "/api/v1.0/documents/search/",
data={
"q": "alpha",
**pagination,
},
) )
assert response.status_code == 200 assert response.status_code == status
content = response.json()
results = content.pop("results")
assert len(results) == 5
# Check the query parameters. if response.status_code < 300:
assert api_search.call_count == 1 previous_url = (
assert api_search.calls[0].response.status_code == 200 build_search_url(q="alpha", **expected["previous"])
assert json_loads(api_search.calls[0].request.body) == { if expected["previous"]
"q": "alpha", else None
"visited": [], )
"services": ["docs"], next_url = (
"page_number": 2, build_search_url(q="alpha", **expected["next"])
"page_size": 5, if expected["next"]
"order_by": "updated_at", else None
"order_direction": "desc", )
} start, end = expected["range"]
content = response.json()
assert content["count"] == expected["count"]
assert content["previous"] == previous_url
assert content["next"] == next_url
results = content.pop("results")
# The find api results ordering by score is kept
assert [r["id"] for r in results] == [r["_id"] for r in api_results[start:end]]
# Check the query parameters.
assert api_search.call_count == 1
assert api_search.calls[0].response.status_code == 200
assert json_loads(api_search.calls[0].request.body) == {
"q": "alpha",
"visited": [],
"services": ["docs"],
"page_number": 1,
"page_size": 100,
"order_by": "updated_at",
"order_direction": "desc",
}
@responses.activate
@pytest.mark.parametrize(
"pagination, status, expected",
(
(
{"page": 1, "page_size": 10},
200,
{"count": 10, "previous": None, "next": None, "range": (0, None)},
),
(
{},
200,
{"count": 10, "previous": None, "next": None, "range": (0, None)},
),
(
{"page": 2, "page_size": 10},
404,
{},
),
(
{"page": 1, "page_size": 5},
200,
{
"count": 10,
"previous": None,
"next": {"page": 2, "page_size": 5},
"range": (0, 5),
},
),
(
{"page": 2, "page_size": 5},
200,
{
"count": 10,
"previous": {"page_size": 5},
"next": None,
"range": (5, None),
},
),
({"page": 3, "page_size": 5}, 404, {}),
),
)
def test_api_documents_search_pagination_endpoint_is_none(
indexer_settings, pagination, status, expected
):
"""Documents should be ordered by descending "-updated_at" by default"""
indexer_settings.SEARCH_INDEXER_QUERY_URL = None
assert get_document_indexer() is None
user = factories.UserFactory()
client = APIClient()
client.force_login(user)
factories.DocumentFactory.create_batch(10, title="alpha", users=[user])
response = client.get(
"/api/v1.0/documents/search/",
data={
"q": "alpha",
**pagination,
},
)
assert response.status_code == status
if response.status_code < 300:
previous_url = (
build_search_url(q="alpha", **expected["previous"])
if expected["previous"]
else None
)
next_url = (
build_search_url(q="alpha", **expected["next"])
if expected["next"]
else None
)
queryset = models.Document.objects.order_by("-updated_at")
start, end = expected["range"]
expected_results = [str(d.pk) for d in queryset[start:end]]
content = response.json()
assert content["count"] == expected["count"]
assert content["previous"] == previous_url
assert content["next"] == next_url
results = content.pop("results")
assert [r["id"] for r in results] == expected_results