✨(backend) keep ordering from fulltext search in results
Keep ordering by score from Find API on search/ results and fallback search still uses "-update_at" ordering as default Refactor pagination to work with a list instead of a queryset Signed-off-by: Fabre Florian <ffabre@hybird.org>
This commit is contained in:
committed by
Quentin BEY
parent
b0e7a511cb
commit
de3dfbb0c7
@@ -1085,12 +1085,10 @@ class DocumentViewSet(
|
||||
{"id": str(duplicated_document.id)}, status=status.HTTP_201_CREATED
|
||||
)
|
||||
|
||||
def _simple_search_queryset(self, params):
|
||||
def _search_simple(self, request, text):
|
||||
"""
|
||||
Returns a queryset filtered by the content of the document title
|
||||
"""
|
||||
text = params.validated_data["q"]
|
||||
|
||||
# As the 'list' view we get a prefiltered queryset (deleted docs are excluded)
|
||||
queryset = self.get_queryset()
|
||||
filterset = DocumentFilter({"title": text}, queryset=queryset)
|
||||
@@ -1098,25 +1096,47 @@ class DocumentViewSet(
|
||||
if not filterset.is_valid():
|
||||
raise drf.exceptions.ValidationError(filterset.errors)
|
||||
|
||||
return filterset.filter_queryset(queryset)
|
||||
queryset = filterset.filter_queryset(queryset)
|
||||
|
||||
def _fulltext_search_queryset(self, indexer, token, user, params):
|
||||
return self.get_response_for_queryset(
|
||||
queryset.order_by("-updated_at"),
|
||||
context={
|
||||
"request": request,
|
||||
},
|
||||
)
|
||||
|
||||
def _search_fulltext(self, indexer, request, params):
|
||||
"""
|
||||
Returns a queryset from the results the fulltext search of Find
|
||||
"""
|
||||
access_token = request.session.get("oidc_access_token")
|
||||
user = request.user
|
||||
text = params.validated_data["q"]
|
||||
queryset = models.Document.objects.all()
|
||||
|
||||
# Retrieve the documents ids from Find.
|
||||
results = indexer.search(
|
||||
text=text,
|
||||
token=token,
|
||||
token=access_token,
|
||||
visited=get_visited_document_ids_of(queryset, user),
|
||||
page=params.validated_data.get("page", 1),
|
||||
page_size=params.validated_data.get("page_size", 20),
|
||||
page=1,
|
||||
page_size=100,
|
||||
)
|
||||
|
||||
return queryset.filter(pk__in=results)
|
||||
docs_by_uuid = {str(d.pk): d for d in queryset.filter(pk__in=results)}
|
||||
ordered_docs = [docs_by_uuid[id] for id in results]
|
||||
|
||||
page = self.paginate_queryset(ordered_docs)
|
||||
|
||||
serializer = self.get_serializer(
|
||||
page if page else ordered_docs,
|
||||
many=True,
|
||||
context={
|
||||
"request": request,
|
||||
},
|
||||
)
|
||||
|
||||
return self.get_paginated_response(serializer.data)
|
||||
|
||||
@drf.decorators.action(detail=False, methods=["get"], url_path="search")
|
||||
@method_decorator(refresh_oidc_access_token)
|
||||
@@ -1132,29 +1152,17 @@ class DocumentViewSet(
|
||||
|
||||
The ordering is always by the most recent first.
|
||||
"""
|
||||
access_token = request.session.get("oidc_access_token")
|
||||
user = request.user
|
||||
|
||||
params = serializers.SearchDocumentSerializer(data=request.query_params)
|
||||
params.is_valid(raise_exception=True)
|
||||
|
||||
indexer = get_document_indexer()
|
||||
|
||||
if indexer:
|
||||
queryset = self._fulltext_search_queryset(
|
||||
indexer, token=access_token, user=user, params=params
|
||||
)
|
||||
else:
|
||||
# The indexer is not configured, we fallback on a simple icontains filter by the
|
||||
# model field 'title'.
|
||||
queryset = self._simple_search_queryset(params)
|
||||
return self._search_fulltext(indexer, request, params=params)
|
||||
|
||||
return self.get_response_for_queryset(
|
||||
queryset.order_by("-updated_at"),
|
||||
context={
|
||||
"request": request,
|
||||
},
|
||||
)
|
||||
# The indexer is not configured, we fallback on a simple icontains filter by the
|
||||
# model field 'title'.
|
||||
return self._search_simple(request, text=params.validated_data["q"])
|
||||
|
||||
@drf.decorators.action(detail=True, methods=["get"], url_path="versions")
|
||||
def versions_list(self, request, *args, **kwargs):
|
||||
|
||||
@@ -2,8 +2,11 @@
|
||||
Tests for Documents API endpoint in impress's core app: list
|
||||
"""
|
||||
|
||||
import random
|
||||
from json import loads as json_loads
|
||||
|
||||
from django.test import RequestFactory
|
||||
|
||||
import pytest
|
||||
import responses
|
||||
from faker import Faker
|
||||
@@ -16,6 +19,15 @@ fake = Faker()
|
||||
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
def build_search_url(**kwargs):
|
||||
"""Build absolute uri for search endpoint with ORDERED query arguments"""
|
||||
return (
|
||||
RequestFactory()
|
||||
.get("/api/v1.0/documents/search/", dict(sorted(kwargs.items())))
|
||||
.build_absolute_uri()
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("role", models.LinkRoleChoices.values)
|
||||
@pytest.mark.parametrize("reach", models.LinkReachChoices.values)
|
||||
@responses.activate
|
||||
@@ -191,8 +203,62 @@ def test_api_documents_search_format(indexer_settings):
|
||||
|
||||
|
||||
@responses.activate
|
||||
def test_api_documents_search_pagination(indexer_settings):
|
||||
"""Documents should be ordered by descending "updated_at" by default"""
|
||||
@pytest.mark.parametrize(
|
||||
"pagination, status, expected",
|
||||
(
|
||||
(
|
||||
{"page": 1, "page_size": 10},
|
||||
200,
|
||||
{
|
||||
"count": 10,
|
||||
"previous": None,
|
||||
"next": None,
|
||||
"range": (0, None),
|
||||
},
|
||||
),
|
||||
(
|
||||
{},
|
||||
200,
|
||||
{
|
||||
"count": 10,
|
||||
"previous": None,
|
||||
"next": None,
|
||||
"range": (0, None),
|
||||
"api_page_size": 21, # default page_size is 20
|
||||
},
|
||||
),
|
||||
(
|
||||
{"page": 2, "page_size": 10},
|
||||
404,
|
||||
{},
|
||||
),
|
||||
(
|
||||
{"page": 1, "page_size": 5},
|
||||
200,
|
||||
{
|
||||
"count": 10,
|
||||
"previous": None,
|
||||
"next": {"page": 2, "page_size": 5},
|
||||
"range": (0, 5),
|
||||
},
|
||||
),
|
||||
(
|
||||
{"page": 2, "page_size": 5},
|
||||
200,
|
||||
{
|
||||
"count": 10,
|
||||
"previous": {"page_size": 5},
|
||||
"next": None,
|
||||
"range": (5, None),
|
||||
},
|
||||
),
|
||||
({"page": 3, "page_size": 5}, 404, {}),
|
||||
),
|
||||
)
|
||||
def test_api_documents_search_pagination(
|
||||
indexer_settings, pagination, status, expected
|
||||
):
|
||||
"""Documents should be ordered by descending "score" by default"""
|
||||
indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
|
||||
|
||||
assert get_document_indexer() is not None
|
||||
@@ -202,35 +268,159 @@ def test_api_documents_search_pagination(indexer_settings):
|
||||
client = APIClient()
|
||||
client.force_login(user)
|
||||
|
||||
docs = factories.DocumentFactory.create_batch(10)
|
||||
docs = factories.DocumentFactory.create_batch(10, title="alpha", users=[user])
|
||||
|
||||
docs_by_uuid = {str(doc.pk): doc for doc in docs}
|
||||
api_results = [{"_id": id} for id in docs_by_uuid.keys()]
|
||||
|
||||
# reorder randomly to simulate score ordering
|
||||
random.shuffle(api_results)
|
||||
|
||||
# Find response
|
||||
# pylint: disable-next=assignment-from-none
|
||||
api_search = responses.add(
|
||||
responses.POST,
|
||||
"http://find/api/v1.0/search",
|
||||
json=[{"_id": str(doc.pk)} for doc in docs],
|
||||
json=api_results,
|
||||
status=200,
|
||||
)
|
||||
|
||||
response = client.get(
|
||||
"/api/v1.0/documents/search/", data={"q": "alpha", "page": 2, "page_size": 5}
|
||||
"/api/v1.0/documents/search/",
|
||||
data={
|
||||
"q": "alpha",
|
||||
**pagination,
|
||||
},
|
||||
)
|
||||
|
||||
assert response.status_code == 200
|
||||
content = response.json()
|
||||
results = content.pop("results")
|
||||
assert len(results) == 5
|
||||
assert response.status_code == status
|
||||
|
||||
# Check the query parameters.
|
||||
assert api_search.call_count == 1
|
||||
assert api_search.calls[0].response.status_code == 200
|
||||
assert json_loads(api_search.calls[0].request.body) == {
|
||||
"q": "alpha",
|
||||
"visited": [],
|
||||
"services": ["docs"],
|
||||
"page_number": 2,
|
||||
"page_size": 5,
|
||||
"order_by": "updated_at",
|
||||
"order_direction": "desc",
|
||||
}
|
||||
if response.status_code < 300:
|
||||
previous_url = (
|
||||
build_search_url(q="alpha", **expected["previous"])
|
||||
if expected["previous"]
|
||||
else None
|
||||
)
|
||||
next_url = (
|
||||
build_search_url(q="alpha", **expected["next"])
|
||||
if expected["next"]
|
||||
else None
|
||||
)
|
||||
start, end = expected["range"]
|
||||
|
||||
content = response.json()
|
||||
|
||||
assert content["count"] == expected["count"]
|
||||
assert content["previous"] == previous_url
|
||||
assert content["next"] == next_url
|
||||
|
||||
results = content.pop("results")
|
||||
|
||||
# The find api results ordering by score is kept
|
||||
assert [r["id"] for r in results] == [r["_id"] for r in api_results[start:end]]
|
||||
|
||||
# Check the query parameters.
|
||||
assert api_search.call_count == 1
|
||||
assert api_search.calls[0].response.status_code == 200
|
||||
assert json_loads(api_search.calls[0].request.body) == {
|
||||
"q": "alpha",
|
||||
"visited": [],
|
||||
"services": ["docs"],
|
||||
"page_number": 1,
|
||||
"page_size": 100,
|
||||
"order_by": "updated_at",
|
||||
"order_direction": "desc",
|
||||
}
|
||||
|
||||
|
||||
@responses.activate
|
||||
@pytest.mark.parametrize(
|
||||
"pagination, status, expected",
|
||||
(
|
||||
(
|
||||
{"page": 1, "page_size": 10},
|
||||
200,
|
||||
{"count": 10, "previous": None, "next": None, "range": (0, None)},
|
||||
),
|
||||
(
|
||||
{},
|
||||
200,
|
||||
{"count": 10, "previous": None, "next": None, "range": (0, None)},
|
||||
),
|
||||
(
|
||||
{"page": 2, "page_size": 10},
|
||||
404,
|
||||
{},
|
||||
),
|
||||
(
|
||||
{"page": 1, "page_size": 5},
|
||||
200,
|
||||
{
|
||||
"count": 10,
|
||||
"previous": None,
|
||||
"next": {"page": 2, "page_size": 5},
|
||||
"range": (0, 5),
|
||||
},
|
||||
),
|
||||
(
|
||||
{"page": 2, "page_size": 5},
|
||||
200,
|
||||
{
|
||||
"count": 10,
|
||||
"previous": {"page_size": 5},
|
||||
"next": None,
|
||||
"range": (5, None),
|
||||
},
|
||||
),
|
||||
({"page": 3, "page_size": 5}, 404, {}),
|
||||
),
|
||||
)
|
||||
def test_api_documents_search_pagination_endpoint_is_none(
|
||||
indexer_settings, pagination, status, expected
|
||||
):
|
||||
"""Documents should be ordered by descending "-updated_at" by default"""
|
||||
indexer_settings.SEARCH_INDEXER_QUERY_URL = None
|
||||
|
||||
assert get_document_indexer() is None
|
||||
|
||||
user = factories.UserFactory()
|
||||
|
||||
client = APIClient()
|
||||
client.force_login(user)
|
||||
|
||||
factories.DocumentFactory.create_batch(10, title="alpha", users=[user])
|
||||
|
||||
response = client.get(
|
||||
"/api/v1.0/documents/search/",
|
||||
data={
|
||||
"q": "alpha",
|
||||
**pagination,
|
||||
},
|
||||
)
|
||||
|
||||
assert response.status_code == status
|
||||
|
||||
if response.status_code < 300:
|
||||
previous_url = (
|
||||
build_search_url(q="alpha", **expected["previous"])
|
||||
if expected["previous"]
|
||||
else None
|
||||
)
|
||||
next_url = (
|
||||
build_search_url(q="alpha", **expected["next"])
|
||||
if expected["next"]
|
||||
else None
|
||||
)
|
||||
queryset = models.Document.objects.order_by("-updated_at")
|
||||
start, end = expected["range"]
|
||||
expected_results = [str(d.pk) for d in queryset[start:end]]
|
||||
|
||||
content = response.json()
|
||||
|
||||
assert content["count"] == expected["count"]
|
||||
assert content["previous"] == previous_url
|
||||
assert content["next"] == next_url
|
||||
|
||||
results = content.pop("results")
|
||||
|
||||
assert [r["id"] for r in results] == expected_results
|
||||
|
||||
Reference in New Issue
Block a user