✨(backend) keep ordering from fulltext search in results
Keep ordering by score from Find API on search/ results and fallback search still uses "-update_at" ordering as default Refactor pagination to work with a list instead of a queryset Signed-off-by: Fabre Florian <ffabre@hybird.org>
This commit is contained in:
committed by
Quentin BEY
parent
b0e7a511cb
commit
de3dfbb0c7
@@ -1085,12 +1085,10 @@ class DocumentViewSet(
|
|||||||
{"id": str(duplicated_document.id)}, status=status.HTTP_201_CREATED
|
{"id": str(duplicated_document.id)}, status=status.HTTP_201_CREATED
|
||||||
)
|
)
|
||||||
|
|
||||||
def _simple_search_queryset(self, params):
|
def _search_simple(self, request, text):
|
||||||
"""
|
"""
|
||||||
Returns a queryset filtered by the content of the document title
|
Returns a queryset filtered by the content of the document title
|
||||||
"""
|
"""
|
||||||
text = params.validated_data["q"]
|
|
||||||
|
|
||||||
# As the 'list' view we get a prefiltered queryset (deleted docs are excluded)
|
# As the 'list' view we get a prefiltered queryset (deleted docs are excluded)
|
||||||
queryset = self.get_queryset()
|
queryset = self.get_queryset()
|
||||||
filterset = DocumentFilter({"title": text}, queryset=queryset)
|
filterset = DocumentFilter({"title": text}, queryset=queryset)
|
||||||
@@ -1098,25 +1096,47 @@ class DocumentViewSet(
|
|||||||
if not filterset.is_valid():
|
if not filterset.is_valid():
|
||||||
raise drf.exceptions.ValidationError(filterset.errors)
|
raise drf.exceptions.ValidationError(filterset.errors)
|
||||||
|
|
||||||
return filterset.filter_queryset(queryset)
|
queryset = filterset.filter_queryset(queryset)
|
||||||
|
|
||||||
def _fulltext_search_queryset(self, indexer, token, user, params):
|
return self.get_response_for_queryset(
|
||||||
|
queryset.order_by("-updated_at"),
|
||||||
|
context={
|
||||||
|
"request": request,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
def _search_fulltext(self, indexer, request, params):
|
||||||
"""
|
"""
|
||||||
Returns a queryset from the results the fulltext search of Find
|
Returns a queryset from the results the fulltext search of Find
|
||||||
"""
|
"""
|
||||||
|
access_token = request.session.get("oidc_access_token")
|
||||||
|
user = request.user
|
||||||
text = params.validated_data["q"]
|
text = params.validated_data["q"]
|
||||||
queryset = models.Document.objects.all()
|
queryset = models.Document.objects.all()
|
||||||
|
|
||||||
# Retrieve the documents ids from Find.
|
# Retrieve the documents ids from Find.
|
||||||
results = indexer.search(
|
results = indexer.search(
|
||||||
text=text,
|
text=text,
|
||||||
token=token,
|
token=access_token,
|
||||||
visited=get_visited_document_ids_of(queryset, user),
|
visited=get_visited_document_ids_of(queryset, user),
|
||||||
page=params.validated_data.get("page", 1),
|
page=1,
|
||||||
page_size=params.validated_data.get("page_size", 20),
|
page_size=100,
|
||||||
)
|
)
|
||||||
|
|
||||||
return queryset.filter(pk__in=results)
|
docs_by_uuid = {str(d.pk): d for d in queryset.filter(pk__in=results)}
|
||||||
|
ordered_docs = [docs_by_uuid[id] for id in results]
|
||||||
|
|
||||||
|
page = self.paginate_queryset(ordered_docs)
|
||||||
|
|
||||||
|
serializer = self.get_serializer(
|
||||||
|
page if page else ordered_docs,
|
||||||
|
many=True,
|
||||||
|
context={
|
||||||
|
"request": request,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
return self.get_paginated_response(serializer.data)
|
||||||
|
|
||||||
@drf.decorators.action(detail=False, methods=["get"], url_path="search")
|
@drf.decorators.action(detail=False, methods=["get"], url_path="search")
|
||||||
@method_decorator(refresh_oidc_access_token)
|
@method_decorator(refresh_oidc_access_token)
|
||||||
@@ -1132,29 +1152,17 @@ class DocumentViewSet(
|
|||||||
|
|
||||||
The ordering is always by the most recent first.
|
The ordering is always by the most recent first.
|
||||||
"""
|
"""
|
||||||
access_token = request.session.get("oidc_access_token")
|
|
||||||
user = request.user
|
|
||||||
|
|
||||||
params = serializers.SearchDocumentSerializer(data=request.query_params)
|
params = serializers.SearchDocumentSerializer(data=request.query_params)
|
||||||
params.is_valid(raise_exception=True)
|
params.is_valid(raise_exception=True)
|
||||||
|
|
||||||
indexer = get_document_indexer()
|
indexer = get_document_indexer()
|
||||||
|
|
||||||
if indexer:
|
if indexer:
|
||||||
queryset = self._fulltext_search_queryset(
|
return self._search_fulltext(indexer, request, params=params)
|
||||||
indexer, token=access_token, user=user, params=params
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# The indexer is not configured, we fallback on a simple icontains filter by the
|
|
||||||
# model field 'title'.
|
|
||||||
queryset = self._simple_search_queryset(params)
|
|
||||||
|
|
||||||
return self.get_response_for_queryset(
|
# The indexer is not configured, we fallback on a simple icontains filter by the
|
||||||
queryset.order_by("-updated_at"),
|
# model field 'title'.
|
||||||
context={
|
return self._search_simple(request, text=params.validated_data["q"])
|
||||||
"request": request,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
@drf.decorators.action(detail=True, methods=["get"], url_path="versions")
|
@drf.decorators.action(detail=True, methods=["get"], url_path="versions")
|
||||||
def versions_list(self, request, *args, **kwargs):
|
def versions_list(self, request, *args, **kwargs):
|
||||||
|
|||||||
@@ -2,8 +2,11 @@
|
|||||||
Tests for Documents API endpoint in impress's core app: list
|
Tests for Documents API endpoint in impress's core app: list
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import random
|
||||||
from json import loads as json_loads
|
from json import loads as json_loads
|
||||||
|
|
||||||
|
from django.test import RequestFactory
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import responses
|
import responses
|
||||||
from faker import Faker
|
from faker import Faker
|
||||||
@@ -16,6 +19,15 @@ fake = Faker()
|
|||||||
pytestmark = pytest.mark.django_db
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def build_search_url(**kwargs):
|
||||||
|
"""Build absolute uri for search endpoint with ORDERED query arguments"""
|
||||||
|
return (
|
||||||
|
RequestFactory()
|
||||||
|
.get("/api/v1.0/documents/search/", dict(sorted(kwargs.items())))
|
||||||
|
.build_absolute_uri()
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("role", models.LinkRoleChoices.values)
|
@pytest.mark.parametrize("role", models.LinkRoleChoices.values)
|
||||||
@pytest.mark.parametrize("reach", models.LinkReachChoices.values)
|
@pytest.mark.parametrize("reach", models.LinkReachChoices.values)
|
||||||
@responses.activate
|
@responses.activate
|
||||||
@@ -191,8 +203,62 @@ def test_api_documents_search_format(indexer_settings):
|
|||||||
|
|
||||||
|
|
||||||
@responses.activate
|
@responses.activate
|
||||||
def test_api_documents_search_pagination(indexer_settings):
|
@pytest.mark.parametrize(
|
||||||
"""Documents should be ordered by descending "updated_at" by default"""
|
"pagination, status, expected",
|
||||||
|
(
|
||||||
|
(
|
||||||
|
{"page": 1, "page_size": 10},
|
||||||
|
200,
|
||||||
|
{
|
||||||
|
"count": 10,
|
||||||
|
"previous": None,
|
||||||
|
"next": None,
|
||||||
|
"range": (0, None),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
{},
|
||||||
|
200,
|
||||||
|
{
|
||||||
|
"count": 10,
|
||||||
|
"previous": None,
|
||||||
|
"next": None,
|
||||||
|
"range": (0, None),
|
||||||
|
"api_page_size": 21, # default page_size is 20
|
||||||
|
},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
{"page": 2, "page_size": 10},
|
||||||
|
404,
|
||||||
|
{},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
{"page": 1, "page_size": 5},
|
||||||
|
200,
|
||||||
|
{
|
||||||
|
"count": 10,
|
||||||
|
"previous": None,
|
||||||
|
"next": {"page": 2, "page_size": 5},
|
||||||
|
"range": (0, 5),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
{"page": 2, "page_size": 5},
|
||||||
|
200,
|
||||||
|
{
|
||||||
|
"count": 10,
|
||||||
|
"previous": {"page_size": 5},
|
||||||
|
"next": None,
|
||||||
|
"range": (5, None),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
({"page": 3, "page_size": 5}, 404, {}),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
def test_api_documents_search_pagination(
|
||||||
|
indexer_settings, pagination, status, expected
|
||||||
|
):
|
||||||
|
"""Documents should be ordered by descending "score" by default"""
|
||||||
indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
|
indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
|
||||||
|
|
||||||
assert get_document_indexer() is not None
|
assert get_document_indexer() is not None
|
||||||
@@ -202,35 +268,159 @@ def test_api_documents_search_pagination(indexer_settings):
|
|||||||
client = APIClient()
|
client = APIClient()
|
||||||
client.force_login(user)
|
client.force_login(user)
|
||||||
|
|
||||||
docs = factories.DocumentFactory.create_batch(10)
|
docs = factories.DocumentFactory.create_batch(10, title="alpha", users=[user])
|
||||||
|
|
||||||
|
docs_by_uuid = {str(doc.pk): doc for doc in docs}
|
||||||
|
api_results = [{"_id": id} for id in docs_by_uuid.keys()]
|
||||||
|
|
||||||
|
# reorder randomly to simulate score ordering
|
||||||
|
random.shuffle(api_results)
|
||||||
|
|
||||||
# Find response
|
# Find response
|
||||||
# pylint: disable-next=assignment-from-none
|
# pylint: disable-next=assignment-from-none
|
||||||
api_search = responses.add(
|
api_search = responses.add(
|
||||||
responses.POST,
|
responses.POST,
|
||||||
"http://find/api/v1.0/search",
|
"http://find/api/v1.0/search",
|
||||||
json=[{"_id": str(doc.pk)} for doc in docs],
|
json=api_results,
|
||||||
status=200,
|
status=200,
|
||||||
)
|
)
|
||||||
|
|
||||||
response = client.get(
|
response = client.get(
|
||||||
"/api/v1.0/documents/search/", data={"q": "alpha", "page": 2, "page_size": 5}
|
"/api/v1.0/documents/search/",
|
||||||
|
data={
|
||||||
|
"q": "alpha",
|
||||||
|
**pagination,
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
assert response.status_code == 200
|
assert response.status_code == status
|
||||||
content = response.json()
|
|
||||||
results = content.pop("results")
|
|
||||||
assert len(results) == 5
|
|
||||||
|
|
||||||
# Check the query parameters.
|
if response.status_code < 300:
|
||||||
assert api_search.call_count == 1
|
previous_url = (
|
||||||
assert api_search.calls[0].response.status_code == 200
|
build_search_url(q="alpha", **expected["previous"])
|
||||||
assert json_loads(api_search.calls[0].request.body) == {
|
if expected["previous"]
|
||||||
"q": "alpha",
|
else None
|
||||||
"visited": [],
|
)
|
||||||
"services": ["docs"],
|
next_url = (
|
||||||
"page_number": 2,
|
build_search_url(q="alpha", **expected["next"])
|
||||||
"page_size": 5,
|
if expected["next"]
|
||||||
"order_by": "updated_at",
|
else None
|
||||||
"order_direction": "desc",
|
)
|
||||||
}
|
start, end = expected["range"]
|
||||||
|
|
||||||
|
content = response.json()
|
||||||
|
|
||||||
|
assert content["count"] == expected["count"]
|
||||||
|
assert content["previous"] == previous_url
|
||||||
|
assert content["next"] == next_url
|
||||||
|
|
||||||
|
results = content.pop("results")
|
||||||
|
|
||||||
|
# The find api results ordering by score is kept
|
||||||
|
assert [r["id"] for r in results] == [r["_id"] for r in api_results[start:end]]
|
||||||
|
|
||||||
|
# Check the query parameters.
|
||||||
|
assert api_search.call_count == 1
|
||||||
|
assert api_search.calls[0].response.status_code == 200
|
||||||
|
assert json_loads(api_search.calls[0].request.body) == {
|
||||||
|
"q": "alpha",
|
||||||
|
"visited": [],
|
||||||
|
"services": ["docs"],
|
||||||
|
"page_number": 1,
|
||||||
|
"page_size": 100,
|
||||||
|
"order_by": "updated_at",
|
||||||
|
"order_direction": "desc",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@responses.activate
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"pagination, status, expected",
|
||||||
|
(
|
||||||
|
(
|
||||||
|
{"page": 1, "page_size": 10},
|
||||||
|
200,
|
||||||
|
{"count": 10, "previous": None, "next": None, "range": (0, None)},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
{},
|
||||||
|
200,
|
||||||
|
{"count": 10, "previous": None, "next": None, "range": (0, None)},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
{"page": 2, "page_size": 10},
|
||||||
|
404,
|
||||||
|
{},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
{"page": 1, "page_size": 5},
|
||||||
|
200,
|
||||||
|
{
|
||||||
|
"count": 10,
|
||||||
|
"previous": None,
|
||||||
|
"next": {"page": 2, "page_size": 5},
|
||||||
|
"range": (0, 5),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
{"page": 2, "page_size": 5},
|
||||||
|
200,
|
||||||
|
{
|
||||||
|
"count": 10,
|
||||||
|
"previous": {"page_size": 5},
|
||||||
|
"next": None,
|
||||||
|
"range": (5, None),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
({"page": 3, "page_size": 5}, 404, {}),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
def test_api_documents_search_pagination_endpoint_is_none(
|
||||||
|
indexer_settings, pagination, status, expected
|
||||||
|
):
|
||||||
|
"""Documents should be ordered by descending "-updated_at" by default"""
|
||||||
|
indexer_settings.SEARCH_INDEXER_QUERY_URL = None
|
||||||
|
|
||||||
|
assert get_document_indexer() is None
|
||||||
|
|
||||||
|
user = factories.UserFactory()
|
||||||
|
|
||||||
|
client = APIClient()
|
||||||
|
client.force_login(user)
|
||||||
|
|
||||||
|
factories.DocumentFactory.create_batch(10, title="alpha", users=[user])
|
||||||
|
|
||||||
|
response = client.get(
|
||||||
|
"/api/v1.0/documents/search/",
|
||||||
|
data={
|
||||||
|
"q": "alpha",
|
||||||
|
**pagination,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
assert response.status_code == status
|
||||||
|
|
||||||
|
if response.status_code < 300:
|
||||||
|
previous_url = (
|
||||||
|
build_search_url(q="alpha", **expected["previous"])
|
||||||
|
if expected["previous"]
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
next_url = (
|
||||||
|
build_search_url(q="alpha", **expected["next"])
|
||||||
|
if expected["next"]
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
queryset = models.Document.objects.order_by("-updated_at")
|
||||||
|
start, end = expected["range"]
|
||||||
|
expected_results = [str(d.pk) for d in queryset[start:end]]
|
||||||
|
|
||||||
|
content = response.json()
|
||||||
|
|
||||||
|
assert content["count"] == expected["count"]
|
||||||
|
assert content["previous"] == previous_url
|
||||||
|
assert content["next"] == next_url
|
||||||
|
|
||||||
|
results = content.pop("results")
|
||||||
|
|
||||||
|
assert [r["id"] for r in results] == expected_results
|
||||||
|
|||||||
Reference in New Issue
Block a user