diff --git a/src/backend/core/api/serializers.py b/src/backend/core/api/serializers.py index 932b5a81..f147f610 100644 --- a/src/backend/core/api/serializers.py +++ b/src/backend/core/api/serializers.py @@ -1019,3 +1019,7 @@ class FindDocumentSerializer(serializers.Serializer): """Serializer for Find search requests""" q = serializers.CharField(required=True, allow_blank=False, trim_whitespace=True) + page_size = serializers.IntegerField( + required=False, min_value=1, max_value=50, default=20 + ) + page = serializers.IntegerField(required=False, min_value=1, default=1) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 2a5fc50c..bd19d07f 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -13,7 +13,7 @@ from django.conf import settings from django.contrib.postgres.aggregates import ArrayAgg from django.contrib.postgres.search import TrigramSimilarity from django.core.cache import cache -from django.core.exceptions import ImproperlyConfigured, ValidationError +from django.core.exceptions import ValidationError from django.core.files.storage import default_storage from django.core.validators import URLValidator from django.db import connection, transaction @@ -52,7 +52,10 @@ from core.services.converter_services import ( from core.services.converter_services import ( YdocConverter, ) -from core.services.search_indexers import get_document_indexer_class +from core.services.search_indexers import ( + default_document_indexer, + get_visited_document_ids_of, +) from core.tasks.mail import send_ask_for_access_mail from core.utils import extract_attachments, filter_descendants @@ -1090,23 +1093,42 @@ class DocumentViewSet( The filtering allows full text search through the opensearch indexation app "find". 
""" access_token = request.session.get("oidc_access_token") + user = request.user serializer = serializers.FindDocumentSerializer(data=request.query_params) serializer.is_valid(raise_exception=True) - try: - indexer = get_document_indexer_class()() - queryset = indexer.search( - text=serializer.validated_data.get("q", ""), - user=request.user, - token=access_token, + indexer = default_document_indexer() + + if not indexer: + queryset = self.get_queryset() + filterset = DocumentFilter( + {"title": serializer.validated_data.get("q", "")}, queryset=queryset ) - except ImproperlyConfigured: - return drf.response.Response( - {"detail": "The service is not properly configured."}, - status=status.HTTP_401_UNAUTHORIZED, + + if not filterset.is_valid(): + raise drf.exceptions.ValidationError(filterset.errors) + + queryset = filterset.filter_queryset(queryset).order_by("-updated_at") + + return self.get_response_for_queryset( + queryset, + context={ + "request": request, + }, ) + queryset = models.Document.objects.all() + results = indexer.search( + text=serializer.validated_data.get("q", ""), + token=access_token, + visited=get_visited_document_ids_of(queryset, user), + page=serializer.validated_data.get("page", 1), + page_size=serializer.validated_data.get("page_size", 20), + ) + + queryset = queryset.filter(pk__in=results) + return self.get_response_for_queryset( queryset, context={ diff --git a/src/backend/core/services/search_indexers.py b/src/backend/core/services/search_indexers.py index 48a437be..255aa3bb 100644 --- a/src/backend/core/services/search_indexers.py +++ b/src/backend/core/services/search_indexers.py @@ -8,6 +8,7 @@ from functools import cache from django.conf import settings from django.contrib.auth.models import AnonymousUser from django.core.exceptions import ImproperlyConfigured +from django.db.models import Subquery from django.utils.module_loading import import_string import requests @@ -18,7 +19,23 @@ logger = logging.getLogger(__name__) @cache 
-def get_document_indexer_class() -> "BaseDocumentIndexer": +def default_document_indexer(): + """Returns default indexer service is enabled and properly configured.""" + + # For this usecase an empty indexer class is not an issue but a feature. + if not getattr(settings, "SEARCH_INDEXER_CLASS", None): + logger.info("Document indexer is not configured (see SEARCH_INDEXER_CLASS)") + return None + + try: + return get_document_indexer_class()() + except ImproperlyConfigured as err: + logger.error("Document indexer is not properly configured : %s", err) + return None + + +@cache +def get_document_indexer_class(): """Return the indexer backend class based on the settings.""" classpath = settings.SEARCH_INDEXER_CLASS @@ -65,7 +82,7 @@ def get_batch_accesses_by_users_and_teams(paths): return dict(access_by_document_path) -def get_visited_document_ids_of(user): +def get_visited_document_ids_of(queryset, user): """ Returns the ids of the documents that have a linktrace to the user and NOT owned. It will be use to limit the opensearch responses to the public documents already @@ -74,11 +91,18 @@ def get_visited_document_ids_of(user): if isinstance(user, AnonymousUser): return [] - qs = models.LinkTrace.objects.filter(user=user).exclude( - document__accesses__user=user, + qs = models.LinkTrace.objects.filter(user=user) + + docs = ( + queryset.exclude(accesses__user=user) + .filter( + deleted_at__isnull=True, + ancestors_deleted_at__isnull=True, + ) + .filter(pk__in=Subquery(qs.values("document_id"))) ) - return list({str(id) for id in qs.values_list("document_id", flat=True)}) + return list({str(id) for id in docs.values_list("pk", flat=True)}) class BaseDocumentIndexer(ABC): @@ -159,22 +183,41 @@ class BaseDocumentIndexer(ABC): Must be implemented by subclasses. 
""" - def search(self, text, user, token): + # pylint: disable-next=too-many-arguments,too-many-positional-arguments + def search(self, text, token, visited=(), page=1, page_size=50): """ Search for documents in Find app. - """ - visited_ids = get_visited_document_ids_of(user) + Ensure the same default ordering as "Docs" list : -updated_at + Returns ids of the documents + + Args: + text (str): Text search content. + token (str): OIDC Authentication token. + visited (list, optional): + List of ids of active public documents with LinkTrace + Defaults to settings.SEARCH_INDEXER_BATCH_SIZE. + page (int, optional): + The page number to retrieve. + Defaults to 1 if not specified. + page_size (int, optional): + The number of results to return per page. + Defaults to 50 if not specified. + """ response = self.search_query( data={ "q": text, - "visited": visited_ids, + "visited": visited, "services": ["docs"], + "page_number": page, + "page_size": page_size, + "order_by": "updated_at", + "order_direction": "desc", }, token=token, ) - return self.format_response(response) + return [d["_id"] for d in response] @abstractmethod def search_query(self, data, token) -> dict: @@ -184,14 +227,6 @@ class BaseDocumentIndexer(ABC): Must be implemented by subclasses. """ - @abstractmethod - def format_response(self, data: dict): - """ - Convert the JSON response from Find app as document queryset. - - Must be implemented by subclasses. - """ - class FindDocumentIndexer(BaseDocumentIndexer): """ @@ -253,12 +288,6 @@ class FindDocumentIndexer(BaseDocumentIndexer): logger.error("HTTPError: %s", e) raise - def format_response(self, data: dict): - """ - Retrieve documents ids from Find app response and return a queryset. - """ - return models.Document.objects.filter(pk__in=[d["_id"] for d in data]) - def push(self, data): """ Push a batch of documents to the Find backend. @@ -266,7 +295,6 @@ class FindDocumentIndexer(BaseDocumentIndexer): Args: data (list): List of document dictionaries. 
""" - try: response = requests.post( self.indexer_url, diff --git a/src/backend/core/signals.py b/src/backend/core/signals.py index b6a12929..fbb9368a 100644 --- a/src/backend/core/signals.py +++ b/src/backend/core/signals.py @@ -2,10 +2,14 @@ Declare and configure the signals for the impress core application """ +from functools import partial + +from django.db import transaction from django.db.models import signals from django.dispatch import receiver from . import models +from .services.search_indexers import default_document_indexer from .tasks.find import trigger_document_indexer @@ -16,7 +20,8 @@ def document_post_save(sender, instance, **kwargs): # pylint: disable=unused-ar Note : Within the transaction we can have an empty content and a serialization error. """ - trigger_document_indexer(instance, on_commit=True) + if default_document_indexer() is not None: + transaction.on_commit(partial(trigger_document_indexer, instance)) @receiver(signals.post_save, sender=models.DocumentAccess) @@ -24,5 +29,5 @@ def document_access_post_save(sender, instance, created, **kwargs): # pylint: d """ Asynchronous call to the document indexer at the end of the transaction. 
""" - if not created: - trigger_document_indexer(instance.document, on_commit=True) + if not created and default_document_indexer() is not None: + transaction.on_commit(partial(trigger_document_indexer, instance.document)) diff --git a/src/backend/core/tasks/find.py b/src/backend/core/tasks/find.py index 10b15c35..f4d41495 100644 --- a/src/backend/core/tasks/find.py +++ b/src/backend/core/tasks/find.py @@ -1,11 +1,9 @@ """Trigger document indexation using celery task.""" -from functools import partial from logging import getLogger from django.conf import settings from django.core.cache import cache -from django.db import transaction from impress.celery_app import app @@ -37,7 +35,7 @@ def decr_counter(key): @app.task def document_indexer_task(document_id): - """Send indexation query for a document using celery task.""" + """Celery Task : Sends indexation query for a document.""" key = document_indexer_debounce_key(document_id) # check if the counter : if still up, skip the task. only the last one @@ -46,6 +44,7 @@ def document_indexer_task(document_id): logger.info("Skip document %s indexation", document_id) return + # Prevents some circular imports # pylint: disable=import-outside-toplevel from core import models # noqa: PLC0415 from core.services.search_indexers import ( # noqa: PLC0415 @@ -63,35 +62,27 @@ def document_indexer_task(document_id): indexer.push(data) -def trigger_document_indexer(document, on_commit=False): +def trigger_document_indexer(document): """ Trigger indexation task with debounce a delay set by the SEARCH_INDEXER_COUNTDOWN setting. Args: document (Document): The document instance. 
- on_commit (bool): Wait for the end of the transaction before starting the task - (some fields may be in wrong state within the transaction) """ - if document.deleted_at or document.ancestors_deleted_at: - pass + return - if on_commit: - transaction.on_commit( - partial(trigger_document_indexer, document, on_commit=False) - ) - else: - key = document_indexer_debounce_key(document.pk) - countdown = getattr(settings, "SEARCH_INDEXER_COUNTDOWN", 1) + key = document_indexer_debounce_key(document.pk) + countdown = getattr(settings, "SEARCH_INDEXER_COUNTDOWN", 1) - logger.info( - "Add task for document %s indexation in %.2f seconds", - document.pk, - countdown, - ) + logger.info( + "Add task for document %s indexation in %.2f seconds", + document.pk, + countdown, + ) - # Each time this method is called during the countdown, we increment the - # counter and each task decrease it, so the index be run only once. - incr_counter(key) + # Each time this method is called during the countdown, we increment the + # counter and each task decrease it, so the index be run only once. 
+ incr_counter(key) - document_indexer_task.apply_async(args=[document.pk], countdown=countdown) + document_indexer_task.apply_async(args=[document.pk], countdown=countdown) diff --git a/src/backend/core/tests/commands/test_index.py b/src/backend/core/tests/commands/test_index.py index d30e0118..169e0b83 100644 --- a/src/backend/core/tests/commands/test_index.py +++ b/src/backend/core/tests/commands/test_index.py @@ -15,6 +15,7 @@ from core.services.search_indexers import FindDocumentIndexer @pytest.mark.django_db +@pytest.mark.usefixtures("indexer_settings") def test_index(): """Test the command `index` that run the Find app indexer for all the available documents.""" user = factories.UserFactory() diff --git a/src/backend/core/tests/conftest.py b/src/backend/core/tests/conftest.py index 00e830e1..2102879b 100644 --- a/src/backend/core/tests/conftest.py +++ b/src/backend/core/tests/conftest.py @@ -24,3 +24,32 @@ def mock_user_teams(): "core.models.User.teams", new_callable=mock.PropertyMock ) as mock_teams: yield mock_teams + + +@pytest.fixture(name="indexer_settings") +def indexer_settings_fixture(settings): + """ + Setup valid settings for the document indexer. Clear the indexer cache. 
+ """ + + # pylint: disable-next=import-outside-toplevel + from core.services.search_indexers import ( # noqa: PLC0415 + default_document_indexer, + get_document_indexer_class, + ) + + default_document_indexer.cache_clear() + get_document_indexer_class.cache_clear() + + settings.SEARCH_INDEXER_CLASS = "core.services.search_indexers.FindDocumentIndexer" + settings.SEARCH_INDEXER_SECRET = "ThisIsAKeyForTest" + settings.SEARCH_INDEXER_URL = "http://localhost:8081/api/v1.0/documents/index/" + settings.SEARCH_INDEXER_QUERY_URL = ( + "http://localhost:8081/api/v1.0/documents/search/" + ) + + yield settings + + # clear cache to prevent issues with other tests + default_document_indexer.cache_clear() + get_document_indexer_class.cache_clear() diff --git a/src/backend/core/tests/documents/test_api_documents_search.py b/src/backend/core/tests/documents/test_api_documents_search.py index f5dcfa24..2c3b2d35 100644 --- a/src/backend/core/tests/documents/test_api_documents_search.py +++ b/src/backend/core/tests/documents/test_api_documents_search.py @@ -2,12 +2,15 @@ Tests for Documents API endpoint in impress's core app: list """ +from json import loads as json_loads + import pytest import responses from faker import Faker from rest_framework.test import APIClient from core import factories, models +from core.services.search_indexers import default_document_indexer fake = Faker() pytestmark = pytest.mark.django_db @@ -16,13 +19,12 @@ pytestmark = pytest.mark.django_db @pytest.mark.parametrize("role", models.LinkRoleChoices.values) @pytest.mark.parametrize("reach", models.LinkReachChoices.values) @responses.activate -def test_api_documents_search_anonymous(reach, role, settings): +def test_api_documents_search_anonymous(reach, role, indexer_settings): """ Anonymous users should not be allowed to search documents whatever the link reach and link role """ - factories.DocumentFactory(link_reach=reach, link_role=role) - settings.SEARCH_INDEXER_QUERY_URL = 
"http://find/api/v1.0/search" + indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search" factories.DocumentFactory(link_reach=reach, link_role=role) @@ -45,46 +47,90 @@ def test_api_documents_search_anonymous(reach, role, settings): } -def test_api_documents_search_endpoint_is_none(settings): - """Missing SEARCH_INDEXER_QUERY_URL should throw an error""" - settings.SEARCH_INDEXER_QUERY_URL = None +def test_api_documents_search_endpoint_is_none(indexer_settings): + """ + Missing SEARCH_INDEXER_QUERY_URL, so the indexer is not properly configured. + Should fallback on title filter + """ + indexer_settings.SEARCH_INDEXER_QUERY_URL = None + + assert default_document_indexer() is None user = factories.UserFactory() + document = factories.DocumentFactory(title="alpha") + access = factories.UserDocumentAccessFactory(document=document, user=user) client = APIClient() client.force_login(user) - response = APIClient().get("/api/v1.0/documents/search/", data={"q": "alpha"}) + response = client.get("/api/v1.0/documents/search/", data={"q": "alpha"}) - assert response.status_code == 401 - assert response.json() == {"detail": "The service is not properly configured."} + assert response.status_code == 200 + content = response.json() + results = content.pop("results") + assert content == { + "count": 1, + "next": None, + "previous": None, + } + assert len(results) == 1 + assert results[0] == { + "id": str(document.id), + "abilities": document.get_abilities(user), + "ancestors_link_reach": None, + "ancestors_link_role": None, + "computed_link_reach": document.computed_link_reach, + "computed_link_role": document.computed_link_role, + "created_at": document.created_at.isoformat().replace("+00:00", "Z"), + "creator": str(document.creator.id), + "depth": 1, + "excerpt": document.excerpt, + "link_reach": document.link_reach, + "link_role": document.link_role, + "nb_accesses_ancestors": 1, + "nb_accesses_direct": 1, + "numchild": 0, + "path": document.path, + "title": 
document.title, + "updated_at": document.updated_at.isoformat().replace("+00:00", "Z"), + "user_role": access.role, + } @responses.activate -def test_api_documents_search_invalid_params(settings): +def test_api_documents_search_invalid_params(indexer_settings): """Validate the format of documents as returned by the search view.""" - settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search" + indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search" user = factories.UserFactory() client = APIClient() client.force_login(user) - response = APIClient().get("/api/v1.0/documents/search/") + response = client.get("/api/v1.0/documents/search/") assert response.status_code == 400 assert response.json() == {"q": ["This field is required."]} - response = APIClient().get("/api/v1.0/documents/search/", data={"q": " "}) + response = client.get("/api/v1.0/documents/search/", data={"q": " "}) assert response.status_code == 400 assert response.json() == {"q": ["This field may not be blank."]} + response = client.get( + "/api/v1.0/documents/search/", data={"q": "any", "page": "NaN"} + ) + + assert response.status_code == 400 + assert response.json() == {"page": ["A valid integer is required."]} + @responses.activate -def test_api_documents_search_format(settings): +def test_api_documents_search_format(indexer_settings): """Validate the format of documents as returned by the search view.""" - settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search" + indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search" + + assert default_document_indexer() is not None user = factories.UserFactory() @@ -140,3 +186,49 @@ def test_api_documents_search_format(settings): "updated_at": document.updated_at.isoformat().replace("+00:00", "Z"), "user_role": access.role, } + + +@responses.activate +def test_api_documents_search_pagination(indexer_settings): + """Documents should be ordered by descending "updated_at" by default""" + 
indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search" + + assert default_document_indexer() is not None + + user = factories.UserFactory() + + client = APIClient() + client.force_login(user) + + docs = factories.DocumentFactory.create_batch(10) + + # Find response + # pylint: disable-next=assignment-from-none + api_search = responses.add( + responses.POST, + "http://find/api/v1.0/search", + json=[{"_id": str(doc.pk)} for doc in docs], + status=200, + ) + + response = client.get( + "/api/v1.0/documents/search/", data={"q": "alpha", "page": 2, "page_size": 5} + ) + + assert response.status_code == 200 + content = response.json() + results = content.pop("results") + assert len(results) == 5 + + # Check the query parameters. + assert api_search.call_count == 1 + assert api_search.calls[0].response.status_code == 200 + assert json_loads(api_search.calls[0].request.body) == { + "q": "alpha", + "visited": [], + "services": ["docs"], + "page_number": 2, + "page_size": 5, + "order_by": "updated_at", + "order_direction": "desc", + } diff --git a/src/backend/core/tests/test_models_documents.py b/src/backend/core/tests/test_models_documents.py index 44165961..e3481068 100644 --- a/src/backend/core/tests/test_models_documents.py +++ b/src/backend/core/tests/test_models_documents.py @@ -1626,9 +1626,9 @@ def test_models_documents_compute_ancestors_links_paths_mapping_structure( @mock.patch.object(FindDocumentIndexer, "push") @pytest.mark.django_db(transaction=True) -def test_models_documents_post_save_indexer(mock_push, settings): +def test_models_documents_post_save_indexer(mock_push, indexer_settings): """Test indexation task on document creation""" - settings.SEARCH_INDEXER_COUNTDOWN = 0 + indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0 user = factories.UserFactory() @@ -1666,10 +1666,127 @@ def test_models_documents_post_save_indexer(mock_push, settings): assert cache.get(document_indexer_debounce_key(doc3.pk)) == 0 +@mock.patch.object(FindDocumentIndexer, 
"push") @pytest.mark.django_db(transaction=True) -def test_models_documents_post_save_indexer_debounce(settings): +def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings): + """Skip indexation task on deleted or ancestor_deleted documents""" + indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0 + + user = factories.UserFactory() + + with transaction.atomic(): + doc = factories.DocumentFactory() + doc_deleted = factories.DocumentFactory() + doc_ancestor_deleted = factories.DocumentFactory(parent=doc_deleted) + doc_deleted.soft_delete() + doc_ancestor_deleted.ancestors_deleted_at = doc_deleted.deleted_at + + factories.UserDocumentAccessFactory(document=doc, user=user) + factories.UserDocumentAccessFactory(document=doc_deleted, user=user) + factories.UserDocumentAccessFactory(document=doc_ancestor_deleted, user=user) + + doc_deleted.refresh_from_db() + doc_ancestor_deleted.refresh_from_db() + + assert doc_deleted.deleted_at is not None + assert doc_deleted.ancestors_deleted_at is not None + + assert doc_ancestor_deleted.deleted_at is None + assert doc_ancestor_deleted.ancestors_deleted_at is not None + + time.sleep(0.2) # waits for the end of the tasks + + accesses = { + str(doc.path): {"users": [user.sub]}, + str(doc_deleted.path): {"users": [user.sub]}, + str(doc_ancestor_deleted.path): {"users": [user.sub]}, + } + + data = [call.args[0] for call in mock_push.call_args_list] + + indexer = FindDocumentIndexer() + + # Only the not deleted document is indexed + assert data == [ + indexer.serialize_document(doc, accesses), + ] + + # The debounce counters should be reset + assert cache.get(document_indexer_debounce_key(doc.pk)) == 0 + + # These caches are not filled + assert cache.get(document_indexer_debounce_key(doc_deleted.pk)) is None + assert cache.get(document_indexer_debounce_key(doc_ancestor_deleted.pk)) is None + + +@mock.patch.object(FindDocumentIndexer, "push") +@pytest.mark.django_db(transaction=True) +def 
test_models_documents_post_save_indexer_restored(mock_push, indexer_settings): + """Restart indexation task on restored documents""" + indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0 + + user = factories.UserFactory() + + with transaction.atomic(): + doc = factories.DocumentFactory() + doc_deleted = factories.DocumentFactory() + doc_ancestor_deleted = factories.DocumentFactory(parent=doc_deleted) + doc_deleted.soft_delete() + doc_ancestor_deleted.ancestors_deleted_at = doc_deleted.deleted_at + + factories.UserDocumentAccessFactory(document=doc, user=user) + factories.UserDocumentAccessFactory(document=doc_deleted, user=user) + factories.UserDocumentAccessFactory(document=doc_ancestor_deleted, user=user) + + doc_deleted.refresh_from_db() + doc_ancestor_deleted.refresh_from_db() + + assert doc_deleted.deleted_at is not None + assert doc_deleted.ancestors_deleted_at is not None + + assert doc_ancestor_deleted.deleted_at is None + assert doc_ancestor_deleted.ancestors_deleted_at is not None + + time.sleep(0.2) # waits for the end of the tasks + + doc_deleted.restore() + + doc_deleted.refresh_from_db() + doc_ancestor_deleted.refresh_from_db() + + assert doc_deleted.deleted_at is None + assert doc_deleted.ancestors_deleted_at is None + + assert doc_ancestor_deleted.deleted_at is None + assert doc_ancestor_deleted.ancestors_deleted_at is None + + time.sleep(0.2) + + accesses = { + str(doc.path): {"users": [user.sub]}, + str(doc_deleted.path): {"users": [user.sub]}, + str(doc_ancestor_deleted.path): {"users": [user.sub]}, + } + + data = [call.args[0] for call in mock_push.call_args_list] + + indexer = FindDocumentIndexer() + + # All docs are re-indexed + assert sorted(data, key=itemgetter("id")) == sorted( + [ + indexer.serialize_document(doc, accesses), + indexer.serialize_document(doc_deleted, accesses), + # The restored document child is not saved so no indexation. 
+ ], + key=itemgetter("id"), + ) + + +@pytest.mark.django_db(transaction=True) +def test_models_documents_post_save_indexer_debounce(indexer_settings): """Test indexation task skipping on document update""" - settings.SEARCH_INDEXER_COUNTDOWN = 0 + indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0 indexer = FindDocumentIndexer() user = factories.UserFactory() @@ -1714,9 +1831,9 @@ def test_models_documents_post_save_indexer_debounce(settings): @pytest.mark.django_db(transaction=True) -def test_models_documents_access_post_save_indexer(settings): +def test_models_documents_access_post_save_indexer(indexer_settings): """Test indexation task on DocumentAccess update""" - settings.SEARCH_INDEXER_COUNTDOWN = 0 + indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0 indexer = FindDocumentIndexer() user = factories.UserFactory() diff --git a/src/backend/core/tests/test_services_search_indexers.py b/src/backend/core/tests/test_services_search_indexers.py index e0f43fca..63c2d305 100644 --- a/src/backend/core/tests/test_services_search_indexers.py +++ b/src/backend/core/tests/test_services_search_indexers.py @@ -1,6 +1,7 @@ """Tests for Documents search indexers""" from functools import partial +from json import dumps as json_dumps from unittest.mock import patch from django.contrib.auth.models import AnonymousUser @@ -8,11 +9,14 @@ from django.core.exceptions import ImproperlyConfigured from django.utils.module_loading import import_string import pytest +import responses +from requests import HTTPError from core import factories, models, utils from core.services.search_indexers import ( BaseDocumentIndexer, FindDocumentIndexer, + default_document_indexer, get_document_indexer_class, get_visited_document_ids_of, ) @@ -32,39 +36,19 @@ class FakeDocumentIndexer(BaseDocumentIndexer): def search_query(self, data, token): return {} - def format_response(self, data: dict): - return {} - -@pytest.fixture(name="fake_indexer_settings") -def fake_indexer_settings_fixture(settings): - """Fixture 
to switch the indexer to the FakeDocumentIndexer.""" - _original_backend = str(settings.SEARCH_INDEXER_CLASS) - - settings.SEARCH_INDEXER_CLASS = ( - "core.tests.test_services_search_indexers.FakeDocumentIndexer" - ) - get_document_indexer_class.cache_clear() - - yield settings - - settings.SEARCH_INDEXER_CLASS = _original_backend - # clear cache to prevent issues with other tests - get_document_indexer_class.cache_clear() - - -def test_services_search_indexer_class_is_empty(fake_indexer_settings): +def test_services_search_indexer_class_is_empty(indexer_settings): """ Should raise ImproperlyConfigured if SEARCH_INDEXER_CLASS is None or empty. """ - fake_indexer_settings.SEARCH_INDEXER_CLASS = None + indexer_settings.SEARCH_INDEXER_CLASS = None with pytest.raises(ImproperlyConfigured) as exc_info: get_document_indexer_class() assert "SEARCH_INDEXER_CLASS must be set in Django settings." in str(exc_info.value) - fake_indexer_settings.SEARCH_INDEXER_CLASS = "" + indexer_settings.SEARCH_INDEXER_CLASS = "" # clear cache again get_document_indexer_class.cache_clear() @@ -75,11 +59,11 @@ def test_services_search_indexer_class_is_empty(fake_indexer_settings): assert "SEARCH_INDEXER_CLASS must be set in Django settings." in str(exc_info.value) -def test_services_search_indexer_class_invalid(fake_indexer_settings): +def test_services_search_indexer_class_invalid(indexer_settings): """ Should raise RuntimeError if SEARCH_INDEXER_CLASS cannot be imported. """ - fake_indexer_settings.SEARCH_INDEXER_CLASS = "unknown.Unknown" + indexer_settings.SEARCH_INDEXER_CLASS = "unknown.Unknown" with pytest.raises(ImproperlyConfigured) as exc_info: get_document_indexer_class() @@ -90,11 +74,11 @@ def test_services_search_indexer_class_invalid(fake_indexer_settings): ) -def test_services_search_indexer_class(fake_indexer_settings): +def test_services_search_indexer_class(indexer_settings): """ Import indexer class defined in setting SEARCH_INDEXER_CLASS. 
""" - fake_indexer_settings.SEARCH_INDEXER_CLASS = ( + indexer_settings.SEARCH_INDEXER_CLASS = ( "core.tests.test_services_search_indexers.FakeDocumentIndexer" ) @@ -103,11 +87,43 @@ def test_services_search_indexer_class(fake_indexer_settings): ) -def test_services_search_indexer_url_is_none(settings): +def test_services_search_indexer_is_configured(indexer_settings): + """ + Should return true only when the indexer class and other configuration settings + are valid. + """ + indexer_settings.SEARCH_INDEXER_CLASS = None + + # None + default_document_indexer.cache_clear() + assert not default_document_indexer() + + # Empty + indexer_settings.SEARCH_INDEXER_CLASS = "" + + default_document_indexer.cache_clear() + assert not default_document_indexer() + + # Valid class + indexer_settings.SEARCH_INDEXER_CLASS = ( + "core.services.search_indexers.FindDocumentIndexer" + ) + + default_document_indexer.cache_clear() + assert default_document_indexer() is not None + + indexer_settings.SEARCH_INDEXER_URL = "" + + # Invalid url + default_document_indexer.cache_clear() + assert not default_document_indexer() + + +def test_services_search_indexer_url_is_none(indexer_settings): """ Indexer should raise RuntimeError if SEARCH_INDEXER_URL is None or empty. """ - settings.SEARCH_INDEXER_URL = None + indexer_settings.SEARCH_INDEXER_URL = None with pytest.raises(ImproperlyConfigured) as exc_info: FindDocumentIndexer() @@ -115,11 +131,11 @@ def test_services_search_indexer_url_is_none(settings): assert "SEARCH_INDEXER_URL must be set in Django settings." in str(exc_info.value) -def test_services_search_indexer_url_is_empty(settings): +def test_services_search_indexer_url_is_empty(indexer_settings): """ Indexer should raise RuntimeError if SEARCH_INDEXER_URL is empty string. 
""" - settings.SEARCH_INDEXER_URL = "" + indexer_settings.SEARCH_INDEXER_URL = "" with pytest.raises(ImproperlyConfigured) as exc_info: FindDocumentIndexer() @@ -127,11 +143,11 @@ def test_services_search_indexer_url_is_empty(settings): assert "SEARCH_INDEXER_URL must be set in Django settings." in str(exc_info.value) -def test_services_search_indexer_secret_is_none(settings): +def test_services_search_indexer_secret_is_none(indexer_settings): """ - Indexer should raise RuntimeError if SEARCH_INDEXER_SECRET is None or empty. + Indexer should raise RuntimeError if SEARCH_INDEXER_SECRET is None. """ - settings.SEARCH_INDEXER_SECRET = None + indexer_settings.SEARCH_INDEXER_SECRET = None with pytest.raises(ImproperlyConfigured) as exc_info: FindDocumentIndexer() @@ -141,11 +157,11 @@ def test_services_search_indexer_secret_is_none(settings): ) -def test_services_search_indexer_secret_is_empty(settings): +def test_services_search_indexer_secret_is_empty(indexer_settings): """ Indexer should raise RuntimeError if SEARCH_INDEXER_SECRET is empty string. """ - settings.SEARCH_INDEXER_SECRET = "" + indexer_settings.SEARCH_INDEXER_SECRET = "" with pytest.raises(ImproperlyConfigured) as exc_info: FindDocumentIndexer() @@ -155,6 +171,35 @@ def test_services_search_indexer_secret_is_empty(settings): ) +def test_services_search_endpoint_is_none(indexer_settings): + """ + Indexer should raise RuntimeError if SEARCH_INDEXER_QUERY_URL is None. + """ + indexer_settings.SEARCH_INDEXER_QUERY_URL = None + + with pytest.raises(ImproperlyConfigured) as exc_info: + FindDocumentIndexer() + + assert "SEARCH_INDEXER_QUERY_URL must be set in Django settings." in str( + exc_info.value + ) + + +def test_services_search_endpoint_is_empty(indexer_settings): + """ + Indexer should raise RuntimeError if SEARCH_INDEXER_QUERY_URL is empty. 
+ """ + indexer_settings.SEARCH_INDEXER_QUERY_URL = "" + + with pytest.raises(ImproperlyConfigured) as exc_info: + FindDocumentIndexer() + + assert "SEARCH_INDEXER_QUERY_URL must be set in Django settings." in str( + exc_info.value + ) + + +@pytest.mark.usefixtures("indexer_settings") def test_services_search_indexers_serialize_document_returns_expected_json(): """ It should serialize documents with correct metadata and access control. @@ -195,6 +240,7 @@ def test_services_search_indexers_serialize_document_returns_expected_json(): } +@pytest.mark.usefixtures("indexer_settings") def test_services_search_indexers_serialize_document_deleted(): """Deleted documents are marked as just in the serialized json.""" parent = factories.DocumentFactory() @@ -209,6 +255,7 @@ def test_services_search_indexers_serialize_document_deleted(): assert result["is_active"] is False +@pytest.mark.usefixtures("indexer_settings") def test_services_search_indexers_serialize_document_empty(): """Empty documents returns empty content in the serialized json.""" document = factories.DocumentFactory(content="", title=None) @@ -220,13 +267,35 @@ def test_services_search_indexers_serialize_document_empty(): assert result["title"] == "" +@responses.activate +def test_services_search_indexers_index_errors(indexer_settings): + """ + Documents indexing response handling on Find API HTTP errors. 
+ """ + factories.DocumentFactory() + + indexer_settings.SEARCH_INDEXER_URL = "http://app-find/api/v1.0/documents/index/" + + responses.add( + responses.POST, + "http://app-find/api/v1.0/documents/index/", + status=401, + body=json_dumps({"message": "Authentication failed."}), + ) + + with pytest.raises(HTTPError): + FindDocumentIndexer().index() + + @patch.object(FindDocumentIndexer, "push") -def test_services_search_indexers_batches_pass_only_batch_accesses(mock_push, settings): +def test_services_search_indexers_batches_pass_only_batch_accesses( + mock_push, indexer_settings +): """ Documents indexing should be processed in batches, and only the access data relevant to each batch should be used. """ - settings.SEARCH_INDEXER_BATCH_SIZE = 2 + indexer_settings.SEARCH_INDEXER_BATCH_SIZE = 2 documents = factories.DocumentFactory.create_batch(5) # Attach a single user access to each document @@ -259,6 +328,7 @@ def test_services_search_indexers_batches_pass_only_batch_accesses(mock_push, se @patch.object(FindDocumentIndexer, "push") +@pytest.mark.usefixtures("indexer_settings") def test_services_search_indexers_ancestors_link_reach(mock_push): """Document accesses and reach should take into account ancestors link reaches.""" great_grand_parent = factories.DocumentFactory(link_reach="restricted") @@ -279,6 +349,7 @@ def test_services_search_indexers_ancestors_link_reach(mock_push): @patch.object(FindDocumentIndexer, "push") +@pytest.mark.usefixtures("indexer_settings") def test_services_search_indexers_ancestors_users(mock_push): """Document accesses and reach should include users from ancestors.""" user_gp, user_p, user_d = factories.UserFactory.create_batch(3) @@ -301,6 +372,7 @@ def test_services_search_indexers_ancestors_users(mock_push): @patch.object(FindDocumentIndexer, "push") +@pytest.mark.usefixtures("indexer_settings") def test_services_search_indexers_ancestors_teams(mock_push): """Document accesses and reach should include teams from ancestors.""" 
grand_parent = factories.DocumentFactory(teams=["team_gp"]) @@ -317,12 +389,12 @@ def test_services_search_indexers_ancestors_teams(mock_push): @patch("requests.post") -def test_push_uses_correct_url_and_data(mock_post, settings): +def test_push_uses_correct_url_and_data(mock_post, indexer_settings): """ push() should call requests.post with the correct URL from settings the timeout set to 10 seconds and the data as JSON. """ - settings.SEARCH_INDEXER_URL = "http://example.com/index" + indexer_settings.SEARCH_INDEXER_URL = "http://example.com/index" indexer = FindDocumentIndexer() sample_data = [{"id": "123", "title": "Test"}] @@ -335,7 +407,7 @@ def test_push_uses_correct_url_and_data(mock_post, settings): mock_post.assert_called_once() args, kwargs = mock_post.call_args - assert args[0] == settings.SEARCH_INDEXER_URL + assert args[0] == indexer_settings.SEARCH_INDEXER_URL assert kwargs.get("json") == sample_data assert kwargs.get("timeout") == 10 @@ -348,9 +420,10 @@ def test_get_visited_document_ids_of(): user = factories.UserFactory() other = factories.UserFactory() anonymous = AnonymousUser() + queryset = models.Document.objects.all() - assert not get_visited_document_ids_of(anonymous) - assert not get_visited_document_ids_of(user) + assert not get_visited_document_ids_of(queryset, anonymous) + assert not get_visited_document_ids_of(queryset, user) doc1, doc2, _ = factories.DocumentFactory.create_batch(3) @@ -360,7 +433,7 @@ def test_get_visited_document_ids_of(): create_link(document=doc2) # The third document is not visited - assert sorted(get_visited_document_ids_of(user)) == sorted( + assert sorted(get_visited_document_ids_of(queryset, user)) == sorted( [str(doc1.pk), str(doc2.pk)] ) @@ -368,11 +441,67 @@ def test_get_visited_document_ids_of(): factories.UserDocumentAccessFactory(user=user, document=doc2) # The second document have an access for the user - assert get_visited_document_ids_of(user) == [str(doc1.pk)] + assert 
get_visited_document_ids_of(queryset, user) == [str(doc1.pk)] + + +@pytest.mark.usefixtures("indexer_settings") +def test_get_visited_document_ids_of_deleted(): + """ + get_visited_document_ids_of() returns the ids of the documents viewed + by the user if they are not deleted. + """ + user = factories.UserFactory() + anonymous = AnonymousUser() + queryset = models.Document.objects.all() + + assert not get_visited_document_ids_of(queryset, anonymous) + assert not get_visited_document_ids_of(queryset, user) + + doc = factories.DocumentFactory() + doc_deleted = factories.DocumentFactory() + doc_ancestor_deleted = factories.DocumentFactory(parent=doc_deleted) + + create_link = partial(models.LinkTrace.objects.create, user=user, is_masked=False) + + create_link(document=doc) + create_link(document=doc_deleted) + create_link(document=doc_ancestor_deleted) + + # All the documents are visited + assert sorted(get_visited_document_ids_of(queryset, user)) == sorted( + [str(doc.pk), str(doc_deleted.pk), str(doc_ancestor_deleted.pk)] + ) + + doc_deleted.soft_delete() + + # Only the first document remains: the others are deleted or under a deleted ancestor + assert get_visited_document_ids_of(queryset, user) == [str(doc.pk)] + + +@responses.activate +def test_services_search_indexers_search_errors(indexer_settings): + """ + Documents search response handling on Find API HTTP errors.
+ """ + factories.DocumentFactory() + + indexer_settings.SEARCH_INDEXER_QUERY_URL = ( + "http://app-find/api/v1.0/documents/search/" + ) + + responses.add( + responses.POST, + "http://app-find/api/v1.0/documents/search/", + status=401, + body=json_dumps({"message": "Authentication failed."}), + ) + + with pytest.raises(HTTPError): + FindDocumentIndexer().search("alpha", token="mytoken") @patch("requests.post") -def test_services_search_indexers_search(mock_post, settings): +def test_services_search_indexers_search(mock_post, indexer_settings): """ search() should call requests.post to SEARCH_INDEXER_QUERY_URL with the document ids from linktraces. @@ -390,30 +519,22 @@ def test_services_search_indexers_search(mock_post, settings): create_link(document=doc1) create_link(document=doc2) - indexer.search("alpha", user=user, token="mytoken") + visited = get_visited_document_ids_of(models.Document.objects.all(), user) + + indexer.search("alpha", visited=visited, token="mytoken") args, kwargs = mock_post.call_args - assert args[0] == settings.SEARCH_INDEXER_QUERY_URL + assert args[0] == indexer_settings.SEARCH_INDEXER_QUERY_URL query_data = kwargs.get("json") assert query_data["q"] == "alpha" assert sorted(query_data["visited"]) == sorted([str(doc1.pk), str(doc2.pk)]) assert query_data["services"] == ["docs"] + assert query_data["page_number"] == 1 + assert query_data["page_size"] == 50 + assert query_data["order_by"] == "updated_at" + assert query_data["order_direction"] == "desc" assert kwargs.get("headers") == {"Authorization": "Bearer mytoken"} assert kwargs.get("timeout") == 10 - - -def test_search_query_raises_error_if_search_endpoint_is_none(settings): - """ - Indexer should raise RuntimeError if SEARCH_INDEXER_QUERY_URL is None or empty. - """ - settings.SEARCH_INDEXER_QUERY_URL = None - - with pytest.raises(ImproperlyConfigured) as exc_info: - FindDocumentIndexer() - - assert "SEARCH_INDEXER_QUERY_URL must be set in Django settings." 
in str( - exc_info.value - ) diff --git a/src/backend/impress/settings.py b/src/backend/impress/settings.py index d7b41cd5..05b03ba5 100755 --- a/src/backend/impress/settings.py +++ b/src/backend/impress/settings.py @@ -101,7 +101,7 @@ class Base(Configuration): # Search SEARCH_INDEXER_CLASS = values.Value( - default="core.services.search_indexers.FindDocumentIndexer", + default=None, environ_name="SEARCH_INDEXER_CLASS", environ_prefix=None, ) @@ -982,11 +982,6 @@ class Test(Base): # Tests are raising warnings because the /data/static directory does not exist STATIC_ROOT = None - # Setup indexer configuration to make test working on the CI. - SEARCH_INDEXER_SECRET = "ThisIsAKeyForTest" # noqa - SEARCH_INDEXER_URL = "http://localhost:8081/api/v1.0/documents/index/" - SEARCH_INDEXER_QUERY_URL = "http://localhost:8081/api/v1.0/documents/search/" - CELERY_TASK_ALWAYS_EAGER = values.BooleanValue(True) def __init__(self):