diff --git a/env.d/development/common b/env.d/development/common index 7ad7589f..a6bfb65b 100644 --- a/env.d/development/common +++ b/env.d/development/common @@ -79,7 +79,7 @@ Y_PROVIDER_API_KEY=yprovider-api-key THEME_CUSTOMIZATION_CACHE_TIMEOUT=15 # Indexer -SEARCH_INDEXER_CLASS="core.services.search_indexers.FindDocumentIndexer" +SEARCH_INDEXER_CLASS="core.services.search_indexers.SearchIndexer" SEARCH_INDEXER_SECRET=find-api-key-for-docs-with-exactly-50-chars-length # Key generated by create_demo in Find app. SEARCH_INDEXER_URL="http://find:8000/api/v1.0/documents/index/" SEARCH_INDEXER_QUERY_URL="http://find:8000/api/v1.0/documents/search/" diff --git a/src/backend/core/api/serializers.py b/src/backend/core/api/serializers.py index f147f610..6c224513 100644 --- a/src/backend/core/api/serializers.py +++ b/src/backend/core/api/serializers.py @@ -1015,8 +1015,8 @@ class ThreadSerializer(serializers.ModelSerializer): return {} -class FindDocumentSerializer(serializers.Serializer): - """Serializer for Find search requests""" +class SearchDocumentSerializer(serializers.Serializer): + """Serializer for fulltext search requests through Find application""" q = serializers.CharField(required=True, allow_blank=False, trim_whitespace=True) page_size = serializers.IntegerField( diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 300d346f..b5965ca2 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -1085,16 +1085,49 @@ class DocumentViewSet( {"id": str(duplicated_document.id)}, status=status.HTTP_201_CREATED ) + def _simple_search_queryset(self, params): + """ + Returns a queryset filtered by the content of the document title + """ + text = params.validated_data["q"] + + # As the 'list' view we get a prefiltered queryset (deleted docs are excluded) + queryset = self.get_queryset() + filterset = DocumentFilter({"title": text}, queryset=queryset) + + if not filterset.is_valid(): + raise drf.exceptions.ValidationError(filterset.errors) + + return filterset.filter_queryset(queryset) + + def _fulltext_search_queryset(self, indexer, token, user, params): + """ + Returns a queryset from the results the fulltext search of Find + """ + text = params.validated_data["q"] + queryset = models.Document.objects.all() + + # Retrieve the documents ids from Find. + results = indexer.search( + text=text, + token=token, + visited=get_visited_document_ids_of(queryset, user), + page=params.validated_data.get("page", 1), + page_size=params.validated_data.get("page_size", 20), + ) + + return queryset.filter(pk__in=results) + @drf.decorators.action(detail=False, methods=["get"], url_path="search") @method_decorator(refresh_oidc_access_token) def search(self, request, *args, **kwargs): """ Returns a DRF response containing the filtered, annotated and ordered document list. - Applies filtering based on request parameter 'q' from `FindDocumentSerializer`. + Applies filtering based on request parameter 'q' from `SearchDocumentSerializer`. Depending of the configuration it can be: - A fulltext search through the opensearch indexation app "find" if the backend is - enabled (see SEARCH_BACKEND_CLASS) + enabled (see SEARCH_INDEXER_CLASS) - A filtering by the model field 'title'. The ordering is always by the most recent first. @@ -1102,46 +1135,22 @@ class DocumentViewSet( access_token = request.session.get("oidc_access_token") user = request.user - serializer = serializers.FindDocumentSerializer(data=request.query_params) - serializer.is_valid(raise_exception=True) + params = serializers.SearchDocumentSerializer(data=request.query_params) + params.is_valid(raise_exception=True) indexer = get_document_indexer() - text = serializer.validated_data["q"] - # The indexer is not configured, so we fallback on a simple filter on the - # model field 'title'. - if not indexer: - # As the 'list' view we get a prefiltered queryset (deleted docs are excluded) - queryset = self.get_queryset() - filterset = DocumentFilter({"title": text}, queryset=queryset) - - if not filterset.is_valid(): - raise drf.exceptions.ValidationError(filterset.errors) - - queryset = filterset.filter_queryset(queryset).order_by("-updated_at") - - return self.get_response_for_queryset( - queryset, - context={ - "request": request, - }, + if indexer: + queryset = self._fulltext_search_queryset( + indexer, token=access_token, user=user, params=params ) - - queryset = models.Document.objects.all() - - # Retrieve the documents ids from Find. - results = indexer.search( - text=text, - token=access_token, - visited=get_visited_document_ids_of(queryset, user), - page=serializer.validated_data.get("page", 1), - page_size=serializer.validated_data.get("page_size", 20), - ) - - queryset = queryset.filter(pk__in=results).order_by("-updated_at") + else: + # The indexer is not configured, we fallback on a simple icontains filter by the + # model field 'title'. + queryset = self._simple_search_queryset(params) return self.get_response_for_queryset( - queryset, + queryset.order_by("-updated_at"), context={ "request": request, }, diff --git a/src/backend/core/services/search_indexers.py b/src/backend/core/services/search_indexers.py index c5c26e4b..abaa4d45 100644 --- a/src/backend/core/services/search_indexers.py +++ b/src/backend/core/services/search_indexers.py @@ -223,7 +223,7 @@ class BaseDocumentIndexer(ABC): """ -class FindDocumentIndexer(BaseDocumentIndexer): +class SearchIndexer(BaseDocumentIndexer): """ Document indexer that pushes documents to La Suite Find app. """ @@ -270,18 +270,14 @@ class FindDocumentIndexer(BaseDocumentIndexer): Returns: dict: A JSON-serializable dictionary. """ - try: - response = requests.post( - self.search_url, - json=data, - headers={"Authorization": f"Bearer {token}"}, - timeout=10, - ) - response.raise_for_status() - return response.json() - except requests.exceptions.HTTPError as e: - logger.error("HTTPError: %s", e) - raise + response = requests.post( + self.search_url, + json=data, + headers={"Authorization": f"Bearer {token}"}, + timeout=10, + ) + response.raise_for_status() + return response.json() def push(self, data): """ @@ -290,14 +286,10 @@ class FindDocumentIndexer(BaseDocumentIndexer): Args: data (list): List of document dictionaries. """ - try: - response = requests.post( - self.indexer_url, - json=data, - headers={"Authorization": f"Bearer {self.indexer_secret}"}, - timeout=10, - ) - response.raise_for_status() - except requests.exceptions.HTTPError as e: - logger.error("HTTPError: %s", e) - raise + response = requests.post( + self.indexer_url, + json=data, + headers={"Authorization": f"Bearer {self.indexer_secret}"}, + timeout=10, + ) + response.raise_for_status() diff --git a/src/backend/core/signals.py b/src/backend/core/signals.py index 112a42f5..1aa95f44 100644 --- a/src/backend/core/signals.py +++ b/src/backend/core/signals.py @@ -9,7 +9,7 @@ from django.db.models import signals from django.dispatch import receiver from . import models -from .tasks.find import trigger_document_indexer +from .tasks.search import trigger_document_indexer @receiver(signals.post_save, sender=models.Document) diff --git a/src/backend/core/tasks/find.py b/src/backend/core/tasks/search.py similarity index 100% rename from src/backend/core/tasks/find.py rename to src/backend/core/tasks/search.py diff --git a/src/backend/core/tests/commands/test_index.py b/src/backend/core/tests/commands/test_index.py index 78d30249..ad7d39e6 100644 --- a/src/backend/core/tests/commands/test_index.py +++ b/src/backend/core/tests/commands/test_index.py @@ -11,7 +11,7 @@ from django.db import transaction import pytest from core import factories -from core.services.search_indexers import FindDocumentIndexer +from core.services.search_indexers import SearchIndexer @pytest.mark.django_db @@ -19,7 +19,7 @@ from core.services.search_indexers import FindDocumentIndexer def test_index(): """Test the command `index` that run the Find app indexer for all the available documents.""" user = factories.UserFactory() - indexer = FindDocumentIndexer() + indexer = SearchIndexer() with transaction.atomic(): doc = factories.DocumentFactory() @@ -36,7 +36,7 @@ def test_index(): str(no_title_doc.path): {"users": [user.sub]}, } - with mock.patch.object(FindDocumentIndexer, "push") as mock_push: + with mock.patch.object(SearchIndexer, "push") as mock_push: call_command("index") push_call_args = [call.args[0] for call in mock_push.call_args_list] diff --git a/src/backend/core/tests/conftest.py b/src/backend/core/tests/conftest.py index 7c5a59b8..ba607b42 100644 --- a/src/backend/core/tests/conftest.py +++ b/src/backend/core/tests/conftest.py @@ -39,7 +39,7 @@ def indexer_settings_fixture(settings): get_document_indexer.cache_clear() - settings.SEARCH_INDEXER_CLASS = "core.services.search_indexers.FindDocumentIndexer" + settings.SEARCH_INDEXER_CLASS = "core.services.search_indexers.SearchIndexer" settings.SEARCH_INDEXER_SECRET = "ThisIsAKeyForTest" settings.SEARCH_INDEXER_URL = "http://localhost:8081/api/v1.0/documents/index/" settings.SEARCH_INDEXER_QUERY_URL = ( diff --git a/src/backend/core/tests/test_models_documents.py b/src/backend/core/tests/test_models_documents.py index 91f41707..9a97f51c 100644 --- a/src/backend/core/tests/test_models_documents.py +++ b/src/backend/core/tests/test_models_documents.py @@ -21,7 +21,7 @@ from django.utils import timezone import pytest from core import factories, models -from core.services.search_indexers import FindDocumentIndexer +from core.services.search_indexers import SearchIndexer pytestmark = pytest.mark.django_db @@ -1622,7 +1622,7 @@ def test_models_documents_compute_ancestors_links_paths_mapping_structure( } -@mock.patch.object(FindDocumentIndexer, "push") +@mock.patch.object(SearchIndexer, "push") @pytest.mark.django_db(transaction=True) def test_models_documents_post_save_indexer(mock_push, indexer_settings): """Test indexation task on document creation""" @@ -1634,7 +1634,7 @@ def test_models_documents_post_save_indexer(mock_push, indexer_settings): accesses = {} data = [call.args[0] for call in mock_push.call_args_list] - indexer = FindDocumentIndexer() + indexer = SearchIndexer() assert sorted(data, key=itemgetter("id")) == sorted( [ @@ -1651,7 +1651,7 @@ def test_models_documents_post_save_indexer(mock_push, indexer_settings): assert cache.get(f"doc-indexer-debounce-{doc3.pk}") == 0 -@mock.patch.object(FindDocumentIndexer, "push") +@mock.patch.object(SearchIndexer, "push") @pytest.mark.django_db(transaction=True) def test_models_documents_post_save_indexer_not_configured(mock_push, indexer_settings): """Task should not start an indexation when disabled""" @@ -1664,7 +1664,7 @@ def test_models_documents_post_save_indexer_not_configured(mock_push, indexer_se assert mock_push.call_args_list == [] -@mock.patch.object(FindDocumentIndexer, "push") +@mock.patch.object(SearchIndexer, "push") @pytest.mark.django_db(transaction=True) def test_models_documents_post_save_indexer_with_accesses(mock_push, indexer_settings): """Test indexation task on document creation""" @@ -1687,7 +1687,7 @@ def test_models_documents_post_save_indexer_with_accesses(mock_push, indexer_set data = [call.args[0] for call in mock_push.call_args_list] - indexer = FindDocumentIndexer() + indexer = SearchIndexer() assert sorted(data, key=itemgetter("id")) == sorted( [ @@ -1704,7 +1704,7 @@ def test_models_documents_post_save_indexer_with_accesses(mock_push, indexer_set assert cache.get(f"doc-indexer-debounce-{doc3.pk}") == 0 -@mock.patch.object(FindDocumentIndexer, "push") +@mock.patch.object(SearchIndexer, "push") @pytest.mark.django_db(transaction=True) def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings): """Indexation task on deleted or ancestor_deleted documents""" @@ -1747,7 +1747,7 @@ def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings) data = [call.args[0] for call in mock_push.call_args_list] - indexer = FindDocumentIndexer() + indexer = SearchIndexer() # Even deleted document are re-indexed : only update their status in the future ? assert sorted(data, key=itemgetter("id")) == sorted( @@ -1766,7 +1766,7 @@ def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings) assert cache.get(f"doc-indexer-debounce-{doc_ancestor_deleted.pk}") == 0 -@mock.patch.object(FindDocumentIndexer, "push") +@mock.patch.object(SearchIndexer, "push") @pytest.mark.django_db(transaction=True) def test_models_documents_post_save_indexer_restored(mock_push, indexer_settings): """Restart indexation task on restored documents""" @@ -1820,7 +1820,7 @@ def test_models_documents_post_save_indexer_restored(mock_push, indexer_settings data = [call.args[0] for call in mock_push.call_args_list] - indexer = FindDocumentIndexer() + indexer = SearchIndexer() # All docs are re-indexed assert sorted(data, key=itemgetter("id")) == sorted( @@ -1840,10 +1840,10 @@ def test_models_documents_post_save_indexer_debounce(indexer_settings): """Test indexation task skipping on document update""" indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0 - indexer = FindDocumentIndexer() + indexer = SearchIndexer() user = factories.UserFactory() - with mock.patch.object(FindDocumentIndexer, "push"): + with mock.patch.object(SearchIndexer, "push"): with transaction.atomic(): doc = factories.DocumentFactory() factories.UserDocumentAccessFactory(document=doc, user=user) @@ -1852,7 +1852,7 @@ def test_models_documents_post_save_indexer_debounce(indexer_settings): str(doc.path): {"users": [user.sub]}, } - with mock.patch.object(FindDocumentIndexer, "push") as mock_push: + with mock.patch.object(SearchIndexer, "push") as mock_push: # Simulate 1 waiting task cache.set(f"doc-indexer-debounce-{doc.pk}", 1) @@ -1863,7 +1863,7 @@ def test_models_documents_post_save_indexer_debounce(indexer_settings): assert [call.args[0] for call in mock_push.call_args_list] == [] - with mock.patch.object(FindDocumentIndexer, "push") as mock_push: + with mock.patch.object(SearchIndexer, "push") as mock_push: # No waiting task cache.set(f"doc-indexer-debounce-{doc.pk}", 0) @@ -1881,10 +1881,10 @@ def test_models_documents_access_post_save_indexer(indexer_settings): """Test indexation task on DocumentAccess update""" indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0 - indexer = FindDocumentIndexer() + indexer = SearchIndexer() user = factories.UserFactory() - with mock.patch.object(FindDocumentIndexer, "push"): + with mock.patch.object(SearchIndexer, "push"): with transaction.atomic(): doc = factories.DocumentFactory() doc_access = factories.UserDocumentAccessFactory(document=doc, user=user) @@ -1893,9 +1893,9 @@ def test_models_documents_access_post_save_indexer(indexer_settings): str(doc.path): {"users": [user.sub]}, } - indexer = FindDocumentIndexer() + indexer = SearchIndexer() - with mock.patch.object(FindDocumentIndexer, "push") as mock_push: + with mock.patch.object(SearchIndexer, "push") as mock_push: with transaction.atomic(): doc_access.save() diff --git a/src/backend/core/tests/test_services_search_indexers.py b/src/backend/core/tests/test_services_search_indexers.py index 15686b77..a65f92ca 100644 --- a/src/backend/core/tests/test_services_search_indexers.py +++ b/src/backend/core/tests/test_services_search_indexers.py @@ -15,7 +15,7 @@ from requests import HTTPError from core import factories, models, utils from core.services.search_indexers import ( BaseDocumentIndexer, - FindDocumentIndexer, + SearchIndexer, get_document_indexer, get_visited_document_ids_of, ) @@ -78,7 +78,7 @@ def test_services_search_indexer_is_configured(indexer_settings): # Valid class indexer_settings.SEARCH_INDEXER_CLASS = ( - "core.services.search_indexers.FindDocumentIndexer" + "core.services.search_indexers.SearchIndexer" ) get_document_indexer.cache_clear() @@ -98,7 +98,7 @@ def test_services_search_indexer_url_is_none(indexer_settings): indexer_settings.SEARCH_INDEXER_URL = None with pytest.raises(ImproperlyConfigured) as exc_info: - FindDocumentIndexer() + SearchIndexer() assert "SEARCH_INDEXER_URL must be set in Django settings." in str(exc_info.value) @@ -110,7 +110,7 @@ def test_services_search_indexer_url_is_empty(indexer_settings): indexer_settings.SEARCH_INDEXER_URL = "" with pytest.raises(ImproperlyConfigured) as exc_info: - FindDocumentIndexer() + SearchIndexer() assert "SEARCH_INDEXER_URL must be set in Django settings." in str(exc_info.value) @@ -122,7 +122,7 @@ def test_services_search_indexer_secret_is_none(indexer_settings): indexer_settings.SEARCH_INDEXER_SECRET = None with pytest.raises(ImproperlyConfigured) as exc_info: - FindDocumentIndexer() + SearchIndexer() assert "SEARCH_INDEXER_SECRET must be set in Django settings." in str( exc_info.value @@ -136,7 +136,7 @@ def test_services_search_indexer_secret_is_empty(indexer_settings): indexer_settings.SEARCH_INDEXER_SECRET = "" with pytest.raises(ImproperlyConfigured) as exc_info: - FindDocumentIndexer() + SearchIndexer() assert "SEARCH_INDEXER_SECRET must be set in Django settings." in str( exc_info.value @@ -150,7 +150,7 @@ def test_services_search_endpoint_is_none(indexer_settings): indexer_settings.SEARCH_INDEXER_QUERY_URL = None with pytest.raises(ImproperlyConfigured) as exc_info: - FindDocumentIndexer() + SearchIndexer() assert "SEARCH_INDEXER_QUERY_URL must be set in Django settings." in str( exc_info.value @@ -164,7 +164,7 @@ def test_services_search_endpoint_is_empty(indexer_settings): indexer_settings.SEARCH_INDEXER_QUERY_URL = "" with pytest.raises(ImproperlyConfigured) as exc_info: - FindDocumentIndexer() + SearchIndexer() assert "SEARCH_INDEXER_QUERY_URL must be set in Django settings." in str( exc_info.value @@ -192,7 +192,7 @@ def test_services_search_indexers_serialize_document_returns_expected_json(): } } - indexer = FindDocumentIndexer() + indexer = SearchIndexer() result = indexer.serialize_document(document, accesses) assert set(result.pop("users")) == {str(user_a.sub), str(user_b.sub)} @@ -221,7 +221,7 @@ def test_services_search_indexers_serialize_document_deleted(): parent.soft_delete() document.refresh_from_db() - indexer = FindDocumentIndexer() + indexer = SearchIndexer() result = indexer.serialize_document(document, {}) assert result["is_active"] is False @@ -232,7 +232,7 @@ def test_services_search_indexers_serialize_document_empty(): """Empty documents returns empty content in the serialized json.""" document = factories.DocumentFactory(content="", title=None) - indexer = FindDocumentIndexer() + indexer = SearchIndexer() result = indexer.serialize_document(document, {}) assert result["content"] == "" @@ -256,10 +256,10 @@ def test_services_search_indexers_index_errors(indexer_settings): ) with pytest.raises(HTTPError): - FindDocumentIndexer().index() + SearchIndexer().index() -@patch.object(FindDocumentIndexer, "push") +@patch.object(SearchIndexer, "push") def test_services_search_indexers_batches_pass_only_batch_accesses( mock_push, indexer_settings ): @@ -276,7 +276,7 @@ def test_services_search_indexers_batches_pass_only_batch_accesses( access = factories.UserDocumentAccessFactory(document=document) expected_user_subs[str(document.id)] = str(access.user.sub) - assert FindDocumentIndexer().index() == 5 + assert SearchIndexer().index() == 5 # Should be 3 batches: 2 + 2 + 1 assert mock_push.call_count == 3 @@ -299,7 +299,7 @@ def test_services_search_indexers_batches_pass_only_batch_accesses( assert seen_doc_ids == {str(d.id) for d in documents} -@patch.object(FindDocumentIndexer, "push") +@patch.object(SearchIndexer, "push") @pytest.mark.usefixtures("indexer_settings") def test_services_search_indexers_ignore_empty_documents(mock_push): """ @@ -311,7 +311,7 @@ def test_services_search_indexers_ignore_empty_documents(mock_push): empty_title = factories.DocumentFactory(title="") empty_content = factories.DocumentFactory(content="") - assert FindDocumentIndexer().index() == 3 + assert SearchIndexer().index() == 3 assert mock_push.call_count == 1 @@ -327,7 +327,7 @@ def test_services_search_indexers_ignore_empty_documents(mock_push): } -@patch.object(FindDocumentIndexer, "push") +@patch.object(SearchIndexer, "push") @pytest.mark.usefixtures("indexer_settings") def test_services_search_indexers_ancestors_link_reach(mock_push): """Document accesses and reach should take into account ancestors link reaches.""" @@ -338,7 +338,7 @@ def test_services_search_indexers_ancestors_link_reach(mock_push): parent = factories.DocumentFactory(parent=grand_parent, link_reach="public") document = factories.DocumentFactory(parent=parent, link_reach="restricted") - assert FindDocumentIndexer().index() == 4 + assert SearchIndexer().index() == 4 results = {doc["id"]: doc for doc in mock_push.call_args[0][0]} assert len(results) == 4 @@ -348,7 +348,7 @@ def test_services_search_indexers_ancestors_link_reach(mock_push): assert results[str(document.id)]["reach"] == "public" -@patch.object(FindDocumentIndexer, "push") +@patch.object(SearchIndexer, "push") @pytest.mark.usefixtures("indexer_settings") def test_services_search_indexers_ancestors_users(mock_push): """Document accesses and reach should include users from ancestors.""" @@ -358,7 +358,7 @@ def test_services_search_indexers_ancestors_users(mock_push): parent = factories.DocumentFactory(parent=grand_parent, users=[user_p]) document = factories.DocumentFactory(parent=parent, users=[user_d]) - assert FindDocumentIndexer().index() == 3 + assert SearchIndexer().index() == 3 results = {doc["id"]: doc for doc in mock_push.call_args[0][0]} assert len(results) == 3 @@ -371,7 +371,7 @@ def test_services_search_indexers_ancestors_users(mock_push): } -@patch.object(FindDocumentIndexer, "push") +@patch.object(SearchIndexer, "push") @pytest.mark.usefixtures("indexer_settings") def test_services_search_indexers_ancestors_teams(mock_push): """Document accesses and reach should include teams from ancestors.""" @@ -379,7 +379,7 @@ def test_services_search_indexers_ancestors_teams(mock_push): parent = factories.DocumentFactory(parent=grand_parent, teams=["team_p"]) document = factories.DocumentFactory(parent=parent, teams=["team_d"]) - assert FindDocumentIndexer().index() == 3 + assert SearchIndexer().index() == 3 results = {doc["id"]: doc for doc in mock_push.call_args[0][0]} assert len(results) == 3 @@ -396,7 +396,7 @@ def test_push_uses_correct_url_and_data(mock_post, indexer_settings): """ indexer_settings.SEARCH_INDEXER_URL = "http://example.com/index" - indexer = FindDocumentIndexer() + indexer = SearchIndexer() sample_data = [{"id": "123", "title": "Test"}] mock_response = mock_post.return_value @@ -497,7 +497,7 @@ def test_services_search_indexers_search_errors(indexer_settings): ) with pytest.raises(HTTPError): - FindDocumentIndexer().search("alpha", token="mytoken") + SearchIndexer().search("alpha", token="mytoken") @patch("requests.post") @@ -507,7 +507,7 @@ def test_services_search_indexers_search(mock_post, indexer_settings): document ids from linktraces. """ user = factories.UserFactory() - indexer = FindDocumentIndexer() + indexer = SearchIndexer() mock_response = mock_post.return_value mock_response.raise_for_status.return_value = None # No error