(backend) some refactor of indexer classes & modules

Rename FindDocumentIndexer as SearchIndexer
Rename FindDocumentSerializer as SearchDocumentSerializer
Rename package core.tasks.find as core.tasks.search
Remove logs on http errors in SearchIndexer
Factorise some code in search API view.

Signed-off-by: Fabre Florian <ffabre@hybird.org>
This commit is contained in:
Fabre Florian
2025-10-07 20:54:21 +02:00
committed by Quentin BEY
parent 6f282ec5d6
commit 044c1495a9
10 changed files with 112 additions and 111 deletions

View File

@@ -79,7 +79,7 @@ Y_PROVIDER_API_KEY=yprovider-api-key
THEME_CUSTOMIZATION_CACHE_TIMEOUT=15 THEME_CUSTOMIZATION_CACHE_TIMEOUT=15
# Indexer # Indexer
SEARCH_INDEXER_CLASS="core.services.search_indexers.FindDocumentIndexer" SEARCH_INDEXER_CLASS="core.services.search_indexers.SearchIndexer"
SEARCH_INDEXER_SECRET=find-api-key-for-docs-with-exactly-50-chars-length # Key generated by create_demo in Find app. SEARCH_INDEXER_SECRET=find-api-key-for-docs-with-exactly-50-chars-length # Key generated by create_demo in Find app.
SEARCH_INDEXER_URL="http://find:8000/api/v1.0/documents/index/" SEARCH_INDEXER_URL="http://find:8000/api/v1.0/documents/index/"
SEARCH_INDEXER_QUERY_URL="http://find:8000/api/v1.0/documents/search/" SEARCH_INDEXER_QUERY_URL="http://find:8000/api/v1.0/documents/search/"

View File

@@ -1015,8 +1015,8 @@ class ThreadSerializer(serializers.ModelSerializer):
return {} return {}
class FindDocumentSerializer(serializers.Serializer): class SearchDocumentSerializer(serializers.Serializer):
"""Serializer for Find search requests""" """Serializer for fulltext search requests through Find application"""
q = serializers.CharField(required=True, allow_blank=False, trim_whitespace=True) q = serializers.CharField(required=True, allow_blank=False, trim_whitespace=True)
page_size = serializers.IntegerField( page_size = serializers.IntegerField(

View File

@@ -1085,16 +1085,49 @@ class DocumentViewSet(
{"id": str(duplicated_document.id)}, status=status.HTTP_201_CREATED {"id": str(duplicated_document.id)}, status=status.HTTP_201_CREATED
) )
def _simple_search_queryset(self, params):
"""
Returns a queryset filtered by the content of the document title
"""
text = params.validated_data["q"]
# As the 'list' view we get a prefiltered queryset (deleted docs are excluded)
queryset = self.get_queryset()
filterset = DocumentFilter({"title": text}, queryset=queryset)
if not filterset.is_valid():
raise drf.exceptions.ValidationError(filterset.errors)
return filterset.filter_queryset(queryset)
def _fulltext_search_queryset(self, indexer, token, user, params):
"""
        Returns a queryset built from the results of the fulltext search of Find
"""
text = params.validated_data["q"]
queryset = models.Document.objects.all()
# Retrieve the documents ids from Find.
results = indexer.search(
text=text,
token=token,
visited=get_visited_document_ids_of(queryset, user),
page=params.validated_data.get("page", 1),
page_size=params.validated_data.get("page_size", 20),
)
return queryset.filter(pk__in=results)
@drf.decorators.action(detail=False, methods=["get"], url_path="search") @drf.decorators.action(detail=False, methods=["get"], url_path="search")
@method_decorator(refresh_oidc_access_token) @method_decorator(refresh_oidc_access_token)
def search(self, request, *args, **kwargs): def search(self, request, *args, **kwargs):
""" """
Returns a DRF response containing the filtered, annotated and ordered document list. Returns a DRF response containing the filtered, annotated and ordered document list.
Applies filtering based on request parameter 'q' from `FindDocumentSerializer`. Applies filtering based on request parameter 'q' from `SearchDocumentSerializer`.
        Depending on the configuration it can be: Depending on the configuration it can be:
- A fulltext search through the opensearch indexation app "find" if the backend is - A fulltext search through the opensearch indexation app "find" if the backend is
enabled (see SEARCH_BACKEND_CLASS) enabled (see SEARCH_INDEXER_CLASS)
- A filtering by the model field 'title'. - A filtering by the model field 'title'.
The ordering is always by the most recent first. The ordering is always by the most recent first.
@@ -1102,46 +1135,22 @@ class DocumentViewSet(
access_token = request.session.get("oidc_access_token") access_token = request.session.get("oidc_access_token")
user = request.user user = request.user
serializer = serializers.FindDocumentSerializer(data=request.query_params) params = serializers.SearchDocumentSerializer(data=request.query_params)
serializer.is_valid(raise_exception=True) params.is_valid(raise_exception=True)
indexer = get_document_indexer() indexer = get_document_indexer()
text = serializer.validated_data["q"]
# The indexer is not configured, so we fallback on a simple filter on the if indexer:
# model field 'title'. queryset = self._fulltext_search_queryset(
if not indexer: indexer, token=access_token, user=user, params=params
# As the 'list' view we get a prefiltered queryset (deleted docs are excluded)
queryset = self.get_queryset()
filterset = DocumentFilter({"title": text}, queryset=queryset)
if not filterset.is_valid():
raise drf.exceptions.ValidationError(filterset.errors)
queryset = filterset.filter_queryset(queryset).order_by("-updated_at")
return self.get_response_for_queryset(
queryset,
context={
"request": request,
},
) )
else:
queryset = models.Document.objects.all() # The indexer is not configured, we fallback on a simple icontains filter by the
# model field 'title'.
# Retrieve the documents ids from Find. queryset = self._simple_search_queryset(params)
results = indexer.search(
text=text,
token=access_token,
visited=get_visited_document_ids_of(queryset, user),
page=serializer.validated_data.get("page", 1),
page_size=serializer.validated_data.get("page_size", 20),
)
queryset = queryset.filter(pk__in=results).order_by("-updated_at")
return self.get_response_for_queryset( return self.get_response_for_queryset(
queryset, queryset.order_by("-updated_at"),
context={ context={
"request": request, "request": request,
}, },

View File

@@ -223,7 +223,7 @@ class BaseDocumentIndexer(ABC):
""" """
class FindDocumentIndexer(BaseDocumentIndexer): class SearchIndexer(BaseDocumentIndexer):
""" """
Document indexer that pushes documents to La Suite Find app. Document indexer that pushes documents to La Suite Find app.
""" """
@@ -270,18 +270,14 @@ class FindDocumentIndexer(BaseDocumentIndexer):
Returns: Returns:
dict: A JSON-serializable dictionary. dict: A JSON-serializable dictionary.
""" """
try: response = requests.post(
response = requests.post( self.search_url,
self.search_url, json=data,
json=data, headers={"Authorization": f"Bearer {token}"},
headers={"Authorization": f"Bearer {token}"}, timeout=10,
timeout=10, )
) response.raise_for_status()
response.raise_for_status() return response.json()
return response.json()
except requests.exceptions.HTTPError as e:
logger.error("HTTPError: %s", e)
raise
def push(self, data): def push(self, data):
""" """
@@ -290,14 +286,10 @@ class FindDocumentIndexer(BaseDocumentIndexer):
Args: Args:
data (list): List of document dictionaries. data (list): List of document dictionaries.
""" """
try: response = requests.post(
response = requests.post( self.indexer_url,
self.indexer_url, json=data,
json=data, headers={"Authorization": f"Bearer {self.indexer_secret}"},
headers={"Authorization": f"Bearer {self.indexer_secret}"}, timeout=10,
timeout=10, )
) response.raise_for_status()
response.raise_for_status()
except requests.exceptions.HTTPError as e:
logger.error("HTTPError: %s", e)
raise

View File

@@ -9,7 +9,7 @@ from django.db.models import signals
from django.dispatch import receiver from django.dispatch import receiver
from . import models from . import models
from .tasks.find import trigger_document_indexer from .tasks.search import trigger_document_indexer
@receiver(signals.post_save, sender=models.Document) @receiver(signals.post_save, sender=models.Document)

View File

@@ -11,7 +11,7 @@ from django.db import transaction
import pytest import pytest
from core import factories from core import factories
from core.services.search_indexers import FindDocumentIndexer from core.services.search_indexers import SearchIndexer
@pytest.mark.django_db @pytest.mark.django_db
@@ -19,7 +19,7 @@ from core.services.search_indexers import FindDocumentIndexer
def test_index(): def test_index():
"""Test the command `index` that run the Find app indexer for all the available documents.""" """Test the command `index` that run the Find app indexer for all the available documents."""
user = factories.UserFactory() user = factories.UserFactory()
indexer = FindDocumentIndexer() indexer = SearchIndexer()
with transaction.atomic(): with transaction.atomic():
doc = factories.DocumentFactory() doc = factories.DocumentFactory()
@@ -36,7 +36,7 @@ def test_index():
str(no_title_doc.path): {"users": [user.sub]}, str(no_title_doc.path): {"users": [user.sub]},
} }
with mock.patch.object(FindDocumentIndexer, "push") as mock_push: with mock.patch.object(SearchIndexer, "push") as mock_push:
call_command("index") call_command("index")
push_call_args = [call.args[0] for call in mock_push.call_args_list] push_call_args = [call.args[0] for call in mock_push.call_args_list]

View File

@@ -39,7 +39,7 @@ def indexer_settings_fixture(settings):
get_document_indexer.cache_clear() get_document_indexer.cache_clear()
settings.SEARCH_INDEXER_CLASS = "core.services.search_indexers.FindDocumentIndexer" settings.SEARCH_INDEXER_CLASS = "core.services.search_indexers.SearchIndexer"
settings.SEARCH_INDEXER_SECRET = "ThisIsAKeyForTest" settings.SEARCH_INDEXER_SECRET = "ThisIsAKeyForTest"
settings.SEARCH_INDEXER_URL = "http://localhost:8081/api/v1.0/documents/index/" settings.SEARCH_INDEXER_URL = "http://localhost:8081/api/v1.0/documents/index/"
settings.SEARCH_INDEXER_QUERY_URL = ( settings.SEARCH_INDEXER_QUERY_URL = (

View File

@@ -21,7 +21,7 @@ from django.utils import timezone
import pytest import pytest
from core import factories, models from core import factories, models
from core.services.search_indexers import FindDocumentIndexer from core.services.search_indexers import SearchIndexer
pytestmark = pytest.mark.django_db pytestmark = pytest.mark.django_db
@@ -1622,7 +1622,7 @@ def test_models_documents_compute_ancestors_links_paths_mapping_structure(
} }
@mock.patch.object(FindDocumentIndexer, "push") @mock.patch.object(SearchIndexer, "push")
@pytest.mark.django_db(transaction=True) @pytest.mark.django_db(transaction=True)
def test_models_documents_post_save_indexer(mock_push, indexer_settings): def test_models_documents_post_save_indexer(mock_push, indexer_settings):
"""Test indexation task on document creation""" """Test indexation task on document creation"""
@@ -1634,7 +1634,7 @@ def test_models_documents_post_save_indexer(mock_push, indexer_settings):
accesses = {} accesses = {}
data = [call.args[0] for call in mock_push.call_args_list] data = [call.args[0] for call in mock_push.call_args_list]
indexer = FindDocumentIndexer() indexer = SearchIndexer()
assert sorted(data, key=itemgetter("id")) == sorted( assert sorted(data, key=itemgetter("id")) == sorted(
[ [
@@ -1651,7 +1651,7 @@ def test_models_documents_post_save_indexer(mock_push, indexer_settings):
assert cache.get(f"doc-indexer-debounce-{doc3.pk}") == 0 assert cache.get(f"doc-indexer-debounce-{doc3.pk}") == 0
@mock.patch.object(FindDocumentIndexer, "push") @mock.patch.object(SearchIndexer, "push")
@pytest.mark.django_db(transaction=True) @pytest.mark.django_db(transaction=True)
def test_models_documents_post_save_indexer_not_configured(mock_push, indexer_settings): def test_models_documents_post_save_indexer_not_configured(mock_push, indexer_settings):
"""Task should not start an indexation when disabled""" """Task should not start an indexation when disabled"""
@@ -1664,7 +1664,7 @@ def test_models_documents_post_save_indexer_not_configured(mock_push, indexer_se
assert mock_push.call_args_list == [] assert mock_push.call_args_list == []
@mock.patch.object(FindDocumentIndexer, "push") @mock.patch.object(SearchIndexer, "push")
@pytest.mark.django_db(transaction=True) @pytest.mark.django_db(transaction=True)
def test_models_documents_post_save_indexer_with_accesses(mock_push, indexer_settings): def test_models_documents_post_save_indexer_with_accesses(mock_push, indexer_settings):
"""Test indexation task on document creation""" """Test indexation task on document creation"""
@@ -1687,7 +1687,7 @@ def test_models_documents_post_save_indexer_with_accesses(mock_push, indexer_set
data = [call.args[0] for call in mock_push.call_args_list] data = [call.args[0] for call in mock_push.call_args_list]
indexer = FindDocumentIndexer() indexer = SearchIndexer()
assert sorted(data, key=itemgetter("id")) == sorted( assert sorted(data, key=itemgetter("id")) == sorted(
[ [
@@ -1704,7 +1704,7 @@ def test_models_documents_post_save_indexer_with_accesses(mock_push, indexer_set
assert cache.get(f"doc-indexer-debounce-{doc3.pk}") == 0 assert cache.get(f"doc-indexer-debounce-{doc3.pk}") == 0
@mock.patch.object(FindDocumentIndexer, "push") @mock.patch.object(SearchIndexer, "push")
@pytest.mark.django_db(transaction=True) @pytest.mark.django_db(transaction=True)
def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings): def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings):
"""Indexation task on deleted or ancestor_deleted documents""" """Indexation task on deleted or ancestor_deleted documents"""
@@ -1747,7 +1747,7 @@ def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings)
data = [call.args[0] for call in mock_push.call_args_list] data = [call.args[0] for call in mock_push.call_args_list]
indexer = FindDocumentIndexer() indexer = SearchIndexer()
# Even deleted document are re-indexed : only update their status in the future ? # Even deleted document are re-indexed : only update their status in the future ?
assert sorted(data, key=itemgetter("id")) == sorted( assert sorted(data, key=itemgetter("id")) == sorted(
@@ -1766,7 +1766,7 @@ def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings)
assert cache.get(f"doc-indexer-debounce-{doc_ancestor_deleted.pk}") == 0 assert cache.get(f"doc-indexer-debounce-{doc_ancestor_deleted.pk}") == 0
@mock.patch.object(FindDocumentIndexer, "push") @mock.patch.object(SearchIndexer, "push")
@pytest.mark.django_db(transaction=True) @pytest.mark.django_db(transaction=True)
def test_models_documents_post_save_indexer_restored(mock_push, indexer_settings): def test_models_documents_post_save_indexer_restored(mock_push, indexer_settings):
"""Restart indexation task on restored documents""" """Restart indexation task on restored documents"""
@@ -1820,7 +1820,7 @@ def test_models_documents_post_save_indexer_restored(mock_push, indexer_settings
data = [call.args[0] for call in mock_push.call_args_list] data = [call.args[0] for call in mock_push.call_args_list]
indexer = FindDocumentIndexer() indexer = SearchIndexer()
# All docs are re-indexed # All docs are re-indexed
assert sorted(data, key=itemgetter("id")) == sorted( assert sorted(data, key=itemgetter("id")) == sorted(
@@ -1840,10 +1840,10 @@ def test_models_documents_post_save_indexer_debounce(indexer_settings):
"""Test indexation task skipping on document update""" """Test indexation task skipping on document update"""
indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0 indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0
indexer = FindDocumentIndexer() indexer = SearchIndexer()
user = factories.UserFactory() user = factories.UserFactory()
with mock.patch.object(FindDocumentIndexer, "push"): with mock.patch.object(SearchIndexer, "push"):
with transaction.atomic(): with transaction.atomic():
doc = factories.DocumentFactory() doc = factories.DocumentFactory()
factories.UserDocumentAccessFactory(document=doc, user=user) factories.UserDocumentAccessFactory(document=doc, user=user)
@@ -1852,7 +1852,7 @@ def test_models_documents_post_save_indexer_debounce(indexer_settings):
str(doc.path): {"users": [user.sub]}, str(doc.path): {"users": [user.sub]},
} }
with mock.patch.object(FindDocumentIndexer, "push") as mock_push: with mock.patch.object(SearchIndexer, "push") as mock_push:
# Simulate 1 waiting task # Simulate 1 waiting task
cache.set(f"doc-indexer-debounce-{doc.pk}", 1) cache.set(f"doc-indexer-debounce-{doc.pk}", 1)
@@ -1863,7 +1863,7 @@ def test_models_documents_post_save_indexer_debounce(indexer_settings):
assert [call.args[0] for call in mock_push.call_args_list] == [] assert [call.args[0] for call in mock_push.call_args_list] == []
with mock.patch.object(FindDocumentIndexer, "push") as mock_push: with mock.patch.object(SearchIndexer, "push") as mock_push:
# No waiting task # No waiting task
cache.set(f"doc-indexer-debounce-{doc.pk}", 0) cache.set(f"doc-indexer-debounce-{doc.pk}", 0)
@@ -1881,10 +1881,10 @@ def test_models_documents_access_post_save_indexer(indexer_settings):
"""Test indexation task on DocumentAccess update""" """Test indexation task on DocumentAccess update"""
indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0 indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0
indexer = FindDocumentIndexer() indexer = SearchIndexer()
user = factories.UserFactory() user = factories.UserFactory()
with mock.patch.object(FindDocumentIndexer, "push"): with mock.patch.object(SearchIndexer, "push"):
with transaction.atomic(): with transaction.atomic():
doc = factories.DocumentFactory() doc = factories.DocumentFactory()
doc_access = factories.UserDocumentAccessFactory(document=doc, user=user) doc_access = factories.UserDocumentAccessFactory(document=doc, user=user)
@@ -1893,9 +1893,9 @@ def test_models_documents_access_post_save_indexer(indexer_settings):
str(doc.path): {"users": [user.sub]}, str(doc.path): {"users": [user.sub]},
} }
indexer = FindDocumentIndexer() indexer = SearchIndexer()
with mock.patch.object(FindDocumentIndexer, "push") as mock_push: with mock.patch.object(SearchIndexer, "push") as mock_push:
with transaction.atomic(): with transaction.atomic():
doc_access.save() doc_access.save()

View File

@@ -15,7 +15,7 @@ from requests import HTTPError
from core import factories, models, utils from core import factories, models, utils
from core.services.search_indexers import ( from core.services.search_indexers import (
BaseDocumentIndexer, BaseDocumentIndexer,
FindDocumentIndexer, SearchIndexer,
get_document_indexer, get_document_indexer,
get_visited_document_ids_of, get_visited_document_ids_of,
) )
@@ -78,7 +78,7 @@ def test_services_search_indexer_is_configured(indexer_settings):
# Valid class # Valid class
indexer_settings.SEARCH_INDEXER_CLASS = ( indexer_settings.SEARCH_INDEXER_CLASS = (
"core.services.search_indexers.FindDocumentIndexer" "core.services.search_indexers.SearchIndexer"
) )
get_document_indexer.cache_clear() get_document_indexer.cache_clear()
@@ -98,7 +98,7 @@ def test_services_search_indexer_url_is_none(indexer_settings):
indexer_settings.SEARCH_INDEXER_URL = None indexer_settings.SEARCH_INDEXER_URL = None
with pytest.raises(ImproperlyConfigured) as exc_info: with pytest.raises(ImproperlyConfigured) as exc_info:
FindDocumentIndexer() SearchIndexer()
assert "SEARCH_INDEXER_URL must be set in Django settings." in str(exc_info.value) assert "SEARCH_INDEXER_URL must be set in Django settings." in str(exc_info.value)
@@ -110,7 +110,7 @@ def test_services_search_indexer_url_is_empty(indexer_settings):
indexer_settings.SEARCH_INDEXER_URL = "" indexer_settings.SEARCH_INDEXER_URL = ""
with pytest.raises(ImproperlyConfigured) as exc_info: with pytest.raises(ImproperlyConfigured) as exc_info:
FindDocumentIndexer() SearchIndexer()
assert "SEARCH_INDEXER_URL must be set in Django settings." in str(exc_info.value) assert "SEARCH_INDEXER_URL must be set in Django settings." in str(exc_info.value)
@@ -122,7 +122,7 @@ def test_services_search_indexer_secret_is_none(indexer_settings):
indexer_settings.SEARCH_INDEXER_SECRET = None indexer_settings.SEARCH_INDEXER_SECRET = None
with pytest.raises(ImproperlyConfigured) as exc_info: with pytest.raises(ImproperlyConfigured) as exc_info:
FindDocumentIndexer() SearchIndexer()
assert "SEARCH_INDEXER_SECRET must be set in Django settings." in str( assert "SEARCH_INDEXER_SECRET must be set in Django settings." in str(
exc_info.value exc_info.value
@@ -136,7 +136,7 @@ def test_services_search_indexer_secret_is_empty(indexer_settings):
indexer_settings.SEARCH_INDEXER_SECRET = "" indexer_settings.SEARCH_INDEXER_SECRET = ""
with pytest.raises(ImproperlyConfigured) as exc_info: with pytest.raises(ImproperlyConfigured) as exc_info:
FindDocumentIndexer() SearchIndexer()
assert "SEARCH_INDEXER_SECRET must be set in Django settings." in str( assert "SEARCH_INDEXER_SECRET must be set in Django settings." in str(
exc_info.value exc_info.value
@@ -150,7 +150,7 @@ def test_services_search_endpoint_is_none(indexer_settings):
indexer_settings.SEARCH_INDEXER_QUERY_URL = None indexer_settings.SEARCH_INDEXER_QUERY_URL = None
with pytest.raises(ImproperlyConfigured) as exc_info: with pytest.raises(ImproperlyConfigured) as exc_info:
FindDocumentIndexer() SearchIndexer()
assert "SEARCH_INDEXER_QUERY_URL must be set in Django settings." in str( assert "SEARCH_INDEXER_QUERY_URL must be set in Django settings." in str(
exc_info.value exc_info.value
@@ -164,7 +164,7 @@ def test_services_search_endpoint_is_empty(indexer_settings):
indexer_settings.SEARCH_INDEXER_QUERY_URL = "" indexer_settings.SEARCH_INDEXER_QUERY_URL = ""
with pytest.raises(ImproperlyConfigured) as exc_info: with pytest.raises(ImproperlyConfigured) as exc_info:
FindDocumentIndexer() SearchIndexer()
assert "SEARCH_INDEXER_QUERY_URL must be set in Django settings." in str( assert "SEARCH_INDEXER_QUERY_URL must be set in Django settings." in str(
exc_info.value exc_info.value
@@ -192,7 +192,7 @@ def test_services_search_indexers_serialize_document_returns_expected_json():
} }
} }
indexer = FindDocumentIndexer() indexer = SearchIndexer()
result = indexer.serialize_document(document, accesses) result = indexer.serialize_document(document, accesses)
assert set(result.pop("users")) == {str(user_a.sub), str(user_b.sub)} assert set(result.pop("users")) == {str(user_a.sub), str(user_b.sub)}
@@ -221,7 +221,7 @@ def test_services_search_indexers_serialize_document_deleted():
parent.soft_delete() parent.soft_delete()
document.refresh_from_db() document.refresh_from_db()
indexer = FindDocumentIndexer() indexer = SearchIndexer()
result = indexer.serialize_document(document, {}) result = indexer.serialize_document(document, {})
assert result["is_active"] is False assert result["is_active"] is False
@@ -232,7 +232,7 @@ def test_services_search_indexers_serialize_document_empty():
"""Empty documents returns empty content in the serialized json.""" """Empty documents returns empty content in the serialized json."""
document = factories.DocumentFactory(content="", title=None) document = factories.DocumentFactory(content="", title=None)
indexer = FindDocumentIndexer() indexer = SearchIndexer()
result = indexer.serialize_document(document, {}) result = indexer.serialize_document(document, {})
assert result["content"] == "" assert result["content"] == ""
@@ -256,10 +256,10 @@ def test_services_search_indexers_index_errors(indexer_settings):
) )
with pytest.raises(HTTPError): with pytest.raises(HTTPError):
FindDocumentIndexer().index() SearchIndexer().index()
@patch.object(FindDocumentIndexer, "push") @patch.object(SearchIndexer, "push")
def test_services_search_indexers_batches_pass_only_batch_accesses( def test_services_search_indexers_batches_pass_only_batch_accesses(
mock_push, indexer_settings mock_push, indexer_settings
): ):
@@ -276,7 +276,7 @@ def test_services_search_indexers_batches_pass_only_batch_accesses(
access = factories.UserDocumentAccessFactory(document=document) access = factories.UserDocumentAccessFactory(document=document)
expected_user_subs[str(document.id)] = str(access.user.sub) expected_user_subs[str(document.id)] = str(access.user.sub)
assert FindDocumentIndexer().index() == 5 assert SearchIndexer().index() == 5
# Should be 3 batches: 2 + 2 + 1 # Should be 3 batches: 2 + 2 + 1
assert mock_push.call_count == 3 assert mock_push.call_count == 3
@@ -299,7 +299,7 @@ def test_services_search_indexers_batches_pass_only_batch_accesses(
assert seen_doc_ids == {str(d.id) for d in documents} assert seen_doc_ids == {str(d.id) for d in documents}
@patch.object(FindDocumentIndexer, "push") @patch.object(SearchIndexer, "push")
@pytest.mark.usefixtures("indexer_settings") @pytest.mark.usefixtures("indexer_settings")
def test_services_search_indexers_ignore_empty_documents(mock_push): def test_services_search_indexers_ignore_empty_documents(mock_push):
""" """
@@ -311,7 +311,7 @@ def test_services_search_indexers_ignore_empty_documents(mock_push):
empty_title = factories.DocumentFactory(title="") empty_title = factories.DocumentFactory(title="")
empty_content = factories.DocumentFactory(content="") empty_content = factories.DocumentFactory(content="")
assert FindDocumentIndexer().index() == 3 assert SearchIndexer().index() == 3
assert mock_push.call_count == 1 assert mock_push.call_count == 1
@@ -327,7 +327,7 @@ def test_services_search_indexers_ignore_empty_documents(mock_push):
} }
@patch.object(FindDocumentIndexer, "push") @patch.object(SearchIndexer, "push")
@pytest.mark.usefixtures("indexer_settings") @pytest.mark.usefixtures("indexer_settings")
def test_services_search_indexers_ancestors_link_reach(mock_push): def test_services_search_indexers_ancestors_link_reach(mock_push):
"""Document accesses and reach should take into account ancestors link reaches.""" """Document accesses and reach should take into account ancestors link reaches."""
@@ -338,7 +338,7 @@ def test_services_search_indexers_ancestors_link_reach(mock_push):
parent = factories.DocumentFactory(parent=grand_parent, link_reach="public") parent = factories.DocumentFactory(parent=grand_parent, link_reach="public")
document = factories.DocumentFactory(parent=parent, link_reach="restricted") document = factories.DocumentFactory(parent=parent, link_reach="restricted")
assert FindDocumentIndexer().index() == 4 assert SearchIndexer().index() == 4
results = {doc["id"]: doc for doc in mock_push.call_args[0][0]} results = {doc["id"]: doc for doc in mock_push.call_args[0][0]}
assert len(results) == 4 assert len(results) == 4
@@ -348,7 +348,7 @@ def test_services_search_indexers_ancestors_link_reach(mock_push):
assert results[str(document.id)]["reach"] == "public" assert results[str(document.id)]["reach"] == "public"
@patch.object(FindDocumentIndexer, "push") @patch.object(SearchIndexer, "push")
@pytest.mark.usefixtures("indexer_settings") @pytest.mark.usefixtures("indexer_settings")
def test_services_search_indexers_ancestors_users(mock_push): def test_services_search_indexers_ancestors_users(mock_push):
"""Document accesses and reach should include users from ancestors.""" """Document accesses and reach should include users from ancestors."""
@@ -358,7 +358,7 @@ def test_services_search_indexers_ancestors_users(mock_push):
parent = factories.DocumentFactory(parent=grand_parent, users=[user_p]) parent = factories.DocumentFactory(parent=grand_parent, users=[user_p])
document = factories.DocumentFactory(parent=parent, users=[user_d]) document = factories.DocumentFactory(parent=parent, users=[user_d])
assert FindDocumentIndexer().index() == 3 assert SearchIndexer().index() == 3
results = {doc["id"]: doc for doc in mock_push.call_args[0][0]} results = {doc["id"]: doc for doc in mock_push.call_args[0][0]}
assert len(results) == 3 assert len(results) == 3
@@ -371,7 +371,7 @@ def test_services_search_indexers_ancestors_users(mock_push):
} }
@patch.object(FindDocumentIndexer, "push") @patch.object(SearchIndexer, "push")
@pytest.mark.usefixtures("indexer_settings") @pytest.mark.usefixtures("indexer_settings")
def test_services_search_indexers_ancestors_teams(mock_push): def test_services_search_indexers_ancestors_teams(mock_push):
"""Document accesses and reach should include teams from ancestors.""" """Document accesses and reach should include teams from ancestors."""
@@ -379,7 +379,7 @@ def test_services_search_indexers_ancestors_teams(mock_push):
parent = factories.DocumentFactory(parent=grand_parent, teams=["team_p"]) parent = factories.DocumentFactory(parent=grand_parent, teams=["team_p"])
document = factories.DocumentFactory(parent=parent, teams=["team_d"]) document = factories.DocumentFactory(parent=parent, teams=["team_d"])
assert FindDocumentIndexer().index() == 3 assert SearchIndexer().index() == 3
results = {doc["id"]: doc for doc in mock_push.call_args[0][0]} results = {doc["id"]: doc for doc in mock_push.call_args[0][0]}
assert len(results) == 3 assert len(results) == 3
@@ -396,7 +396,7 @@ def test_push_uses_correct_url_and_data(mock_post, indexer_settings):
""" """
indexer_settings.SEARCH_INDEXER_URL = "http://example.com/index" indexer_settings.SEARCH_INDEXER_URL = "http://example.com/index"
indexer = FindDocumentIndexer() indexer = SearchIndexer()
sample_data = [{"id": "123", "title": "Test"}] sample_data = [{"id": "123", "title": "Test"}]
mock_response = mock_post.return_value mock_response = mock_post.return_value
@@ -497,7 +497,7 @@ def test_services_search_indexers_search_errors(indexer_settings):
) )
with pytest.raises(HTTPError): with pytest.raises(HTTPError):
FindDocumentIndexer().search("alpha", token="mytoken") SearchIndexer().search("alpha", token="mytoken")
@patch("requests.post") @patch("requests.post")
@@ -507,7 +507,7 @@ def test_services_search_indexers_search(mock_post, indexer_settings):
document ids from linktraces. document ids from linktraces.
""" """
user = factories.UserFactory() user = factories.UserFactory()
indexer = FindDocumentIndexer() indexer = SearchIndexer()
mock_response = mock_post.return_value mock_response = mock_post.return_value
mock_response.raise_for_status.return_value = None # No error mock_response.raise_for_status.return_value = None # No error