(backend) Index deleted documents

Add SEARCH_INDEXER_COUNTDOWN as configurable setting.
Make the search backend creation simpler (only 'get_document_indexer' now).
Allow indexation of deleted documents.

Signed-off-by: Fabre Florian <ffabre@hybird.org>
This commit is contained in:
Fabre Florian
2025-09-24 13:44:37 +02:00
committed by Quentin BEY
parent 331a94ad2f
commit a48f61e583
11 changed files with 175 additions and 153 deletions

View File

@@ -53,7 +53,7 @@ from core.services.converter_services import (
YdocConverter, YdocConverter,
) )
from core.services.search_indexers import ( from core.services.search_indexers import (
default_document_indexer, get_document_indexer,
get_visited_document_ids_of, get_visited_document_ids_of,
) )
from core.tasks.mail import send_ask_for_access_mail from core.tasks.mail import send_ask_for_access_mail
@@ -1090,7 +1090,14 @@ class DocumentViewSet(
def search(self, request, *args, **kwargs): def search(self, request, *args, **kwargs):
""" """
Returns a DRF response containing the filtered, annotated and ordered document list. Returns a DRF response containing the filtered, annotated and ordered document list.
The filtering allows full text search through the opensearch indexation app "find".
Applies filtering based on request parameter 'q' from `FindDocumentSerializer`.
Depending on the configuration it can be:
- A fulltext search through the opensearch indexation app "find" if the backend is
enabled (see SEARCH_BACKEND_CLASS)
- A filtering by the model field 'title'.
The ordering is always by the most recent first.
""" """
access_token = request.session.get("oidc_access_token") access_token = request.session.get("oidc_access_token")
user = request.user user = request.user
@@ -1098,13 +1105,15 @@ class DocumentViewSet(
serializer = serializers.FindDocumentSerializer(data=request.query_params) serializer = serializers.FindDocumentSerializer(data=request.query_params)
serializer.is_valid(raise_exception=True) serializer.is_valid(raise_exception=True)
indexer = default_document_indexer() indexer = get_document_indexer()
text = serializer.validated_data["q"]
# The indexer is not configured, so we fall back on a simple filter on the
# model field 'title'.
if not indexer: if not indexer:
# As the 'list' view we get a prefiltered queryset (deleted docs are excluded)
queryset = self.get_queryset() queryset = self.get_queryset()
filterset = DocumentFilter( filterset = DocumentFilter({"title": text}, queryset=queryset)
{"title": serializer.validated_data.get("q", "")}, queryset=queryset
)
if not filterset.is_valid(): if not filterset.is_valid():
raise drf.exceptions.ValidationError(filterset.errors) raise drf.exceptions.ValidationError(filterset.errors)
@@ -1119,15 +1128,17 @@ class DocumentViewSet(
) )
queryset = models.Document.objects.all() queryset = models.Document.objects.all()
# Retrieve the documents ids from Find.
results = indexer.search( results = indexer.search(
text=serializer.validated_data.get("q", ""), text=text,
token=access_token, token=access_token,
visited=get_visited_document_ids_of(queryset, user), visited=get_visited_document_ids_of(queryset, user),
page=serializer.validated_data.get("page", 1), page=serializer.validated_data.get("page", 1),
page_size=serializer.validated_data.get("page_size", 20), page_size=serializer.validated_data.get("page_size", 20),
) )
queryset = queryset.filter(pk__in=results) queryset = queryset.filter(pk__in=results).order_by("-updated_at")
return self.get_response_for_queryset( return self.get_response_for_queryset(
queryset, queryset,

View File

@@ -5,9 +5,9 @@ Handle search setup that needs to be done at bootstrap time.
import logging import logging
import time import time
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand, CommandError
from ...services.search_indexers import FindDocumentIndexer from core.services.search_indexers import get_document_indexer
logger = logging.getLogger("docs.search.bootstrap_search") logger = logging.getLogger("docs.search.bootstrap_search")
@@ -19,9 +19,18 @@ class Command(BaseCommand):
def handle(self, *args, **options): def handle(self, *args, **options):
"""Launch and log search index generation.""" """Launch and log search index generation."""
indexer = get_document_indexer()
if not indexer:
raise CommandError("The indexer is not enabled or properly configured.")
logger.info("Starting to regenerate Find index...") logger.info("Starting to regenerate Find index...")
start = time.perf_counter() start = time.perf_counter()
count = FindDocumentIndexer().index()
try:
count = indexer.index()
except Exception as err:
raise CommandError("Unable to regenerate index") from err
duration = time.perf_counter() - start duration = time.perf_counter() - start
logger.info( logger.info(

View File

@@ -19,37 +19,24 @@ logger = logging.getLogger(__name__)
@cache @cache
def default_document_indexer(): def get_document_indexer():
"""Returns default indexer service is enabled and properly configured.""" """Returns an instance of indexer service if enabled and properly configured."""
classpath = settings.SEARCH_INDEXER_CLASS
# For this usecase an empty indexer class is not an issue but a feature. # For this usecase an empty indexer class is not an issue but a feature.
if not getattr(settings, "SEARCH_INDEXER_CLASS", None): if not classpath:
logger.info("Document indexer is not configured (see SEARCH_INDEXER_CLASS)") logger.info("Document indexer is not configured (see SEARCH_INDEXER_CLASS)")
return None return None
try: try:
return get_document_indexer_class()() indexer_class = import_string(settings.SEARCH_INDEXER_CLASS)
return indexer_class()
except ImportError as err:
logger.error("SEARCH_INDEXER_CLASS setting is not valid : %s", err)
except ImproperlyConfigured as err: except ImproperlyConfigured as err:
logger.error("Document indexer is not properly configured : %s", err) logger.error("Document indexer is not properly configured : %s", err)
return None
return None
@cache
def get_document_indexer_class():
"""Return the indexer backend class based on the settings."""
classpath = settings.SEARCH_INDEXER_CLASS
if not classpath:
raise ImproperlyConfigured(
"SEARCH_INDEXER_CLASS must be set in Django settings."
)
try:
return import_string(settings.SEARCH_INDEXER_CLASS)
except ImportError as err:
raise ImproperlyConfigured(
f"SEARCH_INDEXER_CLASS setting is not valid : {err}"
) from err
def get_batch_accesses_by_users_and_teams(paths): def get_batch_accesses_by_users_and_teams(paths):
@@ -100,9 +87,11 @@ def get_visited_document_ids_of(queryset, user):
ancestors_deleted_at__isnull=True, ancestors_deleted_at__isnull=True,
) )
.filter(pk__in=Subquery(qs.values("document_id"))) .filter(pk__in=Subquery(qs.values("document_id")))
.order_by("pk")
.distinct("pk")
) )
return list({str(id) for id in docs.values_list("pk", flat=True)}) return [str(id) for id in docs.values_list("pk", flat=True)]
class BaseDocumentIndexer(ABC): class BaseDocumentIndexer(ABC):

View File

@@ -9,7 +9,6 @@ from django.db.models import signals
from django.dispatch import receiver from django.dispatch import receiver
from . import models from . import models
from .services.search_indexers import default_document_indexer
from .tasks.find import trigger_document_indexer from .tasks.find import trigger_document_indexer
@@ -20,8 +19,7 @@ def document_post_save(sender, instance, **kwargs): # pylint: disable=unused-ar
Note : Within the transaction we can have an empty content and a serialization Note : Within the transaction we can have an empty content and a serialization
error. error.
""" """
if default_document_indexer() is not None: transaction.on_commit(partial(trigger_document_indexer, instance))
transaction.on_commit(partial(trigger_document_indexer, instance))
@receiver(signals.post_save, sender=models.DocumentAccess) @receiver(signals.post_save, sender=models.DocumentAccess)
@@ -29,5 +27,5 @@ def document_access_post_save(sender, instance, created, **kwargs): # pylint: d
""" """
Asynchronous call to the document indexer at the end of the transaction. Asynchronous call to the document indexer at the end of the transaction.
""" """
if not created and default_document_indexer() is not None: if not created:
transaction.on_commit(partial(trigger_document_indexer, instance.document)) transaction.on_commit(partial(trigger_document_indexer, instance.document))

View File

@@ -10,13 +10,10 @@ from impress.celery_app import app
logger = getLogger(__file__) logger = getLogger(__file__)
def document_indexer_debounce_key(document_id): def indexer_debounce_lock(document_id):
"""Returns debounce cache key"""
return f"doc-indexer-debounce-{document_id}"
def incr_counter(key):
"""Increase or reset counter""" """Increase or reset counter"""
key = f"doc-indexer-debounce-{document_id}"
try: try:
return cache.incr(key) return cache.incr(key)
except ValueError: except ValueError:
@@ -24,8 +21,10 @@ def incr_counter(key):
return 1 return 1
def decr_counter(key): def indexer_debounce_release(document_id):
"""Decrease or reset counter""" """Decrease or reset counter"""
key = f"doc-indexer-debounce-{document_id}"
try: try:
return cache.decr(key) return cache.decr(key)
except ValueError: except ValueError:
@@ -36,24 +35,26 @@ def decr_counter(key):
@app.task @app.task
def document_indexer_task(document_id): def document_indexer_task(document_id):
"""Celery Task : Sends indexation query for a document.""" """Celery Task : Sends indexation query for a document."""
key = document_indexer_debounce_key(document_id) # Prevents some circular imports
# pylint: disable=import-outside-toplevel
from core import models # noqa : PLC0415
from core.services.search_indexers import ( # noqa : PLC0415
get_batch_accesses_by_users_and_teams,
get_document_indexer,
)
# check if the counter : if still up, skip the task. only the last one # check if the counter : if still up, skip the task. only the last one
# within the countdown delay will do the query. # within the countdown delay will do the query.
if decr_counter(key) > 0: if indexer_debounce_release(document_id) > 0:
logger.info("Skip document %s indexation", document_id) logger.info("Skip document %s indexation", document_id)
return return
# Prevents some circular imports indexer = get_document_indexer()
# pylint: disable=import-outside-toplevel
from core import models # noqa: PLC0415 if indexer is None:
from core.services.search_indexers import ( # noqa: PLC0415 return
get_batch_accesses_by_users_and_teams,
get_document_indexer_class,
)
doc = models.Document.objects.get(pk=document_id) doc = models.Document.objects.get(pk=document_id)
indexer = get_document_indexer_class()()
accesses = get_batch_accesses_by_users_and_teams((doc.path,)) accesses = get_batch_accesses_by_users_and_teams((doc.path,))
data = indexer.serialize_document(document=doc, accesses=accesses) data = indexer.serialize_document(document=doc, accesses=accesses)
@@ -69,11 +70,11 @@ def trigger_document_indexer(document):
Args: Args:
document (Document): The document instance. document (Document): The document instance.
""" """
if document.deleted_at or document.ancestors_deleted_at: countdown = settings.SEARCH_INDEXER_COUNTDOWN
return
key = document_indexer_debounce_key(document.pk) # DO NOT create a task if indexation if disabled
countdown = getattr(settings, "SEARCH_INDEXER_COUNTDOWN", 1) if not settings.SEARCH_INDEXER_CLASS:
return
logger.info( logger.info(
"Add task for document %s indexation in %.2f seconds", "Add task for document %s indexation in %.2f seconds",
@@ -83,6 +84,6 @@ def trigger_document_indexer(document):
# Each time this method is called during the countdown, we increment the # Each time this method is called during the countdown, we increment the
# counter and each task decrease it, so the index be run only once. # counter and each task decrease it, so the index be run only once.
incr_counter(key) indexer_debounce_lock(document.pk)
document_indexer_task.apply_async(args=[document.pk], countdown=countdown) document_indexer_task.apply_async(args=[document.pk], countdown=countdown)

View File

@@ -5,7 +5,7 @@ Unit test for `index` command.
from operator import itemgetter from operator import itemgetter
from unittest import mock from unittest import mock
from django.core.management import call_command from django.core.management import CommandError, call_command
from django.db import transaction from django.db import transaction
import pytest import pytest
@@ -51,3 +51,15 @@ def test_index():
], ],
key=itemgetter("id"), key=itemgetter("id"),
) )
@pytest.mark.django_db
@pytest.mark.usefixtures("indexer_settings")
def test_index_improperly_configured(indexer_settings):
"""The command should raise an exception if the indexer is not configured"""
indexer_settings.SEARCH_INDEXER_CLASS = None
with pytest.raises(CommandError) as err:
call_command("index")
assert str(err.value) == "The indexer is not enabled or properly configured."

View File

@@ -34,12 +34,10 @@ def indexer_settings_fixture(settings):
# pylint: disable-next=import-outside-toplevel # pylint: disable-next=import-outside-toplevel
from core.services.search_indexers import ( # noqa: PLC0415 from core.services.search_indexers import ( # noqa: PLC0415
default_document_indexer, get_document_indexer,
get_document_indexer_class,
) )
default_document_indexer.cache_clear() get_document_indexer.cache_clear()
get_document_indexer_class.cache_clear()
settings.SEARCH_INDEXER_CLASS = "core.services.search_indexers.FindDocumentIndexer" settings.SEARCH_INDEXER_CLASS = "core.services.search_indexers.FindDocumentIndexer"
settings.SEARCH_INDEXER_SECRET = "ThisIsAKeyForTest" settings.SEARCH_INDEXER_SECRET = "ThisIsAKeyForTest"
@@ -51,5 +49,4 @@ def indexer_settings_fixture(settings):
yield settings yield settings
# clear cache to prevent issues with other tests # clear cache to prevent issues with other tests
default_document_indexer.cache_clear() get_document_indexer.cache_clear()
get_document_indexer_class.cache_clear()

View File

@@ -10,7 +10,7 @@ from faker import Faker
from rest_framework.test import APIClient from rest_framework.test import APIClient
from core import factories, models from core import factories, models
from core.services.search_indexers import default_document_indexer from core.services.search_indexers import get_document_indexer
fake = Faker() fake = Faker()
pytestmark = pytest.mark.django_db pytestmark = pytest.mark.django_db
@@ -54,7 +54,7 @@ def test_api_documents_search_endpoint_is_none(indexer_settings):
""" """
indexer_settings.SEARCH_INDEXER_QUERY_URL = None indexer_settings.SEARCH_INDEXER_QUERY_URL = None
assert default_document_indexer() is None assert get_document_indexer() is None
user = factories.UserFactory() user = factories.UserFactory()
document = factories.DocumentFactory(title="alpha") document = factories.DocumentFactory(title="alpha")
@@ -130,7 +130,7 @@ def test_api_documents_search_format(indexer_settings):
"""Validate the format of documents as returned by the search view.""" """Validate the format of documents as returned by the search view."""
indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search" indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
assert default_document_indexer() is not None assert get_document_indexer() is not None
user = factories.UserFactory() user = factories.UserFactory()
@@ -193,7 +193,7 @@ def test_api_documents_search_pagination(indexer_settings):
"""Documents should be ordered by descending "updated_at" by default""" """Documents should be ordered by descending "updated_at" by default"""
indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search" indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
assert default_document_indexer() is not None assert get_document_indexer() is not None
user = factories.UserFactory() user = factories.UserFactory()

View File

@@ -5,7 +5,6 @@ Unit tests for the Document model
import random import random
import smtplib import smtplib
import time
from logging import Logger from logging import Logger
from operator import itemgetter from operator import itemgetter
from unittest import mock from unittest import mock
@@ -23,7 +22,6 @@ import pytest
from core import factories, models from core import factories, models
from core.services.search_indexers import FindDocumentIndexer from core.services.search_indexers import FindDocumentIndexer
from core.tasks.find import document_indexer_debounce_key
pytestmark = pytest.mark.django_db pytestmark = pytest.mark.django_db
@@ -1630,6 +1628,48 @@ def test_models_documents_post_save_indexer(mock_push, indexer_settings):
"""Test indexation task on document creation""" """Test indexation task on document creation"""
indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0 indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0
with transaction.atomic():
doc1, doc2, doc3 = factories.DocumentFactory.create_batch(3)
accesses = {}
data = [call.args[0] for call in mock_push.call_args_list]
indexer = FindDocumentIndexer()
assert sorted(data, key=itemgetter("id")) == sorted(
[
indexer.serialize_document(doc1, accesses),
indexer.serialize_document(doc2, accesses),
indexer.serialize_document(doc3, accesses),
],
key=itemgetter("id"),
)
# The debounce counters should be reset
assert cache.get(f"doc-indexer-debounce-{doc1.pk}") == 0
assert cache.get(f"doc-indexer-debounce-{doc2.pk}") == 0
assert cache.get(f"doc-indexer-debounce-{doc3.pk}") == 0
@mock.patch.object(FindDocumentIndexer, "push")
@pytest.mark.django_db(transaction=True)
def test_models_documents_post_save_indexer_not_configured(mock_push, indexer_settings):
"""Task should not start an indexation when disabled"""
indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0
indexer_settings.SEARCH_INDEXER_CLASS = None
with transaction.atomic():
factories.DocumentFactory()
assert mock_push.call_args_list == []
@mock.patch.object(FindDocumentIndexer, "push")
@pytest.mark.django_db(transaction=True)
def test_models_documents_post_save_indexer_with_accesses(mock_push, indexer_settings):
"""Test indexation task on document creation"""
indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0
user = factories.UserFactory() user = factories.UserFactory()
with transaction.atomic(): with transaction.atomic():
@@ -1639,8 +1679,6 @@ def test_models_documents_post_save_indexer(mock_push, indexer_settings):
factories.UserDocumentAccessFactory(document=doc2, user=user) factories.UserDocumentAccessFactory(document=doc2, user=user)
factories.UserDocumentAccessFactory(document=doc3, user=user) factories.UserDocumentAccessFactory(document=doc3, user=user)
time.sleep(0.2) # waits for the end of the tasks
accesses = { accesses = {
str(doc1.path): {"users": [user.sub]}, str(doc1.path): {"users": [user.sub]},
str(doc2.path): {"users": [user.sub]}, str(doc2.path): {"users": [user.sub]},
@@ -1661,15 +1699,15 @@ def test_models_documents_post_save_indexer(mock_push, indexer_settings):
) )
# The debounce counters should be reset # The debounce counters should be reset
assert cache.get(document_indexer_debounce_key(doc1.pk)) == 0 assert cache.get(f"doc-indexer-debounce-{doc1.pk}") == 0
assert cache.get(document_indexer_debounce_key(doc2.pk)) == 0 assert cache.get(f"doc-indexer-debounce-{doc2.pk}") == 0
assert cache.get(document_indexer_debounce_key(doc3.pk)) == 0 assert cache.get(f"doc-indexer-debounce-{doc3.pk}") == 0
@mock.patch.object(FindDocumentIndexer, "push") @mock.patch.object(FindDocumentIndexer, "push")
@pytest.mark.django_db(transaction=True) @pytest.mark.django_db(transaction=True)
def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings): def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings):
"""Skip indexation task on deleted or ancestor_deleted documents""" """Indexation task on deleted or ancestor_deleted documents"""
indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0 indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0
user = factories.UserFactory() user = factories.UserFactory()
@@ -1694,8 +1732,6 @@ def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings)
assert doc_ancestor_deleted.deleted_at is None assert doc_ancestor_deleted.deleted_at is None
assert doc_ancestor_deleted.ancestors_deleted_at is not None assert doc_ancestor_deleted.ancestors_deleted_at is not None
time.sleep(0.2) # waits for the end of the tasks
accesses = { accesses = {
str(doc.path): {"users": [user.sub]}, str(doc.path): {"users": [user.sub]},
str(doc_deleted.path): {"users": [user.sub]}, str(doc_deleted.path): {"users": [user.sub]},
@@ -1706,17 +1742,21 @@ def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings)
indexer = FindDocumentIndexer() indexer = FindDocumentIndexer()
# Only the not deleted document is indexed # Even deleted document are re-indexed : only update their status in the future ?
assert data == [ assert sorted(data, key=itemgetter("id")) == sorted(
indexer.serialize_document(doc, accesses), [
] indexer.serialize_document(doc, accesses),
indexer.serialize_document(doc_deleted, accesses),
indexer.serialize_document(doc_ancestor_deleted, accesses),
indexer.serialize_document(doc_deleted, accesses), # soft_delete()
],
key=itemgetter("id"),
)
# The debounce counters should be reset # The debounce counters should be reset
assert cache.get(document_indexer_debounce_key(doc.pk)) == 0 assert cache.get(f"doc-indexer-debounce-{doc.pk}") == 0
assert cache.get(f"doc-indexer-debounce-{doc_deleted.pk}") == 0
# These caches are not filled assert cache.get(f"doc-indexer-debounce-{doc_ancestor_deleted.pk}") == 0
assert cache.get(document_indexer_debounce_key(doc_deleted.pk)) is None
assert cache.get(document_indexer_debounce_key(doc_ancestor_deleted.pk)) is None
@mock.patch.object(FindDocumentIndexer, "push") @mock.patch.object(FindDocumentIndexer, "push")
@@ -1747,20 +1787,16 @@ def test_models_documents_post_save_indexer_restored(mock_push, indexer_settings
assert doc_ancestor_deleted.deleted_at is None assert doc_ancestor_deleted.deleted_at is None
assert doc_ancestor_deleted.ancestors_deleted_at is not None assert doc_ancestor_deleted.ancestors_deleted_at is not None
time.sleep(0.2) # waits for the end of the tasks doc_restored = models.Document.objects.get(pk=doc_deleted.pk)
doc_restored.restore()
doc_deleted.restore() doc_ancestor_restored = models.Document.objects.get(pk=doc_ancestor_deleted.pk)
doc_deleted.refresh_from_db() assert doc_restored.deleted_at is None
doc_ancestor_deleted.refresh_from_db() assert doc_restored.ancestors_deleted_at is None
assert doc_deleted.deleted_at is None assert doc_ancestor_restored.deleted_at is None
assert doc_deleted.ancestors_deleted_at is None assert doc_ancestor_restored.ancestors_deleted_at is None
assert doc_ancestor_deleted.deleted_at is None
assert doc_ancestor_deleted.ancestors_deleted_at is None
time.sleep(0.2)
accesses = { accesses = {
str(doc.path): {"users": [user.sub]}, str(doc.path): {"users": [user.sub]},
@@ -1777,7 +1813,9 @@ def test_models_documents_post_save_indexer_restored(mock_push, indexer_settings
[ [
indexer.serialize_document(doc, accesses), indexer.serialize_document(doc, accesses),
indexer.serialize_document(doc_deleted, accesses), indexer.serialize_document(doc_deleted, accesses),
# The restored document child is not saved so no indexation. indexer.serialize_document(doc_deleted, accesses), # soft_delete()
indexer.serialize_document(doc_restored, accesses), # restore()
indexer.serialize_document(doc_ancestor_deleted, accesses),
], ],
key=itemgetter("id"), key=itemgetter("id"),
) )
@@ -1800,31 +1838,25 @@ def test_models_documents_post_save_indexer_debounce(indexer_settings):
str(doc.path): {"users": [user.sub]}, str(doc.path): {"users": [user.sub]},
} }
time.sleep(0.1) # waits for the end of the tasks
with mock.patch.object(FindDocumentIndexer, "push") as mock_push: with mock.patch.object(FindDocumentIndexer, "push") as mock_push:
# Simulate 1 waiting task # Simulate 1 waiting task
cache.set(document_indexer_debounce_key(doc.pk), 1) cache.set(f"doc-indexer-debounce-{doc.pk}", 1)
# save doc to trigger the indexer, but nothing should be done since # save doc to trigger the indexer, but nothing should be done since
# the counter is over 0 # the counter is over 0
with transaction.atomic(): with transaction.atomic():
doc.save() doc.save()
time.sleep(0.1)
assert [call.args[0] for call in mock_push.call_args_list] == [] assert [call.args[0] for call in mock_push.call_args_list] == []
with mock.patch.object(FindDocumentIndexer, "push") as mock_push: with mock.patch.object(FindDocumentIndexer, "push") as mock_push:
# No waiting task # No waiting task
cache.set(document_indexer_debounce_key(doc.pk), 0) cache.set(f"doc-indexer-debounce-{doc.pk}", 0)
with transaction.atomic(): with transaction.atomic():
doc = models.Document.objects.get(pk=doc.pk) doc = models.Document.objects.get(pk=doc.pk)
doc.save() doc.save()
time.sleep(0.1)
assert [call.args[0] for call in mock_push.call_args_list] == [ assert [call.args[0] for call in mock_push.call_args_list] == [
indexer.serialize_document(doc, accesses), indexer.serialize_document(doc, accesses),
] ]
@@ -1853,8 +1885,6 @@ def test_models_documents_access_post_save_indexer(indexer_settings):
with transaction.atomic(): with transaction.atomic():
doc_access.save() doc_access.save()
time.sleep(0.1)
assert [call.args[0] for call in mock_push.call_args_list] == [ assert [call.args[0] for call in mock_push.call_args_list] == [
indexer.serialize_document(doc, accesses), indexer.serialize_document(doc, accesses),
] ]

View File

@@ -16,8 +16,7 @@ from core import factories, models, utils
from core.services.search_indexers import ( from core.services.search_indexers import (
BaseDocumentIndexer, BaseDocumentIndexer,
FindDocumentIndexer, FindDocumentIndexer,
default_document_indexer, get_document_indexer,
get_document_indexer_class,
get_visited_document_ids_of, get_visited_document_ids_of,
) )
@@ -37,41 +36,13 @@ class FakeDocumentIndexer(BaseDocumentIndexer):
return {} return {}
def test_services_search_indexer_class_is_empty(indexer_settings):
"""
Should raise ImproperlyConfigured if SEARCH_INDEXER_CLASS is None or empty.
"""
indexer_settings.SEARCH_INDEXER_CLASS = None
with pytest.raises(ImproperlyConfigured) as exc_info:
get_document_indexer_class()
assert "SEARCH_INDEXER_CLASS must be set in Django settings." in str(exc_info.value)
indexer_settings.SEARCH_INDEXER_CLASS = ""
# clear cache again
get_document_indexer_class.cache_clear()
with pytest.raises(ImproperlyConfigured) as exc_info:
get_document_indexer_class()
assert "SEARCH_INDEXER_CLASS must be set in Django settings." in str(exc_info.value)
def test_services_search_indexer_class_invalid(indexer_settings): def test_services_search_indexer_class_invalid(indexer_settings):
""" """
Should raise RuntimeError if SEARCH_INDEXER_CLASS cannot be imported. Should raise RuntimeError if SEARCH_INDEXER_CLASS cannot be imported.
""" """
indexer_settings.SEARCH_INDEXER_CLASS = "unknown.Unknown" indexer_settings.SEARCH_INDEXER_CLASS = "unknown.Unknown"
with pytest.raises(ImproperlyConfigured) as exc_info: assert get_document_indexer() is None
get_document_indexer_class()
assert (
"SEARCH_INDEXER_CLASS setting is not valid : No module named 'unknown'"
in str(exc_info.value)
)
def test_services_search_indexer_class(indexer_settings): def test_services_search_indexer_class(indexer_settings):
@@ -82,8 +53,9 @@ def test_services_search_indexer_class(indexer_settings):
"core.tests.test_services_search_indexers.FakeDocumentIndexer" "core.tests.test_services_search_indexers.FakeDocumentIndexer"
) )
assert get_document_indexer_class() == import_string( assert isinstance(
"core.tests.test_services_search_indexers.FakeDocumentIndexer" get_document_indexer(),
import_string("core.tests.test_services_search_indexers.FakeDocumentIndexer"),
) )
@@ -95,28 +67,28 @@ def test_services_search_indexer_is_configured(indexer_settings):
indexer_settings.SEARCH_INDEXER_CLASS = None indexer_settings.SEARCH_INDEXER_CLASS = None
# None # None
default_document_indexer.cache_clear() get_document_indexer.cache_clear()
assert not default_document_indexer() assert not get_document_indexer()
# Empty # Empty
indexer_settings.SEARCH_INDEXER_CLASS = "" indexer_settings.SEARCH_INDEXER_CLASS = ""
default_document_indexer.cache_clear() get_document_indexer.cache_clear()
assert not default_document_indexer() assert not get_document_indexer()
# Valid class # Valid class
indexer_settings.SEARCH_INDEXER_CLASS = ( indexer_settings.SEARCH_INDEXER_CLASS = (
"core.services.search_indexers.FindDocumentIndexer" "core.services.search_indexers.FindDocumentIndexer"
) )
default_document_indexer.cache_clear() get_document_indexer.cache_clear()
assert default_document_indexer() is not None assert get_document_indexer() is not None
indexer_settings.SEARCH_INDEXER_URL = "" indexer_settings.SEARCH_INDEXER_URL = ""
# Invalid url # Invalid url
default_document_indexer.cache_clear() get_document_indexer.cache_clear()
assert not default_document_indexer() assert not get_document_indexer()
def test_services_search_indexer_url_is_none(indexer_settings): def test_services_search_indexer_url_is_none(indexer_settings):

View File

@@ -111,6 +111,9 @@ class Base(Configuration):
SEARCH_INDEXER_URL = values.Value( SEARCH_INDEXER_URL = values.Value(
default=None, environ_name="SEARCH_INDEXER_URL", environ_prefix=None default=None, environ_name="SEARCH_INDEXER_URL", environ_prefix=None
) )
SEARCH_INDEXER_COUNTDOWN = values.IntegerValue(
default=1, environ_name="SEARCH_INDEXER_COUNTDOWN", environ_prefix=None
)
SEARCH_INDEXER_SECRET = values.Value( SEARCH_INDEXER_SECRET = values.Value(
default=None, environ_name="SEARCH_INDEXER_SECRET", environ_prefix=None default=None, environ_name="SEARCH_INDEXER_SECRET", environ_prefix=None
) )