✨(backend) Index deleted documents
Add SEARCH_INDEXER_COUNTDOWN as a configurable setting. Make the search backend creation simpler (only 'get_document_indexer' now). Allow indexation of deleted documents. Signed-off-by: Fabre Florian <ffabre@hybird.org>
This commit is contained in:
committed by
Quentin BEY
parent
331a94ad2f
commit
a48f61e583
@@ -53,7 +53,7 @@ from core.services.converter_services import (
|
|||||||
YdocConverter,
|
YdocConverter,
|
||||||
)
|
)
|
||||||
from core.services.search_indexers import (
|
from core.services.search_indexers import (
|
||||||
default_document_indexer,
|
get_document_indexer,
|
||||||
get_visited_document_ids_of,
|
get_visited_document_ids_of,
|
||||||
)
|
)
|
||||||
from core.tasks.mail import send_ask_for_access_mail
|
from core.tasks.mail import send_ask_for_access_mail
|
||||||
@@ -1090,7 +1090,14 @@ class DocumentViewSet(
|
|||||||
def search(self, request, *args, **kwargs):
|
def search(self, request, *args, **kwargs):
|
||||||
"""
|
"""
|
||||||
Returns a DRF response containing the filtered, annotated and ordered document list.
|
Returns a DRF response containing the filtered, annotated and ordered document list.
|
||||||
The filtering allows full text search through the opensearch indexation app "find".
|
|
||||||
|
Applies filtering based on request parameter 'q' from `FindDocumentSerializer`.
|
||||||
|
Depending on the configuration it can be:
|
||||||
|
- A fulltext search through the opensearch indexation app "find" if the backend is
|
||||||
|
enabled (see SEARCH_BACKEND_CLASS)
|
||||||
|
- A filtering by the model field 'title'.
|
||||||
|
|
||||||
|
The ordering is always by the most recent first.
|
||||||
"""
|
"""
|
||||||
access_token = request.session.get("oidc_access_token")
|
access_token = request.session.get("oidc_access_token")
|
||||||
user = request.user
|
user = request.user
|
||||||
@@ -1098,13 +1105,15 @@ class DocumentViewSet(
|
|||||||
serializer = serializers.FindDocumentSerializer(data=request.query_params)
|
serializer = serializers.FindDocumentSerializer(data=request.query_params)
|
||||||
serializer.is_valid(raise_exception=True)
|
serializer.is_valid(raise_exception=True)
|
||||||
|
|
||||||
indexer = default_document_indexer()
|
indexer = get_document_indexer()
|
||||||
|
text = serializer.validated_data["q"]
|
||||||
|
|
||||||
|
# The indexer is not configured, so we fallback on a simple filter on the
|
||||||
|
# model field 'title'.
|
||||||
if not indexer:
|
if not indexer:
|
||||||
|
# As the 'list' view we get a prefiltered queryset (deleted docs are excluded)
|
||||||
queryset = self.get_queryset()
|
queryset = self.get_queryset()
|
||||||
filterset = DocumentFilter(
|
filterset = DocumentFilter({"title": text}, queryset=queryset)
|
||||||
{"title": serializer.validated_data.get("q", "")}, queryset=queryset
|
|
||||||
)
|
|
||||||
|
|
||||||
if not filterset.is_valid():
|
if not filterset.is_valid():
|
||||||
raise drf.exceptions.ValidationError(filterset.errors)
|
raise drf.exceptions.ValidationError(filterset.errors)
|
||||||
@@ -1119,15 +1128,17 @@ class DocumentViewSet(
|
|||||||
)
|
)
|
||||||
|
|
||||||
queryset = models.Document.objects.all()
|
queryset = models.Document.objects.all()
|
||||||
|
|
||||||
|
# Retrieve the documents ids from Find.
|
||||||
results = indexer.search(
|
results = indexer.search(
|
||||||
text=serializer.validated_data.get("q", ""),
|
text=text,
|
||||||
token=access_token,
|
token=access_token,
|
||||||
visited=get_visited_document_ids_of(queryset, user),
|
visited=get_visited_document_ids_of(queryset, user),
|
||||||
page=serializer.validated_data.get("page", 1),
|
page=serializer.validated_data.get("page", 1),
|
||||||
page_size=serializer.validated_data.get("page_size", 20),
|
page_size=serializer.validated_data.get("page_size", 20),
|
||||||
)
|
)
|
||||||
|
|
||||||
queryset = queryset.filter(pk__in=results)
|
queryset = queryset.filter(pk__in=results).order_by("-updated_at")
|
||||||
|
|
||||||
return self.get_response_for_queryset(
|
return self.get_response_for_queryset(
|
||||||
queryset,
|
queryset,
|
||||||
|
|||||||
@@ -5,9 +5,9 @@ Handle search setup that needs to be done at bootstrap time.
|
|||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand, CommandError
|
||||||
|
|
||||||
from ...services.search_indexers import FindDocumentIndexer
|
from core.services.search_indexers import get_document_indexer
|
||||||
|
|
||||||
logger = logging.getLogger("docs.search.bootstrap_search")
|
logger = logging.getLogger("docs.search.bootstrap_search")
|
||||||
|
|
||||||
@@ -19,9 +19,18 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
"""Launch and log search index generation."""
|
"""Launch and log search index generation."""
|
||||||
|
indexer = get_document_indexer()
|
||||||
|
|
||||||
|
if not indexer:
|
||||||
|
raise CommandError("The indexer is not enabled or properly configured.")
|
||||||
|
|
||||||
logger.info("Starting to regenerate Find index...")
|
logger.info("Starting to regenerate Find index...")
|
||||||
start = time.perf_counter()
|
start = time.perf_counter()
|
||||||
count = FindDocumentIndexer().index()
|
|
||||||
|
try:
|
||||||
|
count = indexer.index()
|
||||||
|
except Exception as err:
|
||||||
|
raise CommandError("Unable to regenerate index") from err
|
||||||
|
|
||||||
duration = time.perf_counter() - start
|
duration = time.perf_counter() - start
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|||||||
@@ -19,37 +19,24 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
@cache
|
@cache
|
||||||
def default_document_indexer():
|
def get_document_indexer():
|
||||||
"""Returns default indexer service is enabled and properly configured."""
|
"""Returns an instance of indexer service if enabled and properly configured."""
|
||||||
|
classpath = settings.SEARCH_INDEXER_CLASS
|
||||||
|
|
||||||
# For this usecase an empty indexer class is not an issue but a feature.
|
# For this usecase an empty indexer class is not an issue but a feature.
|
||||||
if not getattr(settings, "SEARCH_INDEXER_CLASS", None):
|
if not classpath:
|
||||||
logger.info("Document indexer is not configured (see SEARCH_INDEXER_CLASS)")
|
logger.info("Document indexer is not configured (see SEARCH_INDEXER_CLASS)")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return get_document_indexer_class()()
|
indexer_class = import_string(settings.SEARCH_INDEXER_CLASS)
|
||||||
|
return indexer_class()
|
||||||
|
except ImportError as err:
|
||||||
|
logger.error("SEARCH_INDEXER_CLASS setting is not valid : %s", err)
|
||||||
except ImproperlyConfigured as err:
|
except ImproperlyConfigured as err:
|
||||||
logger.error("Document indexer is not properly configured : %s", err)
|
logger.error("Document indexer is not properly configured : %s", err)
|
||||||
return None
|
|
||||||
|
|
||||||
|
return None
|
||||||
@cache
|
|
||||||
def get_document_indexer_class():
|
|
||||||
"""Return the indexer backend class based on the settings."""
|
|
||||||
classpath = settings.SEARCH_INDEXER_CLASS
|
|
||||||
|
|
||||||
if not classpath:
|
|
||||||
raise ImproperlyConfigured(
|
|
||||||
"SEARCH_INDEXER_CLASS must be set in Django settings."
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
return import_string(settings.SEARCH_INDEXER_CLASS)
|
|
||||||
except ImportError as err:
|
|
||||||
raise ImproperlyConfigured(
|
|
||||||
f"SEARCH_INDEXER_CLASS setting is not valid : {err}"
|
|
||||||
) from err
|
|
||||||
|
|
||||||
|
|
||||||
def get_batch_accesses_by_users_and_teams(paths):
|
def get_batch_accesses_by_users_and_teams(paths):
|
||||||
@@ -100,9 +87,11 @@ def get_visited_document_ids_of(queryset, user):
|
|||||||
ancestors_deleted_at__isnull=True,
|
ancestors_deleted_at__isnull=True,
|
||||||
)
|
)
|
||||||
.filter(pk__in=Subquery(qs.values("document_id")))
|
.filter(pk__in=Subquery(qs.values("document_id")))
|
||||||
|
.order_by("pk")
|
||||||
|
.distinct("pk")
|
||||||
)
|
)
|
||||||
|
|
||||||
return list({str(id) for id in docs.values_list("pk", flat=True)})
|
return [str(id) for id in docs.values_list("pk", flat=True)]
|
||||||
|
|
||||||
|
|
||||||
class BaseDocumentIndexer(ABC):
|
class BaseDocumentIndexer(ABC):
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ from django.db.models import signals
|
|||||||
from django.dispatch import receiver
|
from django.dispatch import receiver
|
||||||
|
|
||||||
from . import models
|
from . import models
|
||||||
from .services.search_indexers import default_document_indexer
|
|
||||||
from .tasks.find import trigger_document_indexer
|
from .tasks.find import trigger_document_indexer
|
||||||
|
|
||||||
|
|
||||||
@@ -20,8 +19,7 @@ def document_post_save(sender, instance, **kwargs): # pylint: disable=unused-ar
|
|||||||
Note : Within the transaction we can have an empty content and a serialization
|
Note : Within the transaction we can have an empty content and a serialization
|
||||||
error.
|
error.
|
||||||
"""
|
"""
|
||||||
if default_document_indexer() is not None:
|
transaction.on_commit(partial(trigger_document_indexer, instance))
|
||||||
transaction.on_commit(partial(trigger_document_indexer, instance))
|
|
||||||
|
|
||||||
|
|
||||||
@receiver(signals.post_save, sender=models.DocumentAccess)
|
@receiver(signals.post_save, sender=models.DocumentAccess)
|
||||||
@@ -29,5 +27,5 @@ def document_access_post_save(sender, instance, created, **kwargs): # pylint: d
|
|||||||
"""
|
"""
|
||||||
Asynchronous call to the document indexer at the end of the transaction.
|
Asynchronous call to the document indexer at the end of the transaction.
|
||||||
"""
|
"""
|
||||||
if not created and default_document_indexer() is not None:
|
if not created:
|
||||||
transaction.on_commit(partial(trigger_document_indexer, instance.document))
|
transaction.on_commit(partial(trigger_document_indexer, instance.document))
|
||||||
|
|||||||
@@ -10,13 +10,10 @@ from impress.celery_app import app
|
|||||||
logger = getLogger(__file__)
|
logger = getLogger(__file__)
|
||||||
|
|
||||||
|
|
||||||
def document_indexer_debounce_key(document_id):
|
def indexer_debounce_lock(document_id):
|
||||||
"""Returns debounce cache key"""
|
|
||||||
return f"doc-indexer-debounce-{document_id}"
|
|
||||||
|
|
||||||
|
|
||||||
def incr_counter(key):
|
|
||||||
"""Increase or reset counter"""
|
"""Increase or reset counter"""
|
||||||
|
key = f"doc-indexer-debounce-{document_id}"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return cache.incr(key)
|
return cache.incr(key)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@@ -24,8 +21,10 @@ def incr_counter(key):
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
|
||||||
def decr_counter(key):
|
def indexer_debounce_release(document_id):
|
||||||
"""Decrease or reset counter"""
|
"""Decrease or reset counter"""
|
||||||
|
key = f"doc-indexer-debounce-{document_id}"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return cache.decr(key)
|
return cache.decr(key)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@@ -36,24 +35,26 @@ def decr_counter(key):
|
|||||||
@app.task
|
@app.task
|
||||||
def document_indexer_task(document_id):
|
def document_indexer_task(document_id):
|
||||||
"""Celery Task : Sends indexation query for a document."""
|
"""Celery Task : Sends indexation query for a document."""
|
||||||
key = document_indexer_debounce_key(document_id)
|
# Prevents some circular imports
|
||||||
|
# pylint: disable=import-outside-toplevel
|
||||||
|
from core import models # noqa : PLC0415
|
||||||
|
from core.services.search_indexers import ( # noqa : PLC0415
|
||||||
|
get_batch_accesses_by_users_and_teams,
|
||||||
|
get_document_indexer,
|
||||||
|
)
|
||||||
|
|
||||||
# check if the counter : if still up, skip the task. only the last one
|
# check if the counter : if still up, skip the task. only the last one
|
||||||
# within the countdown delay will do the query.
|
# within the countdown delay will do the query.
|
||||||
if decr_counter(key) > 0:
|
if indexer_debounce_release(document_id) > 0:
|
||||||
logger.info("Skip document %s indexation", document_id)
|
logger.info("Skip document %s indexation", document_id)
|
||||||
return
|
return
|
||||||
|
|
||||||
# Prevents some circular imports
|
indexer = get_document_indexer()
|
||||||
# pylint: disable=import-outside-toplevel
|
|
||||||
from core import models # noqa: PLC0415
|
if indexer is None:
|
||||||
from core.services.search_indexers import ( # noqa: PLC0415
|
return
|
||||||
get_batch_accesses_by_users_and_teams,
|
|
||||||
get_document_indexer_class,
|
|
||||||
)
|
|
||||||
|
|
||||||
doc = models.Document.objects.get(pk=document_id)
|
doc = models.Document.objects.get(pk=document_id)
|
||||||
indexer = get_document_indexer_class()()
|
|
||||||
accesses = get_batch_accesses_by_users_and_teams((doc.path,))
|
accesses = get_batch_accesses_by_users_and_teams((doc.path,))
|
||||||
|
|
||||||
data = indexer.serialize_document(document=doc, accesses=accesses)
|
data = indexer.serialize_document(document=doc, accesses=accesses)
|
||||||
@@ -69,11 +70,11 @@ def trigger_document_indexer(document):
|
|||||||
Args:
|
Args:
|
||||||
document (Document): The document instance.
|
document (Document): The document instance.
|
||||||
"""
|
"""
|
||||||
if document.deleted_at or document.ancestors_deleted_at:
|
countdown = settings.SEARCH_INDEXER_COUNTDOWN
|
||||||
return
|
|
||||||
|
|
||||||
key = document_indexer_debounce_key(document.pk)
|
# DO NOT create a task if indexation is disabled
|
||||||
countdown = getattr(settings, "SEARCH_INDEXER_COUNTDOWN", 1)
|
if not settings.SEARCH_INDEXER_CLASS:
|
||||||
|
return
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Add task for document %s indexation in %.2f seconds",
|
"Add task for document %s indexation in %.2f seconds",
|
||||||
@@ -83,6 +84,6 @@ def trigger_document_indexer(document):
|
|||||||
|
|
||||||
# Each time this method is called during the countdown, we increment the
|
# Each time this method is called during the countdown, we increment the
|
||||||
# counter and each task decrease it, so the index be run only once.
|
# counter and each task decrease it, so the index be run only once.
|
||||||
incr_counter(key)
|
indexer_debounce_lock(document.pk)
|
||||||
|
|
||||||
document_indexer_task.apply_async(args=[document.pk], countdown=countdown)
|
document_indexer_task.apply_async(args=[document.pk], countdown=countdown)
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ Unit test for `index` command.
|
|||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
from django.core.management import call_command
|
from django.core.management import CommandError, call_command
|
||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
@@ -51,3 +51,15 @@ def test_index():
|
|||||||
],
|
],
|
||||||
key=itemgetter("id"),
|
key=itemgetter("id"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
@pytest.mark.usefixtures("indexer_settings")
|
||||||
|
def test_index_improperly_configured(indexer_settings):
|
||||||
|
"""The command should raise an exception if the indexer is not configured"""
|
||||||
|
indexer_settings.SEARCH_INDEXER_CLASS = None
|
||||||
|
|
||||||
|
with pytest.raises(CommandError) as err:
|
||||||
|
call_command("index")
|
||||||
|
|
||||||
|
assert str(err.value) == "The indexer is not enabled or properly configured."
|
||||||
|
|||||||
@@ -34,12 +34,10 @@ def indexer_settings_fixture(settings):
|
|||||||
|
|
||||||
# pylint: disable-next=import-outside-toplevel
|
# pylint: disable-next=import-outside-toplevel
|
||||||
from core.services.search_indexers import ( # noqa: PLC0415
|
from core.services.search_indexers import ( # noqa: PLC0415
|
||||||
default_document_indexer,
|
get_document_indexer,
|
||||||
get_document_indexer_class,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
default_document_indexer.cache_clear()
|
get_document_indexer.cache_clear()
|
||||||
get_document_indexer_class.cache_clear()
|
|
||||||
|
|
||||||
settings.SEARCH_INDEXER_CLASS = "core.services.search_indexers.FindDocumentIndexer"
|
settings.SEARCH_INDEXER_CLASS = "core.services.search_indexers.FindDocumentIndexer"
|
||||||
settings.SEARCH_INDEXER_SECRET = "ThisIsAKeyForTest"
|
settings.SEARCH_INDEXER_SECRET = "ThisIsAKeyForTest"
|
||||||
@@ -51,5 +49,4 @@ def indexer_settings_fixture(settings):
|
|||||||
yield settings
|
yield settings
|
||||||
|
|
||||||
# clear cache to prevent issues with other tests
|
# clear cache to prevent issues with other tests
|
||||||
default_document_indexer.cache_clear()
|
get_document_indexer.cache_clear()
|
||||||
get_document_indexer_class.cache_clear()
|
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from faker import Faker
|
|||||||
from rest_framework.test import APIClient
|
from rest_framework.test import APIClient
|
||||||
|
|
||||||
from core import factories, models
|
from core import factories, models
|
||||||
from core.services.search_indexers import default_document_indexer
|
from core.services.search_indexers import get_document_indexer
|
||||||
|
|
||||||
fake = Faker()
|
fake = Faker()
|
||||||
pytestmark = pytest.mark.django_db
|
pytestmark = pytest.mark.django_db
|
||||||
@@ -54,7 +54,7 @@ def test_api_documents_search_endpoint_is_none(indexer_settings):
|
|||||||
"""
|
"""
|
||||||
indexer_settings.SEARCH_INDEXER_QUERY_URL = None
|
indexer_settings.SEARCH_INDEXER_QUERY_URL = None
|
||||||
|
|
||||||
assert default_document_indexer() is None
|
assert get_document_indexer() is None
|
||||||
|
|
||||||
user = factories.UserFactory()
|
user = factories.UserFactory()
|
||||||
document = factories.DocumentFactory(title="alpha")
|
document = factories.DocumentFactory(title="alpha")
|
||||||
@@ -130,7 +130,7 @@ def test_api_documents_search_format(indexer_settings):
|
|||||||
"""Validate the format of documents as returned by the search view."""
|
"""Validate the format of documents as returned by the search view."""
|
||||||
indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
|
indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
|
||||||
|
|
||||||
assert default_document_indexer() is not None
|
assert get_document_indexer() is not None
|
||||||
|
|
||||||
user = factories.UserFactory()
|
user = factories.UserFactory()
|
||||||
|
|
||||||
@@ -193,7 +193,7 @@ def test_api_documents_search_pagination(indexer_settings):
|
|||||||
"""Documents should be ordered by descending "updated_at" by default"""
|
"""Documents should be ordered by descending "updated_at" by default"""
|
||||||
indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
|
indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
|
||||||
|
|
||||||
assert default_document_indexer() is not None
|
assert get_document_indexer() is not None
|
||||||
|
|
||||||
user = factories.UserFactory()
|
user = factories.UserFactory()
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ Unit tests for the Document model
|
|||||||
|
|
||||||
import random
|
import random
|
||||||
import smtplib
|
import smtplib
|
||||||
import time
|
|
||||||
from logging import Logger
|
from logging import Logger
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
@@ -23,7 +22,6 @@ import pytest
|
|||||||
|
|
||||||
from core import factories, models
|
from core import factories, models
|
||||||
from core.services.search_indexers import FindDocumentIndexer
|
from core.services.search_indexers import FindDocumentIndexer
|
||||||
from core.tasks.find import document_indexer_debounce_key
|
|
||||||
|
|
||||||
pytestmark = pytest.mark.django_db
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
@@ -1630,6 +1628,48 @@ def test_models_documents_post_save_indexer(mock_push, indexer_settings):
|
|||||||
"""Test indexation task on document creation"""
|
"""Test indexation task on document creation"""
|
||||||
indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0
|
indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0
|
||||||
|
|
||||||
|
with transaction.atomic():
|
||||||
|
doc1, doc2, doc3 = factories.DocumentFactory.create_batch(3)
|
||||||
|
|
||||||
|
accesses = {}
|
||||||
|
data = [call.args[0] for call in mock_push.call_args_list]
|
||||||
|
|
||||||
|
indexer = FindDocumentIndexer()
|
||||||
|
|
||||||
|
assert sorted(data, key=itemgetter("id")) == sorted(
|
||||||
|
[
|
||||||
|
indexer.serialize_document(doc1, accesses),
|
||||||
|
indexer.serialize_document(doc2, accesses),
|
||||||
|
indexer.serialize_document(doc3, accesses),
|
||||||
|
],
|
||||||
|
key=itemgetter("id"),
|
||||||
|
)
|
||||||
|
|
||||||
|
# The debounce counters should be reset
|
||||||
|
assert cache.get(f"doc-indexer-debounce-{doc1.pk}") == 0
|
||||||
|
assert cache.get(f"doc-indexer-debounce-{doc2.pk}") == 0
|
||||||
|
assert cache.get(f"doc-indexer-debounce-{doc3.pk}") == 0
|
||||||
|
|
||||||
|
|
||||||
|
@mock.patch.object(FindDocumentIndexer, "push")
|
||||||
|
@pytest.mark.django_db(transaction=True)
|
||||||
|
def test_models_documents_post_save_indexer_not_configured(mock_push, indexer_settings):
|
||||||
|
"""Task should not start an indexation when disabled"""
|
||||||
|
indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0
|
||||||
|
indexer_settings.SEARCH_INDEXER_CLASS = None
|
||||||
|
|
||||||
|
with transaction.atomic():
|
||||||
|
factories.DocumentFactory()
|
||||||
|
|
||||||
|
assert mock_push.call_args_list == []
|
||||||
|
|
||||||
|
|
||||||
|
@mock.patch.object(FindDocumentIndexer, "push")
|
||||||
|
@pytest.mark.django_db(transaction=True)
|
||||||
|
def test_models_documents_post_save_indexer_with_accesses(mock_push, indexer_settings):
|
||||||
|
"""Test indexation task on document creation"""
|
||||||
|
indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0
|
||||||
|
|
||||||
user = factories.UserFactory()
|
user = factories.UserFactory()
|
||||||
|
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
@@ -1639,8 +1679,6 @@ def test_models_documents_post_save_indexer(mock_push, indexer_settings):
|
|||||||
factories.UserDocumentAccessFactory(document=doc2, user=user)
|
factories.UserDocumentAccessFactory(document=doc2, user=user)
|
||||||
factories.UserDocumentAccessFactory(document=doc3, user=user)
|
factories.UserDocumentAccessFactory(document=doc3, user=user)
|
||||||
|
|
||||||
time.sleep(0.2) # waits for the end of the tasks
|
|
||||||
|
|
||||||
accesses = {
|
accesses = {
|
||||||
str(doc1.path): {"users": [user.sub]},
|
str(doc1.path): {"users": [user.sub]},
|
||||||
str(doc2.path): {"users": [user.sub]},
|
str(doc2.path): {"users": [user.sub]},
|
||||||
@@ -1661,15 +1699,15 @@ def test_models_documents_post_save_indexer(mock_push, indexer_settings):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# The debounce counters should be reset
|
# The debounce counters should be reset
|
||||||
assert cache.get(document_indexer_debounce_key(doc1.pk)) == 0
|
assert cache.get(f"doc-indexer-debounce-{doc1.pk}") == 0
|
||||||
assert cache.get(document_indexer_debounce_key(doc2.pk)) == 0
|
assert cache.get(f"doc-indexer-debounce-{doc2.pk}") == 0
|
||||||
assert cache.get(document_indexer_debounce_key(doc3.pk)) == 0
|
assert cache.get(f"doc-indexer-debounce-{doc3.pk}") == 0
|
||||||
|
|
||||||
|
|
||||||
@mock.patch.object(FindDocumentIndexer, "push")
|
@mock.patch.object(FindDocumentIndexer, "push")
|
||||||
@pytest.mark.django_db(transaction=True)
|
@pytest.mark.django_db(transaction=True)
|
||||||
def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings):
|
def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings):
|
||||||
"""Skip indexation task on deleted or ancestor_deleted documents"""
|
"""Indexation task on deleted or ancestor_deleted documents"""
|
||||||
indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0
|
indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0
|
||||||
|
|
||||||
user = factories.UserFactory()
|
user = factories.UserFactory()
|
||||||
@@ -1694,8 +1732,6 @@ def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings)
|
|||||||
assert doc_ancestor_deleted.deleted_at is None
|
assert doc_ancestor_deleted.deleted_at is None
|
||||||
assert doc_ancestor_deleted.ancestors_deleted_at is not None
|
assert doc_ancestor_deleted.ancestors_deleted_at is not None
|
||||||
|
|
||||||
time.sleep(0.2) # waits for the end of the tasks
|
|
||||||
|
|
||||||
accesses = {
|
accesses = {
|
||||||
str(doc.path): {"users": [user.sub]},
|
str(doc.path): {"users": [user.sub]},
|
||||||
str(doc_deleted.path): {"users": [user.sub]},
|
str(doc_deleted.path): {"users": [user.sub]},
|
||||||
@@ -1706,17 +1742,21 @@ def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings)
|
|||||||
|
|
||||||
indexer = FindDocumentIndexer()
|
indexer = FindDocumentIndexer()
|
||||||
|
|
||||||
# Only the not deleted document is indexed
|
# Even deleted documents are re-indexed : only update their status in the future ?
|
||||||
assert data == [
|
assert sorted(data, key=itemgetter("id")) == sorted(
|
||||||
indexer.serialize_document(doc, accesses),
|
[
|
||||||
]
|
indexer.serialize_document(doc, accesses),
|
||||||
|
indexer.serialize_document(doc_deleted, accesses),
|
||||||
|
indexer.serialize_document(doc_ancestor_deleted, accesses),
|
||||||
|
indexer.serialize_document(doc_deleted, accesses), # soft_delete()
|
||||||
|
],
|
||||||
|
key=itemgetter("id"),
|
||||||
|
)
|
||||||
|
|
||||||
# The debounce counters should be reset
|
# The debounce counters should be reset
|
||||||
assert cache.get(document_indexer_debounce_key(doc.pk)) == 0
|
assert cache.get(f"doc-indexer-debounce-{doc.pk}") == 0
|
||||||
|
assert cache.get(f"doc-indexer-debounce-{doc_deleted.pk}") == 0
|
||||||
# These caches are not filled
|
assert cache.get(f"doc-indexer-debounce-{doc_ancestor_deleted.pk}") == 0
|
||||||
assert cache.get(document_indexer_debounce_key(doc_deleted.pk)) is None
|
|
||||||
assert cache.get(document_indexer_debounce_key(doc_ancestor_deleted.pk)) is None
|
|
||||||
|
|
||||||
|
|
||||||
@mock.patch.object(FindDocumentIndexer, "push")
|
@mock.patch.object(FindDocumentIndexer, "push")
|
||||||
@@ -1747,20 +1787,16 @@ def test_models_documents_post_save_indexer_restored(mock_push, indexer_settings
|
|||||||
assert doc_ancestor_deleted.deleted_at is None
|
assert doc_ancestor_deleted.deleted_at is None
|
||||||
assert doc_ancestor_deleted.ancestors_deleted_at is not None
|
assert doc_ancestor_deleted.ancestors_deleted_at is not None
|
||||||
|
|
||||||
time.sleep(0.2) # waits for the end of the tasks
|
doc_restored = models.Document.objects.get(pk=doc_deleted.pk)
|
||||||
|
doc_restored.restore()
|
||||||
|
|
||||||
doc_deleted.restore()
|
doc_ancestor_restored = models.Document.objects.get(pk=doc_ancestor_deleted.pk)
|
||||||
|
|
||||||
doc_deleted.refresh_from_db()
|
assert doc_restored.deleted_at is None
|
||||||
doc_ancestor_deleted.refresh_from_db()
|
assert doc_restored.ancestors_deleted_at is None
|
||||||
|
|
||||||
assert doc_deleted.deleted_at is None
|
assert doc_ancestor_restored.deleted_at is None
|
||||||
assert doc_deleted.ancestors_deleted_at is None
|
assert doc_ancestor_restored.ancestors_deleted_at is None
|
||||||
|
|
||||||
assert doc_ancestor_deleted.deleted_at is None
|
|
||||||
assert doc_ancestor_deleted.ancestors_deleted_at is None
|
|
||||||
|
|
||||||
time.sleep(0.2)
|
|
||||||
|
|
||||||
accesses = {
|
accesses = {
|
||||||
str(doc.path): {"users": [user.sub]},
|
str(doc.path): {"users": [user.sub]},
|
||||||
@@ -1777,7 +1813,9 @@ def test_models_documents_post_save_indexer_restored(mock_push, indexer_settings
|
|||||||
[
|
[
|
||||||
indexer.serialize_document(doc, accesses),
|
indexer.serialize_document(doc, accesses),
|
||||||
indexer.serialize_document(doc_deleted, accesses),
|
indexer.serialize_document(doc_deleted, accesses),
|
||||||
# The restored document child is not saved so no indexation.
|
indexer.serialize_document(doc_deleted, accesses), # soft_delete()
|
||||||
|
indexer.serialize_document(doc_restored, accesses), # restore()
|
||||||
|
indexer.serialize_document(doc_ancestor_deleted, accesses),
|
||||||
],
|
],
|
||||||
key=itemgetter("id"),
|
key=itemgetter("id"),
|
||||||
)
|
)
|
||||||
@@ -1800,31 +1838,25 @@ def test_models_documents_post_save_indexer_debounce(indexer_settings):
|
|||||||
str(doc.path): {"users": [user.sub]},
|
str(doc.path): {"users": [user.sub]},
|
||||||
}
|
}
|
||||||
|
|
||||||
time.sleep(0.1) # waits for the end of the tasks
|
|
||||||
|
|
||||||
with mock.patch.object(FindDocumentIndexer, "push") as mock_push:
|
with mock.patch.object(FindDocumentIndexer, "push") as mock_push:
|
||||||
# Simulate 1 waiting task
|
# Simulate 1 waiting task
|
||||||
cache.set(document_indexer_debounce_key(doc.pk), 1)
|
cache.set(f"doc-indexer-debounce-{doc.pk}", 1)
|
||||||
|
|
||||||
# save doc to trigger the indexer, but nothing should be done since
|
# save doc to trigger the indexer, but nothing should be done since
|
||||||
# the counter is over 0
|
# the counter is over 0
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
doc.save()
|
doc.save()
|
||||||
|
|
||||||
time.sleep(0.1)
|
|
||||||
|
|
||||||
assert [call.args[0] for call in mock_push.call_args_list] == []
|
assert [call.args[0] for call in mock_push.call_args_list] == []
|
||||||
|
|
||||||
with mock.patch.object(FindDocumentIndexer, "push") as mock_push:
|
with mock.patch.object(FindDocumentIndexer, "push") as mock_push:
|
||||||
# No waiting task
|
# No waiting task
|
||||||
cache.set(document_indexer_debounce_key(doc.pk), 0)
|
cache.set(f"doc-indexer-debounce-{doc.pk}", 0)
|
||||||
|
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
doc = models.Document.objects.get(pk=doc.pk)
|
doc = models.Document.objects.get(pk=doc.pk)
|
||||||
doc.save()
|
doc.save()
|
||||||
|
|
||||||
time.sleep(0.1)
|
|
||||||
|
|
||||||
assert [call.args[0] for call in mock_push.call_args_list] == [
|
assert [call.args[0] for call in mock_push.call_args_list] == [
|
||||||
indexer.serialize_document(doc, accesses),
|
indexer.serialize_document(doc, accesses),
|
||||||
]
|
]
|
||||||
@@ -1853,8 +1885,6 @@ def test_models_documents_access_post_save_indexer(indexer_settings):
|
|||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
doc_access.save()
|
doc_access.save()
|
||||||
|
|
||||||
time.sleep(0.1)
|
|
||||||
|
|
||||||
assert [call.args[0] for call in mock_push.call_args_list] == [
|
assert [call.args[0] for call in mock_push.call_args_list] == [
|
||||||
indexer.serialize_document(doc, accesses),
|
indexer.serialize_document(doc, accesses),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -16,8 +16,7 @@ from core import factories, models, utils
|
|||||||
from core.services.search_indexers import (
|
from core.services.search_indexers import (
|
||||||
BaseDocumentIndexer,
|
BaseDocumentIndexer,
|
||||||
FindDocumentIndexer,
|
FindDocumentIndexer,
|
||||||
default_document_indexer,
|
get_document_indexer,
|
||||||
get_document_indexer_class,
|
|
||||||
get_visited_document_ids_of,
|
get_visited_document_ids_of,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -37,41 +36,13 @@ class FakeDocumentIndexer(BaseDocumentIndexer):
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
def test_services_search_indexer_class_is_empty(indexer_settings):
|
|
||||||
"""
|
|
||||||
Should raise ImproperlyConfigured if SEARCH_INDEXER_CLASS is None or empty.
|
|
||||||
"""
|
|
||||||
indexer_settings.SEARCH_INDEXER_CLASS = None
|
|
||||||
|
|
||||||
with pytest.raises(ImproperlyConfigured) as exc_info:
|
|
||||||
get_document_indexer_class()
|
|
||||||
|
|
||||||
assert "SEARCH_INDEXER_CLASS must be set in Django settings." in str(exc_info.value)
|
|
||||||
|
|
||||||
indexer_settings.SEARCH_INDEXER_CLASS = ""
|
|
||||||
|
|
||||||
# clear cache again
|
|
||||||
get_document_indexer_class.cache_clear()
|
|
||||||
|
|
||||||
with pytest.raises(ImproperlyConfigured) as exc_info:
|
|
||||||
get_document_indexer_class()
|
|
||||||
|
|
||||||
assert "SEARCH_INDEXER_CLASS must be set in Django settings." in str(exc_info.value)
|
|
||||||
|
|
||||||
|
|
||||||
def test_services_search_indexer_class_invalid(indexer_settings):
|
def test_services_search_indexer_class_invalid(indexer_settings):
|
||||||
"""
|
"""
|
||||||
Should raise RuntimeError if SEARCH_INDEXER_CLASS cannot be imported.
|
Should raise RuntimeError if SEARCH_INDEXER_CLASS cannot be imported.
|
||||||
"""
|
"""
|
||||||
indexer_settings.SEARCH_INDEXER_CLASS = "unknown.Unknown"
|
indexer_settings.SEARCH_INDEXER_CLASS = "unknown.Unknown"
|
||||||
|
|
||||||
with pytest.raises(ImproperlyConfigured) as exc_info:
|
assert get_document_indexer() is None
|
||||||
get_document_indexer_class()
|
|
||||||
|
|
||||||
assert (
|
|
||||||
"SEARCH_INDEXER_CLASS setting is not valid : No module named 'unknown'"
|
|
||||||
in str(exc_info.value)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_services_search_indexer_class(indexer_settings):
|
def test_services_search_indexer_class(indexer_settings):
|
||||||
@@ -82,8 +53,9 @@ def test_services_search_indexer_class(indexer_settings):
|
|||||||
"core.tests.test_services_search_indexers.FakeDocumentIndexer"
|
"core.tests.test_services_search_indexers.FakeDocumentIndexer"
|
||||||
)
|
)
|
||||||
|
|
||||||
assert get_document_indexer_class() == import_string(
|
assert isinstance(
|
||||||
"core.tests.test_services_search_indexers.FakeDocumentIndexer"
|
get_document_indexer(),
|
||||||
|
import_string("core.tests.test_services_search_indexers.FakeDocumentIndexer"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -95,28 +67,28 @@ def test_services_search_indexer_is_configured(indexer_settings):
|
|||||||
indexer_settings.SEARCH_INDEXER_CLASS = None
|
indexer_settings.SEARCH_INDEXER_CLASS = None
|
||||||
|
|
||||||
# None
|
# None
|
||||||
default_document_indexer.cache_clear()
|
get_document_indexer.cache_clear()
|
||||||
assert not default_document_indexer()
|
assert not get_document_indexer()
|
||||||
|
|
||||||
# Empty
|
# Empty
|
||||||
indexer_settings.SEARCH_INDEXER_CLASS = ""
|
indexer_settings.SEARCH_INDEXER_CLASS = ""
|
||||||
|
|
||||||
default_document_indexer.cache_clear()
|
get_document_indexer.cache_clear()
|
||||||
assert not default_document_indexer()
|
assert not get_document_indexer()
|
||||||
|
|
||||||
# Valid class
|
# Valid class
|
||||||
indexer_settings.SEARCH_INDEXER_CLASS = (
|
indexer_settings.SEARCH_INDEXER_CLASS = (
|
||||||
"core.services.search_indexers.FindDocumentIndexer"
|
"core.services.search_indexers.FindDocumentIndexer"
|
||||||
)
|
)
|
||||||
|
|
||||||
default_document_indexer.cache_clear()
|
get_document_indexer.cache_clear()
|
||||||
assert default_document_indexer() is not None
|
assert get_document_indexer() is not None
|
||||||
|
|
||||||
indexer_settings.SEARCH_INDEXER_URL = ""
|
indexer_settings.SEARCH_INDEXER_URL = ""
|
||||||
|
|
||||||
# Invalid url
|
# Invalid url
|
||||||
default_document_indexer.cache_clear()
|
get_document_indexer.cache_clear()
|
||||||
assert not default_document_indexer()
|
assert not get_document_indexer()
|
||||||
|
|
||||||
|
|
||||||
def test_services_search_indexer_url_is_none(indexer_settings):
|
def test_services_search_indexer_url_is_none(indexer_settings):
|
||||||
|
|||||||
@@ -111,6 +111,9 @@ class Base(Configuration):
|
|||||||
SEARCH_INDEXER_URL = values.Value(
|
SEARCH_INDEXER_URL = values.Value(
|
||||||
default=None, environ_name="SEARCH_INDEXER_URL", environ_prefix=None
|
default=None, environ_name="SEARCH_INDEXER_URL", environ_prefix=None
|
||||||
)
|
)
|
||||||
|
SEARCH_INDEXER_COUNTDOWN = values.IntegerValue(
|
||||||
|
default=1, environ_name="SEARCH_INDEXER_COUNTDOWN", environ_prefix=None
|
||||||
|
)
|
||||||
SEARCH_INDEXER_SECRET = values.Value(
|
SEARCH_INDEXER_SECRET = values.Value(
|
||||||
default=None, environ_name="SEARCH_INDEXER_SECRET", environ_prefix=None
|
default=None, environ_name="SEARCH_INDEXER_SECRET", environ_prefix=None
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user