✨(backend) Index deleted documents
Add SEARCH_INDEXER_COUNTDOWN as a configurable setting. Make the search backend creation simpler (only 'get_document_indexer' now). Allow indexation of deleted documents. Signed-off-by: Fabre Florian <ffabre@hybird.org>
This commit is contained in:
committed by
Quentin BEY
parent
331a94ad2f
commit
a48f61e583
@@ -5,7 +5,7 @@ Unit test for `index` command.
|
||||
from operator import itemgetter
|
||||
from unittest import mock
|
||||
|
||||
from django.core.management import call_command
|
||||
from django.core.management import CommandError, call_command
|
||||
from django.db import transaction
|
||||
|
||||
import pytest
|
||||
@@ -51,3 +51,15 @@ def test_index():
|
||||
],
|
||||
key=itemgetter("id"),
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_index_improperly_configured(indexer_settings):
    """
    The `index` command should fail with a CommandError when no indexer
    class is configured.

    The `indexer_settings` fixture is requested as an argument, so no extra
    `usefixtures` marker is needed to activate it.
    """
    # Unset the indexer class so the command sees a disabled indexer.
    indexer_settings.SEARCH_INDEXER_CLASS = None

    with pytest.raises(CommandError) as exc_info:
        call_command("index")

    # The exact message is part of the command's contract.
    assert str(exc_info.value) == "The indexer is not enabled or properly configured."
|
||||
|
||||
@@ -34,12 +34,10 @@ def indexer_settings_fixture(settings):
|
||||
|
||||
# pylint: disable-next=import-outside-toplevel
|
||||
from core.services.search_indexers import ( # noqa: PLC0415
|
||||
default_document_indexer,
|
||||
get_document_indexer_class,
|
||||
get_document_indexer,
|
||||
)
|
||||
|
||||
default_document_indexer.cache_clear()
|
||||
get_document_indexer_class.cache_clear()
|
||||
get_document_indexer.cache_clear()
|
||||
|
||||
settings.SEARCH_INDEXER_CLASS = "core.services.search_indexers.FindDocumentIndexer"
|
||||
settings.SEARCH_INDEXER_SECRET = "ThisIsAKeyForTest"
|
||||
@@ -51,5 +49,4 @@ def indexer_settings_fixture(settings):
|
||||
yield settings
|
||||
|
||||
# clear cache to prevent issues with other tests
|
||||
default_document_indexer.cache_clear()
|
||||
get_document_indexer_class.cache_clear()
|
||||
get_document_indexer.cache_clear()
|
||||
|
||||
@@ -10,7 +10,7 @@ from faker import Faker
|
||||
from rest_framework.test import APIClient
|
||||
|
||||
from core import factories, models
|
||||
from core.services.search_indexers import default_document_indexer
|
||||
from core.services.search_indexers import get_document_indexer
|
||||
|
||||
fake = Faker()
|
||||
pytestmark = pytest.mark.django_db
|
||||
@@ -54,7 +54,7 @@ def test_api_documents_search_endpoint_is_none(indexer_settings):
|
||||
"""
|
||||
indexer_settings.SEARCH_INDEXER_QUERY_URL = None
|
||||
|
||||
assert default_document_indexer() is None
|
||||
assert get_document_indexer() is None
|
||||
|
||||
user = factories.UserFactory()
|
||||
document = factories.DocumentFactory(title="alpha")
|
||||
@@ -130,7 +130,7 @@ def test_api_documents_search_format(indexer_settings):
|
||||
"""Validate the format of documents as returned by the search view."""
|
||||
indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
|
||||
|
||||
assert default_document_indexer() is not None
|
||||
assert get_document_indexer() is not None
|
||||
|
||||
user = factories.UserFactory()
|
||||
|
||||
@@ -193,7 +193,7 @@ def test_api_documents_search_pagination(indexer_settings):
|
||||
"""Documents should be ordered by descending "updated_at" by default"""
|
||||
indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
|
||||
|
||||
assert default_document_indexer() is not None
|
||||
assert get_document_indexer() is not None
|
||||
|
||||
user = factories.UserFactory()
|
||||
|
||||
|
||||
@@ -5,7 +5,6 @@ Unit tests for the Document model
|
||||
|
||||
import random
|
||||
import smtplib
|
||||
import time
|
||||
from logging import Logger
|
||||
from operator import itemgetter
|
||||
from unittest import mock
|
||||
@@ -23,7 +22,6 @@ import pytest
|
||||
|
||||
from core import factories, models
|
||||
from core.services.search_indexers import FindDocumentIndexer
|
||||
from core.tasks.find import document_indexer_debounce_key
|
||||
|
||||
pytestmark = pytest.mark.django_db
|
||||
|
||||
@@ -1630,6 +1628,48 @@ def test_models_documents_post_save_indexer(mock_push, indexer_settings):
|
||||
"""Test indexation task on document creation"""
|
||||
indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0
|
||||
|
||||
with transaction.atomic():
|
||||
doc1, doc2, doc3 = factories.DocumentFactory.create_batch(3)
|
||||
|
||||
accesses = {}
|
||||
data = [call.args[0] for call in mock_push.call_args_list]
|
||||
|
||||
indexer = FindDocumentIndexer()
|
||||
|
||||
assert sorted(data, key=itemgetter("id")) == sorted(
|
||||
[
|
||||
indexer.serialize_document(doc1, accesses),
|
||||
indexer.serialize_document(doc2, accesses),
|
||||
indexer.serialize_document(doc3, accesses),
|
||||
],
|
||||
key=itemgetter("id"),
|
||||
)
|
||||
|
||||
# The debounce counters should be reset
|
||||
assert cache.get(f"doc-indexer-debounce-{doc1.pk}") == 0
|
||||
assert cache.get(f"doc-indexer-debounce-{doc2.pk}") == 0
|
||||
assert cache.get(f"doc-indexer-debounce-{doc3.pk}") == 0
|
||||
|
||||
|
||||
@mock.patch.object(FindDocumentIndexer, "push")
@pytest.mark.django_db(transaction=True)
def test_models_documents_post_save_indexer_not_configured(mock_push, indexer_settings):
    """
    Creating a document should not push anything to the indexer when
    SEARCH_INDEXER_CLASS is unset.
    """
    # Disable the indexer and remove the countdown setting's delay value.
    indexer_settings.SEARCH_INDEXER_CLASS = None
    indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0

    # Create the document inside an explicit transaction block.
    with transaction.atomic():
        factories.DocumentFactory()

    recorded_pushes = mock_push.call_args_list
    assert recorded_pushes == []
|
||||
|
||||
|
||||
@mock.patch.object(FindDocumentIndexer, "push")
|
||||
@pytest.mark.django_db(transaction=True)
|
||||
def test_models_documents_post_save_indexer_with_accesses(mock_push, indexer_settings):
|
||||
"""Test indexation task on document creation"""
|
||||
indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0
|
||||
|
||||
user = factories.UserFactory()
|
||||
|
||||
with transaction.atomic():
|
||||
@@ -1639,8 +1679,6 @@ def test_models_documents_post_save_indexer(mock_push, indexer_settings):
|
||||
factories.UserDocumentAccessFactory(document=doc2, user=user)
|
||||
factories.UserDocumentAccessFactory(document=doc3, user=user)
|
||||
|
||||
time.sleep(0.2) # waits for the end of the tasks
|
||||
|
||||
accesses = {
|
||||
str(doc1.path): {"users": [user.sub]},
|
||||
str(doc2.path): {"users": [user.sub]},
|
||||
@@ -1661,15 +1699,15 @@ def test_models_documents_post_save_indexer(mock_push, indexer_settings):
|
||||
)
|
||||
|
||||
# The debounce counters should be reset
|
||||
assert cache.get(document_indexer_debounce_key(doc1.pk)) == 0
|
||||
assert cache.get(document_indexer_debounce_key(doc2.pk)) == 0
|
||||
assert cache.get(document_indexer_debounce_key(doc3.pk)) == 0
|
||||
assert cache.get(f"doc-indexer-debounce-{doc1.pk}") == 0
|
||||
assert cache.get(f"doc-indexer-debounce-{doc2.pk}") == 0
|
||||
assert cache.get(f"doc-indexer-debounce-{doc3.pk}") == 0
|
||||
|
||||
|
||||
@mock.patch.object(FindDocumentIndexer, "push")
|
||||
@pytest.mark.django_db(transaction=True)
|
||||
def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings):
|
||||
"""Skip indexation task on deleted or ancestor_deleted documents"""
|
||||
"""Indexation task on deleted or ancestor_deleted documents"""
|
||||
indexer_settings.SEARCH_INDEXER_COUNTDOWN = 0
|
||||
|
||||
user = factories.UserFactory()
|
||||
@@ -1694,8 +1732,6 @@ def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings)
|
||||
assert doc_ancestor_deleted.deleted_at is None
|
||||
assert doc_ancestor_deleted.ancestors_deleted_at is not None
|
||||
|
||||
time.sleep(0.2) # waits for the end of the tasks
|
||||
|
||||
accesses = {
|
||||
str(doc.path): {"users": [user.sub]},
|
||||
str(doc_deleted.path): {"users": [user.sub]},
|
||||
@@ -1706,17 +1742,21 @@ def test_models_documents_post_save_indexer_deleted(mock_push, indexer_settings)
|
||||
|
||||
indexer = FindDocumentIndexer()
|
||||
|
||||
# Only the not deleted document is indexed
|
||||
assert data == [
|
||||
indexer.serialize_document(doc, accesses),
|
||||
]
|
||||
# Even deleted documents are re-indexed. Should we only update their status in the future?
|
||||
assert sorted(data, key=itemgetter("id")) == sorted(
|
||||
[
|
||||
indexer.serialize_document(doc, accesses),
|
||||
indexer.serialize_document(doc_deleted, accesses),
|
||||
indexer.serialize_document(doc_ancestor_deleted, accesses),
|
||||
indexer.serialize_document(doc_deleted, accesses), # soft_delete()
|
||||
],
|
||||
key=itemgetter("id"),
|
||||
)
|
||||
|
||||
# The debounce counters should be reset
|
||||
assert cache.get(document_indexer_debounce_key(doc.pk)) == 0
|
||||
|
||||
# These caches are not filled
|
||||
assert cache.get(document_indexer_debounce_key(doc_deleted.pk)) is None
|
||||
assert cache.get(document_indexer_debounce_key(doc_ancestor_deleted.pk)) is None
|
||||
assert cache.get(f"doc-indexer-debounce-{doc.pk}") == 0
|
||||
assert cache.get(f"doc-indexer-debounce-{doc_deleted.pk}") == 0
|
||||
assert cache.get(f"doc-indexer-debounce-{doc_ancestor_deleted.pk}") == 0
|
||||
|
||||
|
||||
@mock.patch.object(FindDocumentIndexer, "push")
|
||||
@@ -1747,20 +1787,16 @@ def test_models_documents_post_save_indexer_restored(mock_push, indexer_settings
|
||||
assert doc_ancestor_deleted.deleted_at is None
|
||||
assert doc_ancestor_deleted.ancestors_deleted_at is not None
|
||||
|
||||
time.sleep(0.2) # waits for the end of the tasks
|
||||
doc_restored = models.Document.objects.get(pk=doc_deleted.pk)
|
||||
doc_restored.restore()
|
||||
|
||||
doc_deleted.restore()
|
||||
doc_ancestor_restored = models.Document.objects.get(pk=doc_ancestor_deleted.pk)
|
||||
|
||||
doc_deleted.refresh_from_db()
|
||||
doc_ancestor_deleted.refresh_from_db()
|
||||
assert doc_restored.deleted_at is None
|
||||
assert doc_restored.ancestors_deleted_at is None
|
||||
|
||||
assert doc_deleted.deleted_at is None
|
||||
assert doc_deleted.ancestors_deleted_at is None
|
||||
|
||||
assert doc_ancestor_deleted.deleted_at is None
|
||||
assert doc_ancestor_deleted.ancestors_deleted_at is None
|
||||
|
||||
time.sleep(0.2)
|
||||
assert doc_ancestor_restored.deleted_at is None
|
||||
assert doc_ancestor_restored.ancestors_deleted_at is None
|
||||
|
||||
accesses = {
|
||||
str(doc.path): {"users": [user.sub]},
|
||||
@@ -1777,7 +1813,9 @@ def test_models_documents_post_save_indexer_restored(mock_push, indexer_settings
|
||||
[
|
||||
indexer.serialize_document(doc, accesses),
|
||||
indexer.serialize_document(doc_deleted, accesses),
|
||||
# The restored document child is not saved so no indexation.
|
||||
indexer.serialize_document(doc_deleted, accesses), # soft_delete()
|
||||
indexer.serialize_document(doc_restored, accesses), # restore()
|
||||
indexer.serialize_document(doc_ancestor_deleted, accesses),
|
||||
],
|
||||
key=itemgetter("id"),
|
||||
)
|
||||
@@ -1800,31 +1838,25 @@ def test_models_documents_post_save_indexer_debounce(indexer_settings):
|
||||
str(doc.path): {"users": [user.sub]},
|
||||
}
|
||||
|
||||
time.sleep(0.1) # waits for the end of the tasks
|
||||
|
||||
with mock.patch.object(FindDocumentIndexer, "push") as mock_push:
|
||||
# Simulate 1 waiting task
|
||||
cache.set(document_indexer_debounce_key(doc.pk), 1)
|
||||
cache.set(f"doc-indexer-debounce-{doc.pk}", 1)
|
||||
|
||||
# save doc to trigger the indexer, but nothing should be done since
|
||||
# the counter is over 0
|
||||
with transaction.atomic():
|
||||
doc.save()
|
||||
|
||||
time.sleep(0.1)
|
||||
|
||||
assert [call.args[0] for call in mock_push.call_args_list] == []
|
||||
|
||||
with mock.patch.object(FindDocumentIndexer, "push") as mock_push:
|
||||
# No waiting task
|
||||
cache.set(document_indexer_debounce_key(doc.pk), 0)
|
||||
cache.set(f"doc-indexer-debounce-{doc.pk}", 0)
|
||||
|
||||
with transaction.atomic():
|
||||
doc = models.Document.objects.get(pk=doc.pk)
|
||||
doc.save()
|
||||
|
||||
time.sleep(0.1)
|
||||
|
||||
assert [call.args[0] for call in mock_push.call_args_list] == [
|
||||
indexer.serialize_document(doc, accesses),
|
||||
]
|
||||
@@ -1853,8 +1885,6 @@ def test_models_documents_access_post_save_indexer(indexer_settings):
|
||||
with transaction.atomic():
|
||||
doc_access.save()
|
||||
|
||||
time.sleep(0.1)
|
||||
|
||||
assert [call.args[0] for call in mock_push.call_args_list] == [
|
||||
indexer.serialize_document(doc, accesses),
|
||||
]
|
||||
|
||||
@@ -16,8 +16,7 @@ from core import factories, models, utils
|
||||
from core.services.search_indexers import (
|
||||
BaseDocumentIndexer,
|
||||
FindDocumentIndexer,
|
||||
default_document_indexer,
|
||||
get_document_indexer_class,
|
||||
get_document_indexer,
|
||||
get_visited_document_ids_of,
|
||||
)
|
||||
|
||||
@@ -37,41 +36,13 @@ class FakeDocumentIndexer(BaseDocumentIndexer):
|
||||
return {}
|
||||
|
||||
|
||||
def test_services_search_indexer_class_is_empty(indexer_settings):
    """
    `get_document_indexer_class` should raise ImproperlyConfigured when
    SEARCH_INDEXER_CLASS is either None or an empty string.
    """
    expected_message = "SEARCH_INDEXER_CLASS must be set in Django settings."

    def assert_rejected():
        # Resolve the class and check the error text for the current setting.
        with pytest.raises(ImproperlyConfigured) as raised:
            get_document_indexer_class()

        assert expected_message in str(raised.value)

    # Case 1: the setting is None.
    indexer_settings.SEARCH_INDEXER_CLASS = None
    assert_rejected()

    # Case 2: the setting is an empty string; the cache is cleared before
    # resolving the class again.
    indexer_settings.SEARCH_INDEXER_CLASS = ""
    get_document_indexer_class.cache_clear()
    assert_rejected()
|
||||
|
||||
|
||||
def test_services_search_indexer_class_invalid(indexer_settings):
|
||||
"""
|
||||
Should raise RuntimeError if SEARCH_INDEXER_CLASS cannot be imported.
|
||||
"""
|
||||
indexer_settings.SEARCH_INDEXER_CLASS = "unknown.Unknown"
|
||||
|
||||
with pytest.raises(ImproperlyConfigured) as exc_info:
|
||||
get_document_indexer_class()
|
||||
|
||||
assert (
|
||||
"SEARCH_INDEXER_CLASS setting is not valid : No module named 'unknown'"
|
||||
in str(exc_info.value)
|
||||
)
|
||||
assert get_document_indexer() is None
|
||||
|
||||
|
||||
def test_services_search_indexer_class(indexer_settings):
|
||||
@@ -82,8 +53,9 @@ def test_services_search_indexer_class(indexer_settings):
|
||||
"core.tests.test_services_search_indexers.FakeDocumentIndexer"
|
||||
)
|
||||
|
||||
assert get_document_indexer_class() == import_string(
|
||||
"core.tests.test_services_search_indexers.FakeDocumentIndexer"
|
||||
assert isinstance(
|
||||
get_document_indexer(),
|
||||
import_string("core.tests.test_services_search_indexers.FakeDocumentIndexer"),
|
||||
)
|
||||
|
||||
|
||||
@@ -95,28 +67,28 @@ def test_services_search_indexer_is_configured(indexer_settings):
|
||||
indexer_settings.SEARCH_INDEXER_CLASS = None
|
||||
|
||||
# None
|
||||
default_document_indexer.cache_clear()
|
||||
assert not default_document_indexer()
|
||||
get_document_indexer.cache_clear()
|
||||
assert not get_document_indexer()
|
||||
|
||||
# Empty
|
||||
indexer_settings.SEARCH_INDEXER_CLASS = ""
|
||||
|
||||
default_document_indexer.cache_clear()
|
||||
assert not default_document_indexer()
|
||||
get_document_indexer.cache_clear()
|
||||
assert not get_document_indexer()
|
||||
|
||||
# Valid class
|
||||
indexer_settings.SEARCH_INDEXER_CLASS = (
|
||||
"core.services.search_indexers.FindDocumentIndexer"
|
||||
)
|
||||
|
||||
default_document_indexer.cache_clear()
|
||||
assert default_document_indexer() is not None
|
||||
get_document_indexer.cache_clear()
|
||||
assert get_document_indexer() is not None
|
||||
|
||||
indexer_settings.SEARCH_INDEXER_URL = ""
|
||||
|
||||
# Invalid url
|
||||
default_document_indexer.cache_clear()
|
||||
assert not default_document_indexer()
|
||||
get_document_indexer.cache_clear()
|
||||
assert not get_document_indexer()
|
||||
|
||||
|
||||
def test_services_search_indexer_url_is_none(indexer_settings):
|
||||
|
||||
Reference in New Issue
Block a user