✨(backend) Index partially empty documents
Only documents with neither a title nor content are ignored by the indexer.
This commit is contained in:
committed by
Quentin BEY
parent
01c31ddd74
commit
331a94ad2f
@@ -21,8 +21,11 @@ class Command(BaseCommand):
|
||||
"""Launch and log search index generation."""
|
||||
logger.info("Starting to regenerate Find index...")
|
||||
start = time.perf_counter()
|
||||
|
||||
FindDocumentIndexer().index()
|
||||
count = FindDocumentIndexer().index()
|
||||
|
||||
duration = time.perf_counter() - start
|
||||
logger.info("Search index regenerated in %.2f seconds.", duration)
|
||||
logger.info(
|
||||
"Search index regenerated from %d document(s) in %.2f seconds.",
|
||||
count,
|
||||
duration,
|
||||
)
|
||||
|
||||
@@ -146,6 +146,8 @@ class BaseDocumentIndexer(ABC):
|
||||
Fetch documents in batches, serialize them, and push to the search backend.
|
||||
"""
|
||||
last_id = 0
|
||||
count = 0
|
||||
|
||||
while True:
|
||||
documents_batch = list(
|
||||
models.Document.objects.filter(
|
||||
@@ -163,9 +165,13 @@ class BaseDocumentIndexer(ABC):
|
||||
serialized_batch = [
|
||||
self.serialize_document(document, accesses_by_document_path)
|
||||
for document in documents_batch
|
||||
if document.content
|
||||
if document.content or document.title
|
||||
]
|
||||
|
||||
self.push(serialized_batch)
|
||||
count += len(serialized_batch)
|
||||
|
||||
return count
|
||||
|
||||
@abstractmethod
|
||||
def serialize_document(self, document, accesses):
|
||||
|
||||
@@ -304,7 +304,7 @@ def test_services_search_indexers_batches_pass_only_batch_accesses(
|
||||
access = factories.UserDocumentAccessFactory(document=document)
|
||||
expected_user_subs[str(document.id)] = str(access.user.sub)
|
||||
|
||||
FindDocumentIndexer().index()
|
||||
assert FindDocumentIndexer().index() == 5
|
||||
|
||||
# Should be 3 batches: 2 + 2 + 1
|
||||
assert mock_push.call_count == 3
|
||||
@@ -327,6 +327,34 @@ def test_services_search_indexers_batches_pass_only_batch_accesses(
|
||||
assert seen_doc_ids == {str(d.id) for d in documents}
|
||||
|
||||
|
||||
@patch.object(FindDocumentIndexer, "push")
|
||||
@pytest.mark.usefixtures("indexer_settings")
|
||||
def test_services_search_indexers_ignore_empty_documents(mock_push):
|
||||
"""
|
||||
Documents with neither a title nor content should be ignored by the indexer,
|
||||
while documents missing only a title or only content should still be indexed.
|
||||
"""
|
||||
document = factories.DocumentFactory()
|
||||
factories.DocumentFactory(content="", title="")
|
||||
empty_title = factories.DocumentFactory(title="")
|
||||
empty_content = factories.DocumentFactory(content="")
|
||||
|
||||
assert FindDocumentIndexer().index() == 3
|
||||
|
||||
assert mock_push.call_count == 1
|
||||
|
||||
# Make sure only documents with a title or content are indexed
|
||||
results = {doc["id"] for doc in mock_push.call_args[0][0]}
|
||||
assert results == {
|
||||
str(d.id)
|
||||
for d in (
|
||||
document,
|
||||
empty_content,
|
||||
empty_title,
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@patch.object(FindDocumentIndexer, "push")
|
||||
@pytest.mark.usefixtures("indexer_settings")
|
||||
def test_services_search_indexers_ancestors_link_reach(mock_push):
|
||||
@@ -338,7 +366,7 @@ def test_services_search_indexers_ancestors_link_reach(mock_push):
|
||||
parent = factories.DocumentFactory(parent=grand_parent, link_reach="public")
|
||||
document = factories.DocumentFactory(parent=parent, link_reach="restricted")
|
||||
|
||||
FindDocumentIndexer().index()
|
||||
assert FindDocumentIndexer().index() == 4
|
||||
|
||||
results = {doc["id"]: doc for doc in mock_push.call_args[0][0]}
|
||||
assert len(results) == 4
|
||||
@@ -358,7 +386,7 @@ def test_services_search_indexers_ancestors_users(mock_push):
|
||||
parent = factories.DocumentFactory(parent=grand_parent, users=[user_p])
|
||||
document = factories.DocumentFactory(parent=parent, users=[user_d])
|
||||
|
||||
FindDocumentIndexer().index()
|
||||
assert FindDocumentIndexer().index() == 3
|
||||
|
||||
results = {doc["id"]: doc for doc in mock_push.call_args[0][0]}
|
||||
assert len(results) == 3
|
||||
@@ -379,7 +407,7 @@ def test_services_search_indexers_ancestors_teams(mock_push):
|
||||
parent = factories.DocumentFactory(parent=grand_parent, teams=["team_p"])
|
||||
document = factories.DocumentFactory(parent=parent, teams=["team_d"])
|
||||
|
||||
FindDocumentIndexer().index()
|
||||
assert FindDocumentIndexer().index() == 3
|
||||
|
||||
results = {doc["id"]: doc for doc in mock_push.call_args[0][0]}
|
||||
assert len(results) == 3
|
||||
|
||||
Reference in New Issue
Block a user