(backend) Index partially empty documents

Only documents that have neither a title nor content are ignored by the indexer.
This commit is contained in:
Fabre Florian
2025-09-22 16:05:39 +02:00
committed by Quentin BEY
parent 01c31ddd74
commit 331a94ad2f
3 changed files with 45 additions and 8 deletions

View File

@@ -146,6 +146,8 @@ class BaseDocumentIndexer(ABC):
Fetch documents in batches, serialize them, and push to the search backend.
"""
last_id = 0
count = 0
while True:
documents_batch = list(
models.Document.objects.filter(
@@ -163,9 +165,13 @@ class BaseDocumentIndexer(ABC):
serialized_batch = [
self.serialize_document(document, accesses_by_document_path)
for document in documents_batch
if document.content
if document.content or document.title
]
self.push(serialized_batch)
count += len(serialized_batch)
return count
@abstractmethod
def serialize_document(self, document, accesses):