✨(backend) add async triggers to enable document indexation with find
On document content or permission changes, start a celery job that will call the indexation API of the app "Find". Signed-off-by: Fabre Florian <ffabre@hybird.org>
This commit is contained in:
committed by
Quentin BEY
parent
1d9c2a8118
commit
72238c1ab6
96
src/backend/core/tasks/find.py
Normal file
96
src/backend/core/tasks/find.py
Normal file
@@ -0,0 +1,96 @@
|
||||
"""Trigger document indexation using celery task."""
|
||||
|
||||
from logging import getLogger
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.cache import cache
|
||||
from django.db import transaction
|
||||
|
||||
from core import models
|
||||
from core.services.search_indexers import (
|
||||
FindDocumentIndexer,
|
||||
get_batch_accesses_by_users_and_teams,
|
||||
)
|
||||
|
||||
from impress.celery_app import app
|
||||
|
||||
# Name the logger after the module's dotted import path (not its file path)
# so that it sits in the package logger hierarchy and inherits the handlers
# and levels configured for its parent packages.
logger = getLogger(__name__)
|
||||
|
||||
|
||||
def document_indexer_debounce_key(document_id):
    """Return the cache key holding the debounce counter of a document.

    Args:
        document_id: Identifier of the document; it is interpolated as-is
            into the key.

    Returns:
        str: The key, of the form ``doc-indexer-debounce-<document_id>``.
    """
    return f"doc-indexer-debounce-{document_id}"
|
||||
|
||||
|
||||
def incr_counter(key):
    """Increment the cache counter stored under ``key``.

    Args:
        key: Cache key of the counter.

    Returns:
        The counter value after the increment, or 1 when ``cache.incr``
        raised ``ValueError`` and the counter had to be (re)initialised.
    """
    try:
        value = cache.incr(key)
    except ValueError:
        # The backend could not increment this key: start counting from one.
        value = 1
        cache.set(key, value)
    return value
|
||||
|
||||
|
||||
def decr_counter(key):
    """Decrement the cache counter stored under ``key``.

    Args:
        key: Cache key of the counter.

    Returns:
        The counter value after the decrement, or 0 when ``cache.decr``
        raised ``ValueError`` and the counter had to be reset.
    """
    try:
        value = cache.decr(key)
    except ValueError:
        # The backend could not decrement this key: reset the counter to zero.
        value = 0
        cache.set(key, value)
    return value
|
||||
|
||||
|
||||
@app.task
def document_indexer_task(document_id):
    """Send the indexation query for a document using a celery task.

    The debounce counter of the document is decremented first. While it is
    still above zero the task is skipped, so that only the last task
    scheduled within the countdown delay performs the query. A document that
    can no longer be found when the task runs is skipped as well.

    Args:
        document_id: Primary key of the document to index.
    """
    key = document_indexer_debounce_key(document_id)

    # Check the counter: if it is still up, skip the task. Only the last one
    # within the countdown delay will do the query.
    if decr_counter(key) > 0:
        logger.info("Skip document %s indexation", document_id)
        return

    try:
        doc = models.Document.objects.get(pk=document_id)
    except models.Document.DoesNotExist:
        # The task runs after a countdown: the row may be gone by now.
        # There is nothing to serialize, so skip instead of failing the task.
        logger.info(
            "Skip document %s indexation: document not found", document_id
        )
        return

    indexer = FindDocumentIndexer()
    accesses = get_batch_accesses_by_users_and_teams((doc.path,))
    data = indexer.serialize_document(document=doc, accesses=accesses)

    logger.info("Start document %s indexation", document_id)
    indexer.push(data)
|
||||
|
||||
|
||||
def trigger_document_indexer(document, on_commit=False):
    """
    Trigger the indexation task with a debounce delay set by the
    SEARCH_INDEXER_COUNTDOWN setting (1 second when the setting is absent).

    Args:
        document (Document): The document instance.
        on_commit (bool): Wait for the end of the transaction before starting the task
            (some fields may be in wrong state within the transaction)
    """
    # NOTE(review): documents flagged as deleted (deleted_at /
    # ancestors_deleted_at) are scheduled like any other document; confirm
    # whether they should be skipped or indexed as inactive instead.

    if on_commit:
        # Re-enter this function, without deferral, once the current
        # transaction has been committed.
        transaction.on_commit(
            lambda: trigger_document_indexer(document, on_commit=False)
        )
        return

    key = document_indexer_debounce_key(document.pk)
    countdown = getattr(settings, "SEARCH_INDEXER_COUNTDOWN", 1)

    logger.info(
        "Add task for document %s indexation in %.2f seconds",
        document.pk,
        countdown,
    )

    # Each time this function is called during the countdown, the counter is
    # incremented and each task decrements it, so that the indexation is run
    # only once.
    incr_counter(key)

    document_indexer_task.apply_async(args=[document.pk], countdown=countdown)
|
||||
Reference in New Issue
Block a user