✨(backend) add async triggers to enable document indexation with find
On document content or permission changes, start a Celery job that will call the indexation API of the app "Find". Signed-off-by: Fabre Florian <ffabre@hybird.org>
This commit is contained in:
committed by
Quentin BEY
parent
1d9c2a8118
commit
72238c1ab6
@@ -5,6 +5,7 @@ from abc import ABC, abstractmethod
|
||||
from collections import defaultdict
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import AnonymousUser
|
||||
|
||||
import requests
|
||||
|
||||
@@ -18,11 +19,13 @@ def get_batch_accesses_by_users_and_teams(paths):
|
||||
Get accesses related to a list of document paths,
|
||||
grouped by users and teams, including all ancestor paths.
|
||||
"""
|
||||
print("paths: ", paths)
|
||||
ancestor_map = utils.get_ancestor_to_descendants_map(paths, steplen=models.Document.steplen)
|
||||
# print("paths: ", paths)
|
||||
ancestor_map = utils.get_ancestor_to_descendants_map(
|
||||
paths, steplen=models.Document.steplen
|
||||
)
|
||||
ancestor_paths = list(ancestor_map.keys())
|
||||
print("ancestor map: ", ancestor_map)
|
||||
print("ancestor paths: ", ancestor_paths)
|
||||
# print("ancestor map: ", ancestor_map)
|
||||
# print("ancestor paths: ", ancestor_paths)
|
||||
|
||||
access_qs = models.DocumentAccess.objects.filter(
|
||||
document__path__in=ancestor_paths
|
||||
@@ -44,6 +47,22 @@ def get_batch_accesses_by_users_and_teams(paths):
|
||||
return dict(access_by_document_path)
|
||||
|
||||
|
||||
def get_visited_document_ids_of(user):
    """
    Return the ids of the documents for which the user has a link trace.

    Link traces pointing to a document on which the user holds a direct
    access (``document__accesses__user``) are left out.

    Args:
        user: the user performing the search; may be an ``AnonymousUser``.

    Returns:
        list[str]: de-duplicated document ids rendered as strings, in no
        particular order. Always empty for an ``AnonymousUser``.
    """
    if isinstance(user, AnonymousUser):
        return []

    # TODO: exclude links when the user already has a specific access to the
    # doc. Direct user accesses are excluded below; NOTE(review): accesses
    # granted through a team do not appear to be covered here - confirm.
    qs = models.LinkTrace.objects.filter(user=user).exclude(
        document__accesses__user=user,
    )

    # The set comprehension removes duplicated ids before building the list.
    # `doc_id` is used instead of `id` to avoid shadowing the builtin.
    return list(
        {str(doc_id) for doc_id in qs.values_list("document_id", flat=True)}
    )
|
||||
|
||||
|
||||
class BaseDocumentIndexer(ABC):
|
||||
"""
|
||||
Base class for document indexers.
|
||||
@@ -84,6 +103,7 @@ class BaseDocumentIndexer(ABC):
|
||||
serialized_batch = [
|
||||
self.serialize_document(document, accesses_by_document_path)
|
||||
for document in documents_batch
|
||||
if document.content
|
||||
]
|
||||
self.push(serialized_batch)
|
||||
|
||||
@@ -103,6 +123,38 @@ class BaseDocumentIndexer(ABC):
|
||||
Must be implemented by subclasses.
|
||||
"""
|
||||
|
||||
def search(self, text, user, token):
    """
    Search for documents in Find app.

    Args:
        text: the query string, sent to the backend under the "q" key.
        user: the user performing the search; used to compute the list of
            visited document ids sent under the "visited" key.
        token: forwarded unchanged to ``search_query``.

    Returns:
        The value produced by ``format_response`` from the raw result of
        ``search_query``.
    """
    visited_ids = get_visited_document_ids_of(user)

    response = self.search_query(
        data={
            "q": text,
            "visited": visited_ids,
            "services": ["docs"],
        },
        token=token,
    )

    # The raw backend response is intentionally not printed: it was a debug
    # leftover that wrote search results to stdout.
    return self.format_response(response)
|
||||
|
||||
@abstractmethod
def search_query(self, data, token) -> dict:
    """
    Retrieve documents from the Find app API.

    Must be implemented by subclasses.
    """
|
||||
|
||||
@abstractmethod
def format_response(self, data: dict):
    """
    Convert the JSON response from the Find app into a document queryset.

    Must be implemented by subclasses.
    """
|
||||
|
||||
|
||||
class FindDocumentIndexer(BaseDocumentIndexer):
|
||||
"""
|
||||
@@ -121,10 +173,12 @@ class FindDocumentIndexer(BaseDocumentIndexer):
|
||||
dict: A JSON-serializable dictionary.
|
||||
"""
|
||||
doc_path = document.path
|
||||
text_content = utils.base64_yjs_to_text(document.content)
|
||||
doc_content = document.content
|
||||
text_content = utils.base64_yjs_to_text(doc_content) if doc_content else ""
|
||||
|
||||
return {
|
||||
"id": str(document.id),
|
||||
"title": document.title,
|
||||
"title": document.title or "",
|
||||
"content": text_content,
|
||||
"depth": document.depth,
|
||||
"path": document.path,
|
||||
@@ -138,6 +192,46 @@ class FindDocumentIndexer(BaseDocumentIndexer):
|
||||
"is_active": not bool(document.ancestors_deleted_at),
|
||||
}
|
||||
|
||||
def search_query(self, data, token) -> dict:
    """
    Retrieve documents from the Find app API.

    Args:
        data (dict): search data, sent as the JSON body of the request.
        token (str): OIDC token, sent as a Bearer ``Authorization`` header.

    Returns:
        The decoded JSON body of the response (``response.json()``), not the
        ``requests.Response`` object itself.
        NOTE(review): annotated as ``dict`` to match the abstract method;
        confirm the actual payload shape against the Find API.

    Raises:
        RuntimeError: if ``SEARCH_INDEXER_QUERY_URL`` is missing or empty in
            the Django settings.
        requests.exceptions.HTTPError: re-raised after being logged when the
            request ends with an HTTP error.
    """
    url = getattr(settings, "SEARCH_INDEXER_QUERY_URL", None)

    if not url:
        raise RuntimeError(
            "SEARCH_INDEXER_QUERY_URL must be set in Django settings before indexing."
        )

    try:
        response = requests.post(
            url,
            json=data,
            headers={"Authorization": f"Bearer {token}"},
            timeout=10,
        )
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as e:
        # Log both the error and the body returned by the backend, then let
        # the caller decide how to handle the failure.
        logger.error("HTTPError: %s", e)
        logger.error("Response content: %s", response.text)  # type: ignore
        raise
|
||||
|
||||
def format_response(self, data: dict):
    """
    Build a Document queryset from the entries of a Find app response.

    Each entry of ``data`` is read through its ``"_id"`` key and the
    collected values are used as a primary-key filter on ``Document``.
    """
    document_ids = [hit["_id"] for hit in data]
    return models.Document.objects.filter(pk__in=document_ids)
|
||||
|
||||
def push(self, data):
|
||||
"""
|
||||
Push a batch of documents to the Find backend.
|
||||
@@ -156,6 +250,7 @@ class FindDocumentIndexer(BaseDocumentIndexer):
|
||||
raise RuntimeError(
|
||||
"SEARCH_INDEXER_SECRET must be set in Django settings before indexing."
|
||||
)
|
||||
|
||||
try:
|
||||
response = requests.post(
|
||||
url,
|
||||
|
||||
Reference in New Issue
Block a user