(backend) add fallback search & default ordering

Filter deleted documents from visited ones.
Set the default ordering of the Find API search call (-updated_at).
BaseDocumentIndexer.search now returns a list of document ids instead of models.
Do not call the indexer in signals when SEARCH_INDEXER_CLASS is not defined
or not properly configured.

Signed-off-by: Fabre Florian <ffabre@hybird.org>
This commit is contained in:
Fabre Florian
2025-09-17 07:47:15 +02:00
committed by Quentin BEY
parent bf978b5376
commit 01c31ddd74
11 changed files with 558 additions and 153 deletions

View File

@@ -8,6 +8,7 @@ from functools import cache
from django.conf import settings
from django.contrib.auth.models import AnonymousUser
from django.core.exceptions import ImproperlyConfigured
from django.db.models import Subquery
from django.utils.module_loading import import_string
import requests
@@ -18,7 +19,23 @@ logger = logging.getLogger(__name__)
@cache
def get_document_indexer_class() -> "BaseDocumentIndexer":
def default_document_indexer():
    """
    Return an instance of the configured document indexer, or None.

    None is returned (and the reason is logged) when the SEARCH_INDEXER_CLASS
    setting is missing or empty, or when resolving or instantiating the
    indexer class raises ImproperlyConfigured.
    """
    # An unset indexer class is a supported configuration here, not an error.
    if getattr(settings, "SEARCH_INDEXER_CLASS", None):
        try:
            indexer_class = get_document_indexer_class()
            return indexer_class()
        except ImproperlyConfigured as exc:
            logger.error("Document indexer is not properly configured : %s", exc)
    else:
        logger.info("Document indexer is not configured (see SEARCH_INDEXER_CLASS)")

    return None
@cache
def get_document_indexer_class():
"""Return the indexer backend class based on the settings."""
classpath = settings.SEARCH_INDEXER_CLASS
@@ -65,7 +82,7 @@ def get_batch_accesses_by_users_and_teams(paths):
return dict(access_by_document_path)
def get_visited_document_ids_of(user):
def get_visited_document_ids_of(queryset, user):
"""
Returns the ids of the documents that have a linktrace to the user and NOT owned.
It will be used to limit the opensearch responses to the public documents already
@@ -74,11 +91,18 @@ def get_visited_document_ids_of(user):
if isinstance(user, AnonymousUser):
return []
qs = models.LinkTrace.objects.filter(user=user).exclude(
document__accesses__user=user,
qs = models.LinkTrace.objects.filter(user=user)
docs = (
queryset.exclude(accesses__user=user)
.filter(
deleted_at__isnull=True,
ancestors_deleted_at__isnull=True,
)
.filter(pk__in=Subquery(qs.values("document_id")))
)
return list({str(id) for id in qs.values_list("document_id", flat=True)})
return list({str(id) for id in docs.values_list("pk", flat=True)})
class BaseDocumentIndexer(ABC):
@@ -159,22 +183,41 @@ class BaseDocumentIndexer(ABC):
Must be implemented by subclasses.
"""
def search(self, text, user, token):
# pylint: disable-next=too-many-arguments,too-many-positional-arguments
def search(self, text, token, visited=(), page=1, page_size=50):
"""
Search for documents in Find app.
"""
visited_ids = get_visited_document_ids_of(user)
Ensure the same default ordering as "Docs" list : -updated_at
Returns ids of the documents
Args:
text (str): Text search content.
token (str): OIDC Authentication token.
visited (list, optional):
List of ids of active public documents with LinkTrace
Defaults to an empty tuple.
page (int, optional):
The page number to retrieve.
Defaults to 1 if not specified.
page_size (int, optional):
The number of results to return per page.
Defaults to 50 if not specified.
"""
response = self.search_query(
data={
"q": text,
"visited": visited_ids,
"visited": visited,
"services": ["docs"],
"page_number": page,
"page_size": page_size,
"order_by": "updated_at",
"order_direction": "desc",
},
token=token,
)
return self.format_response(response)
return [d["_id"] for d in response]
@abstractmethod
def search_query(self, data, token) -> dict:
@@ -184,14 +227,6 @@ class BaseDocumentIndexer(ABC):
Must be implemented by subclasses.
"""
@abstractmethod
def format_response(self, data: dict):
"""
Convert the JSON response from Find app as document queryset.
Must be implemented by subclasses.
"""
class FindDocumentIndexer(BaseDocumentIndexer):
"""
@@ -253,12 +288,6 @@ class FindDocumentIndexer(BaseDocumentIndexer):
logger.error("HTTPError: %s", e)
raise
def format_response(self, data: dict):
"""
Retrieve documents ids from Find app response and return a queryset.
"""
return models.Document.objects.filter(pk__in=[d["_id"] for d in data])
def push(self, data):
"""
Push a batch of documents to the Find backend.
@@ -266,7 +295,6 @@ class FindDocumentIndexer(BaseDocumentIndexer):
Args:
data (list): List of document dictionaries.
"""
try:
response = requests.post(
self.indexer_url,