(backend) add document search indexer

Add an indexer that iterates over the documents in the database, formats them
as JSON objects and indexes them in the remote "Find" microservice.
This commit is contained in:
Samuel Paccoud - DINUM
2025-07-24 12:31:20 +02:00
committed by Quentin BEY
parent f4bdde7e59
commit 1d9c2a8118
7 changed files with 503 additions and 0 deletions

View File

@@ -1,6 +1,7 @@
"""Utils for the core app."""
import base64
from collections import defaultdict
import re
import pycrdt
@@ -9,6 +10,27 @@ from bs4 import BeautifulSoup
from core import enums
def get_ancestor_to_descendants_map(paths, steplen):
    """
    Group document paths under every ancestor prefix they belong to.

    Paths are assumed to follow the materialized path format, i.e. a
    concatenation of segments that are all `steplen` characters long. Every
    prefix of a path whose length is a multiple of `steplen` is treated as an
    ancestor, the path itself included when its length is such a multiple.

    Args:
        paths (list of str): Full document paths.
        steplen (int): Number of characters in one path segment.

    Returns:
        defaultdict[str, set[str]]: For each ancestor path, the set of given
        paths that start with it (the ancestor itself included when it is one
        of the given paths).
    """
    descendants_by_ancestor = defaultdict(set)

    for full_path in paths:
        # Cut points sit at the end of each segment: steplen, 2 * steplen, ...
        # up to and including the full length of the path.
        segment_ends = range(steplen, len(full_path) + 1, steplen)
        for end in segment_ends:
            descendants_by_ancestor[full_path[:end]].add(full_path)

    return descendants_by_ancestor
def filter_descendants(paths, root_paths, skip_sorting=False):
"""
Filters paths to keep only those that are descendants of any path in root_paths.