🚸(backend) sort user search results by proximity with the active user (#1802)

## Purpose
Allows a user to find more easily the other users they search, with the
following order of priority:
- users they already share documents with (more recent first)
- users that share the same full email domain
- ~~users that share the same partial email domain (last two parts)~~
- ~~other users~~

Edit: We need to ilter out other users in order to not reveal email
addresses from members of other organisations. It's still possible to
invite them by email.

Solves #1521

## Proposal
- [x] Add a new function in `core/utils.py`:
`users_sharing_documents_with()`
- [x] Use it as a key to sort the results of a basic user search
- [x] Filter user results to avoid reveal of users (and email addresses)
of other orgs or that have not been interacted with.
- [x] User research through "full" email address (contains the '@') is
left unaffected.

---------

Co-authored-by: Anthony LC <anthony.le-courric@mail.numerique.gouv.fr>
This commit is contained in:
Sylvain Boissel
2026-02-11 18:51:45 +01:00
committed by GitHub
parent 9af540de35
commit 685464f2d7
14 changed files with 416 additions and 35 deletions

View File

@@ -1,13 +1,21 @@
"""Utils for the core app."""
import base64
import logging
import re
import time
from collections import defaultdict
from django.core.cache import cache
from django.db import models as db
from django.db.models import Subquery
import pycrdt
from bs4 import BeautifulSoup
from core import enums
from core import enums, models
logger = logging.getLogger(__name__)
def get_ancestor_to_descendants_map(paths, steplen):
@@ -96,3 +104,46 @@ def extract_attachments(content):
xml_content = base64_yjs_to_xml(content)
return re.findall(enums.MEDIA_STORAGE_URL_EXTRACT, xml_content)
def get_users_sharing_documents_with_cache_key(user):
"""Generate a unique cache key for each user."""
return f"users_sharing_documents_with_{user.id}"
def users_sharing_documents_with(user):
"""
Returns a map of users sharing documents with the given user,
sorted by last shared date.
"""
start_time = time.time()
cache_key = get_users_sharing_documents_with_cache_key(user)
cached_result = cache.get(cache_key)
if cached_result is not None:
elapsed = time.time() - start_time
logger.info(
"users_sharing_documents_with cache hit for user %s (took %.3fs)",
user.id,
elapsed,
)
return cached_result
user_docs_qs = models.DocumentAccess.objects.filter(user=user).values_list(
"document_id", flat=True
)
shared_qs = (
models.DocumentAccess.objects.filter(document_id__in=Subquery(user_docs_qs))
.exclude(user=user)
.values("user")
.annotate(last_shared=db.Max("created_at"))
)
result = {item["user"]: item["last_shared"] for item in shared_qs}
cache.set(cache_key, result, 86400) # Cache for 1 day
elapsed = time.time() - start_time
logger.info(
"users_sharing_documents_with cache miss for user %s (took %.3fs)",
user.id,
elapsed,
)
return result