🚸(backend) sort user search results by proximity with the active user (#1802)

## Purpose
Allows a user to find more easily the other users they search, with the
following order of priority:
- users they already share documents with (more recent first)
- users that share the same full email domain
- ~~users that share the same partial email domain (last two parts)~~
- ~~other users~~

Edit: We need to ilter out other users in order to not reveal email
addresses from members of other organisations. It's still possible to
invite them by email.

Solves #1521

## Proposal
- [x] Add a new function in `core/utils.py`:
`users_sharing_documents_with()`
- [x] Use it as a key to sort the results of a basic user search
- [x] Filter user results to avoid reveal of users (and email addresses)
of other orgs or that have not been interacted with.
- [x] User research through "full" email address (contains the '@') is
left unaffected.

---------

Co-authored-by: Anthony LC <anthony.le-courric@mail.numerique.gouv.fr>
This commit is contained in:
Sylvain Boissel
2026-02-11 18:51:45 +01:00
committed by GitHub
parent 9af540de35
commit 685464f2d7
14 changed files with 416 additions and 35 deletions

View File

@@ -37,6 +37,7 @@ from csp.constants import NONE
from csp.decorators import csp_update
from lasuite.malware_detection import malware_detection
from lasuite.oidc_login.decorators import refresh_oidc_access_token
from lasuite.tools.email import get_domain_from_email
from rest_framework import filters, status, viewsets
from rest_framework import response as drf_response
from rest_framework.permissions import AllowAny
@@ -61,7 +62,11 @@ from core.services.search_indexers import (
get_visited_document_ids_of,
)
from core.tasks.mail import send_ask_for_access_mail
from core.utils import extract_attachments, filter_descendants
from core.utils import (
extract_attachments,
filter_descendants,
users_sharing_documents_with,
)
from . import permissions, serializers, utils
from .filters import DocumentFilter, ListDocumentFilter, UserSearchFilter
@@ -220,18 +225,80 @@ class UserViewSet(
# Use trigram similarity for non-email-like queries
# For performance reasons we filter first by similarity, which relies on an
# index, then only calculate precise similarity scores for sorting purposes
# index, then only calculate precise similarity scores for sorting purposes.
#
# Additionally results are reordered to prefer users "closer" to the current
# user: users they recently shared documents with, then same email domain.
# To achieve that without complex SQL, we build a proximity score in Python
# and return the top N results.
# For security results, users that match neither of these proximity criteria
# are not returned at all, to prevent email enumeration.
current_user = self.request.user
shared_map = users_sharing_documents_with(current_user)
return (
user_email_domain = get_domain_from_email(current_user.email) or ""
candidates = list(
queryset.annotate(
sim_email=TrigramSimilarity("email", query),
sim_name=TrigramSimilarity("full_name", query),
)
.annotate(similarity=Greatest("sim_email", "sim_name"))
.filter(similarity__gt=0.2)
.order_by("-similarity")[: settings.API_USERS_LIST_LIMIT]
.order_by("-similarity")
)
# Keep only users that either share documents with the current user
# or have an email with the same domain as the current user.
filtered_candidates = []
for u in candidates:
candidate_domain = get_domain_from_email(u.email) or ""
if shared_map.get(u.id) or (
user_email_domain and candidate_domain == user_email_domain
):
filtered_candidates.append(u)
candidates = filtered_candidates
# Build ordering key for each candidate
def _sort_key(u):
# shared priority: most recent first
# Use shared_last_at timestamp numeric for secondary ordering when shared.
shared_last_at = shared_map.get(u.id)
if shared_last_at:
is_shared = 1
shared_score = int(shared_last_at.timestamp())
else:
is_shared = 0
shared_score = 0
# domain proximity
candidate_email_domain = get_domain_from_email(u.email) or ""
same_full_domain = (
1
if candidate_email_domain
and candidate_email_domain == user_email_domain
else 0
)
# similarity fallback
sim = getattr(u, "similarity", 0) or 0
return (
is_shared,
shared_score,
same_full_domain,
sim,
)
# Sort candidates by the key descending and return top N as a queryset-like
# list. Keep return type consistent with previous behavior (QuerySet slice
# was returned) by returning a list of model instances.
candidates.sort(key=_sort_key, reverse=True)
return candidates[: settings.API_USERS_LIST_LIMIT]
@drf.decorators.action(
detail=False,
methods=["get"],
@@ -2338,6 +2405,7 @@ class ConfigView(drf.views.APIView):
"""
array_settings = [
"AI_FEATURE_ENABLED",
"API_USERS_SEARCH_QUERY_MIN_LENGTH",
"COLLABORATION_WS_URL",
"COLLABORATION_WS_NOT_CONNECTED_READY_ONLY",
"CONVERSION_FILE_EXTENSIONS_ALLOWED",