🚸(backend) sort user search results by proximity with the active user (#1802)
## Purpose Allows a user to more easily find the other users they search for, with the following order of priority: - users they already share documents with (most recent first) - users that share the same full email domain - ~~users that share the same partial email domain (last two parts)~~ - ~~other users~~ Edit: We need to filter out other users in order not to reveal email addresses of members of other organisations. It's still possible to invite them by email. Solves #1521 ## Proposal - [x] Add a new function in `core/utils.py`: `users_sharing_documents_with()` - [x] Use it as a key to sort the results of a basic user search - [x] Filter user results to avoid revealing users (and email addresses) of other orgs or users that have not been interacted with. - [x] User search by "full" email address (contains the '@') is left unaffected. --------- Co-authored-by: Anthony LC <anthony.le-courric@mail.numerique.gouv.fr>
This commit is contained in:
@@ -37,6 +37,7 @@ from csp.constants import NONE
|
||||
from csp.decorators import csp_update
|
||||
from lasuite.malware_detection import malware_detection
|
||||
from lasuite.oidc_login.decorators import refresh_oidc_access_token
|
||||
from lasuite.tools.email import get_domain_from_email
|
||||
from rest_framework import filters, status, viewsets
|
||||
from rest_framework import response as drf_response
|
||||
from rest_framework.permissions import AllowAny
|
||||
@@ -61,7 +62,11 @@ from core.services.search_indexers import (
|
||||
get_visited_document_ids_of,
|
||||
)
|
||||
from core.tasks.mail import send_ask_for_access_mail
|
||||
from core.utils import extract_attachments, filter_descendants
|
||||
from core.utils import (
|
||||
extract_attachments,
|
||||
filter_descendants,
|
||||
users_sharing_documents_with,
|
||||
)
|
||||
|
||||
from . import permissions, serializers, utils
|
||||
from .filters import DocumentFilter, ListDocumentFilter, UserSearchFilter
|
||||
@@ -220,18 +225,80 @@ class UserViewSet(
|
||||
|
||||
# Use trigram similarity for non-email-like queries
|
||||
# For performance reasons we filter first by similarity, which relies on an
|
||||
# index, then only calculate precise similarity scores for sorting purposes
|
||||
# index, then only calculate precise similarity scores for sorting purposes.
|
||||
#
|
||||
# Additionally results are reordered to prefer users "closer" to the current
|
||||
# user: users they recently shared documents with, then same email domain.
|
||||
# To achieve that without complex SQL, we build a proximity score in Python
|
||||
# and return the top N results.
|
||||
# For security reasons, users that match neither of these proximity criteria
|
||||
# are not returned at all, to prevent email enumeration.
|
||||
current_user = self.request.user
|
||||
shared_map = users_sharing_documents_with(current_user)
|
||||
|
||||
return (
|
||||
user_email_domain = get_domain_from_email(current_user.email) or ""
|
||||
|
||||
candidates = list(
|
||||
queryset.annotate(
|
||||
sim_email=TrigramSimilarity("email", query),
|
||||
sim_name=TrigramSimilarity("full_name", query),
|
||||
)
|
||||
.annotate(similarity=Greatest("sim_email", "sim_name"))
|
||||
.filter(similarity__gt=0.2)
|
||||
.order_by("-similarity")[: settings.API_USERS_LIST_LIMIT]
|
||||
.order_by("-similarity")
|
||||
)
|
||||
|
||||
# Keep only users that either share documents with the current user
|
||||
# or have an email with the same domain as the current user.
|
||||
filtered_candidates = []
|
||||
for u in candidates:
|
||||
candidate_domain = get_domain_from_email(u.email) or ""
|
||||
if shared_map.get(u.id) or (
|
||||
user_email_domain and candidate_domain == user_email_domain
|
||||
):
|
||||
filtered_candidates.append(u)
|
||||
|
||||
candidates = filtered_candidates
|
||||
|
||||
# Build ordering key for each candidate
|
||||
def _sort_key(u):
|
||||
# shared priority: most recent first
|
||||
# Use the numeric shared_last_at timestamp for secondary ordering when shared.
|
||||
shared_last_at = shared_map.get(u.id)
|
||||
if shared_last_at:
|
||||
is_shared = 1
|
||||
shared_score = int(shared_last_at.timestamp())
|
||||
else:
|
||||
is_shared = 0
|
||||
shared_score = 0
|
||||
|
||||
# domain proximity
|
||||
candidate_email_domain = get_domain_from_email(u.email) or ""
|
||||
|
||||
same_full_domain = (
|
||||
1
|
||||
if candidate_email_domain
|
||||
and candidate_email_domain == user_email_domain
|
||||
else 0
|
||||
)
|
||||
|
||||
# similarity fallback
|
||||
sim = getattr(u, "similarity", 0) or 0
|
||||
|
||||
return (
|
||||
is_shared,
|
||||
shared_score,
|
||||
same_full_domain,
|
||||
sim,
|
||||
)
|
||||
|
||||
# Sort candidates by the key descending and return top N as a queryset-like
|
||||
# list. Keep return type consistent with previous behavior (QuerySet slice
|
||||
# was returned) by returning a list of model instances.
|
||||
candidates.sort(key=_sort_key, reverse=True)
|
||||
|
||||
return candidates[: settings.API_USERS_LIST_LIMIT]
|
||||
|
||||
@drf.decorators.action(
|
||||
detail=False,
|
||||
methods=["get"],
|
||||
@@ -2338,6 +2405,7 @@ class ConfigView(drf.views.APIView):
|
||||
"""
|
||||
array_settings = [
|
||||
"AI_FEATURE_ENABLED",
|
||||
"API_USERS_SEARCH_QUERY_MIN_LENGTH",
|
||||
"COLLABORATION_WS_URL",
|
||||
"COLLABORATION_WS_NOT_CONNECTED_READY_ONLY",
|
||||
"CONVERSION_FILE_EXTENSIONS_ALLOWED",
|
||||
|
||||
Reference in New Issue
Block a user