🚸(backend) sort user search results by proximity with the active user (#1802)

## Purpose
Allows a user to find more easily the other users they search, with the
following order of priority:
- users they already share documents with (more recent first)
- users that share the same full email domain
- ~~users that share the same partial email domain (last two parts)~~
- ~~other users~~

Edit: We need to ilter out other users in order to not reveal email
addresses from members of other organisations. It's still possible to
invite them by email.

Solves #1521

## Proposal
- [x] Add a new function in `core/utils.py`:
`users_sharing_documents_with()`
- [x] Use it as a key to sort the results of a basic user search
- [x] Filter user results to avoid reveal of users (and email addresses)
of other orgs or that have not been interacted with.
- [x] User research through "full" email address (contains the '@') is
left unaffected.

---------

Co-authored-by: Anthony LC <anthony.le-courric@mail.numerique.gouv.fr>
This commit is contained in:
Sylvain Boissel
2026-02-11 18:51:45 +01:00
committed by GitHub
parent 9af540de35
commit 685464f2d7
14 changed files with 416 additions and 35 deletions

View File

@@ -20,6 +20,7 @@ pytestmark = pytest.mark.django_db
@override_settings(
AI_FEATURE_ENABLED=False,
API_USERS_SEARCH_QUERY_MIN_LENGTH=6,
COLLABORATION_WS_URL="http://testcollab/",
COLLABORATION_WS_NOT_CONNECTED_READY_ONLY=True,
CRISP_WEBSITE_ID="123",
@@ -44,6 +45,7 @@ def test_api_config(is_authenticated):
assert response.status_code == HTTP_200_OK
assert response.json() == {
"AI_FEATURE_ENABLED": False,
"API_USERS_SEARCH_QUERY_MIN_LENGTH": 6,
"COLLABORATION_WS_URL": "http://testcollab/",
"COLLABORATION_WS_NOT_CONNECTED_READY_ONLY": True,
"CONVERSION_FILE_EXTENSIONS_ALLOWED": [".docx", ".md"],

View File

@@ -2,6 +2,8 @@
Test users API endpoints in the impress core app.
"""
from django.utils import timezone
import pytest
from rest_framework.test import APIClient
@@ -121,12 +123,12 @@ def test_api_users_list_query_full_name():
Authenticated users should be able to list users and filter by full name.
Only results with a Trigram similarity greater than 0.2 with the query should be returned.
"""
user = factories.UserFactory()
user = factories.UserFactory(email="user@example.com")
client = APIClient()
client.force_login(user)
dave = factories.UserFactory(email="contact@work.com", full_name="David Bowman")
dave = factories.UserFactory(email="contact@example.com", full_name="David Bowman")
response = client.get(
"/api/v1.0/users/?q=David",
@@ -166,13 +168,13 @@ def test_api_users_list_query_accented_full_name():
Authenticated users should be able to list users and filter by full name with accents.
Only results with a Trigram similarity greater than 0.2 with the query should be returned.
"""
user = factories.UserFactory()
user = factories.UserFactory(email="user@example.com")
client = APIClient()
client.force_login(user)
fred = factories.UserFactory(
email="contact@work.com", full_name="Frédérique Lefèvre"
email="contact@example.com", full_name="Frédérique Lefèvre"
)
response = client.get("/api/v1.0/users/?q=Frédérique")
@@ -201,12 +203,82 @@ def test_api_users_list_query_accented_full_name():
assert users == []
def test_api_users_list_sorted_by_closest_match():
"""
Authenticated users should be able to list users and the results should be
sorted by closest match to the query.
Sorting criteria are :
- Shared documents with the user (most recent first)
- Same full email domain (example.gouv.fr)
Addresses that match neither criteria should be excluded from the results.
Case in point: the logged-in user has recently shared documents
with pierre.dupont@beta.gouv.fr and less recently with pierre.durand@impots.gouv.fr.
Other users named Pierre also exist:
- pierre.thomas@example.com
- pierre.petit@anct.gouv.fr
- pierre.robert@culture.gouv.fr
The search results should be ordered as follows:
# Shared with first
- pierre.dupond@beta.gouv.fr # Most recent first
- pierre.durand@impots.gouv.fr
# Same full domain second
- pierre.petit@anct.gouv.fr
"""
user = factories.UserFactory(
email="martin.bernard@anct.gouv.fr", full_name="Martin Bernard"
)
client = APIClient()
client.force_login(user)
pierre_1 = factories.UserFactory(email="pierre.dupont@beta.gouv.fr")
pierre_2 = factories.UserFactory(email="pierre.durand@impots.gouv.fr")
_pierre_3 = factories.UserFactory(email="pierre.thomas@example.com")
pierre_4 = factories.UserFactory(email="pierre.petit@anct.gouv.fr")
_pierre_5 = factories.UserFactory(email="pierre.robert@culture.gouv.fr")
document_1 = factories.DocumentFactory(creator=user)
document_2 = factories.DocumentFactory(creator=user)
factories.UserDocumentAccessFactory(user=user, document=document_1)
factories.UserDocumentAccessFactory(user=user, document=document_2)
now = timezone.now()
last_week = now - timezone.timedelta(days=7)
last_month = now - timezone.timedelta(days=30)
# The factory cannot set the created_at directly, so we force it after creation
p1_d1 = factories.UserDocumentAccessFactory(user=pierre_1, document=document_1)
p1_d1.created_at = last_week
p1_d1.save()
p2_d2 = factories.UserDocumentAccessFactory(user=pierre_2, document=document_2)
p2_d2.created_at = last_month
p2_d2.save()
response = client.get("/api/v1.0/users/?q=Pierre")
assert response.status_code == 200
user_ids = [user["email"] for user in response.json()]
assert user_ids == [
str(pierre_1.email),
str(pierre_2.email),
str(pierre_4.email),
]
def test_api_users_list_limit(settings):
"""
Authenticated users should be able to list users and the number of results
should be limited to 10.
should be limited to API_USERS_LIST_LIMIT (by default 5).
"""
user = factories.UserFactory()
user = factories.UserFactory(email="user@example.com")
client = APIClient()
client.force_login(user)
@@ -309,28 +381,16 @@ def test_api_users_list_query_email_exclude_doc_user():
def test_api_users_list_query_short_queries():
"""
Queries shorter than 5 characters should return an empty result set.
If API_USERS_SEARCH_QUERY_MIN_LENGTH is not set, the default minimum length should be 3.
"""
user = factories.UserFactory(email="paul@example.com", full_name="Paul")
client = APIClient()
client.force_login(user)
factories.UserFactory(email="john.doe@example.com")
factories.UserFactory(email="john.lennon@example.com")
factories.UserFactory(email="john.doe@example.com", full_name="John Doe")
factories.UserFactory(email="john.lennon@example.com", full_name="John Lennon")
response = client.get("/api/v1.0/users/?q=jo")
assert response.status_code == 400
assert response.json() == {
"q": ["Ensure this value has at least 5 characters (it has 2)."]
}
response = client.get("/api/v1.0/users/?q=john")
assert response.status_code == 400
assert response.json() == {
"q": ["Ensure this value has at least 5 characters (it has 4)."]
}
response = client.get("/api/v1.0/users/?q=john.")
response = client.get("/api/v1.0/users/?q=joh")
assert response.status_code == 200
assert len(response.json()) == 2
@@ -356,7 +416,7 @@ def test_api_users_list_query_long_queries():
def test_api_users_list_query_inactive():
"""Inactive users should not be listed."""
user = factories.UserFactory()
user = factories.UserFactory(email="user@example.com")
client = APIClient()
client.force_login(user)

View File

@@ -3,9 +3,14 @@
import base64
import uuid
import pycrdt
from django.core.cache import cache
from core import utils
import pycrdt
import pytest
from core import factories, utils
pytestmark = pytest.mark.django_db
# This base64 string is an example of what is saved in the database.
# This base64 is generated from the blocknote editor, it contains
@@ -100,3 +105,103 @@ def test_utils_get_ancestor_to_descendants_map_multiple_paths():
"000100020005": {"000100020005"},
"00010003": {"00010003"},
}
def test_utils_users_sharing_documents_with_cache_miss():
"""Test cache miss: should query database and cache result."""
user1 = factories.UserFactory()
user2 = factories.UserFactory()
user3 = factories.UserFactory()
doc1 = factories.DocumentFactory()
doc2 = factories.DocumentFactory()
factories.UserDocumentAccessFactory(user=user1, document=doc1)
factories.UserDocumentAccessFactory(user=user2, document=doc1)
factories.UserDocumentAccessFactory(user=user3, document=doc2)
cache_key = utils.get_users_sharing_documents_with_cache_key(user1)
cache.delete(cache_key)
result = utils.users_sharing_documents_with(user1)
assert user2.id in result
cached_data = cache.get(cache_key)
assert cached_data == result
def test_utils_users_sharing_documents_with_cache_hit():
"""Test cache hit: should return cached data without querying database."""
user1 = factories.UserFactory()
user2 = factories.UserFactory()
doc1 = factories.DocumentFactory()
factories.UserDocumentAccessFactory(user=user1, document=doc1)
factories.UserDocumentAccessFactory(user=user2, document=doc1)
cache_key = utils.get_users_sharing_documents_with_cache_key(user1)
test_cached_data = {user2.id: "2025-02-10"}
cache.set(cache_key, test_cached_data, 86400)
result = utils.users_sharing_documents_with(user1)
assert result == test_cached_data
def test_utils_users_sharing_documents_with_cache_invalidation_on_create():
"""Test that cache is invalidated when a DocumentAccess is created."""
# Create test data
user1 = factories.UserFactory()
user2 = factories.UserFactory()
doc1 = factories.DocumentFactory()
# Pre-populate cache
cache_key = utils.get_users_sharing_documents_with_cache_key(user1)
cache.set(cache_key, {}, 86400)
# Verify cache exists
assert cache.get(cache_key) is not None
# Create new DocumentAccess
factories.UserDocumentAccessFactory(user=user2, document=doc1)
# Cache should still exist (only created for user2 who was added)
# But if we create access for user1 being shared with, cache should be cleared
cache.set(cache_key, {"test": "data"}, 86400)
factories.UserDocumentAccessFactory(user=user1, document=doc1)
# Cache for user1 should be invalidated (cleared)
assert cache.get(cache_key) is None
def test_utils_users_sharing_documents_with_cache_invalidation_on_delete():
"""Test that cache is invalidated when a DocumentAccess is deleted."""
user1 = factories.UserFactory()
user2 = factories.UserFactory()
doc1 = factories.DocumentFactory()
doc_access = factories.UserDocumentAccessFactory(user=user1, document=doc1)
cache_key = utils.get_users_sharing_documents_with_cache_key(user1)
cache.set(cache_key, {user2.id: "2025-02-10"}, 86400)
assert cache.get(cache_key) is not None
doc_access.delete()
assert cache.get(cache_key) is None
def test_utils_users_sharing_documents_with_empty_result():
"""Test when user is not sharing any documents."""
user1 = factories.UserFactory()
cache_key = utils.get_users_sharing_documents_with_cache_key(user1)
cache.delete(cache_key)
result = utils.users_sharing_documents_with(user1)
assert result == {}
cached_data = cache.get(cache_key)
assert cached_data == {}

View File

@@ -0,0 +1,62 @@
"""Tests for utils.users_sharing_documents_with function."""
from django.utils import timezone
import pytest
from core import factories, utils
pytestmark = pytest.mark.django_db
def test_utils_users_sharing_documents_with():
"""Test users_sharing_documents_with function."""
user = factories.UserFactory(
email="martin.bernard@anct.gouv.fr", full_name="Martin Bernard"
)
pierre_1 = factories.UserFactory(
email="pierre.dupont@beta.gouv.fr", full_name="Pierre Dupont"
)
pierre_2 = factories.UserFactory(
email="pierre.durand@impots.gouv.fr", full_name="Pierre Durand"
)
now = timezone.now()
yesterday = now - timezone.timedelta(days=1)
last_week = now - timezone.timedelta(days=7)
last_month = now - timezone.timedelta(days=30)
document_1 = factories.DocumentFactory(creator=user)
document_2 = factories.DocumentFactory(creator=user)
document_3 = factories.DocumentFactory(creator=user)
factories.UserDocumentAccessFactory(user=user, document=document_1)
factories.UserDocumentAccessFactory(user=user, document=document_2)
factories.UserDocumentAccessFactory(user=user, document=document_3)
# The factory cannot set the created_at directly, so we force it after creation
doc_1_pierre_1 = factories.UserDocumentAccessFactory(
user=pierre_1, document=document_1, created_at=last_week
)
doc_1_pierre_1.created_at = last_week
doc_1_pierre_1.save()
doc_2_pierre_2 = factories.UserDocumentAccessFactory(
user=pierre_2, document=document_2
)
doc_2_pierre_2.created_at = last_month
doc_2_pierre_2.save()
doc_3_pierre_2 = factories.UserDocumentAccessFactory(
user=pierre_2, document=document_3
)
doc_3_pierre_2.created_at = yesterday
doc_3_pierre_2.save()
shared_map = utils.users_sharing_documents_with(user)
assert shared_map == {
pierre_1.id: last_week,
pierre_2.id: yesterday,
}