diff --git a/CHANGELOG.md b/CHANGELOG.md index 23e673ca..935ca21c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ and this project adheres to ## [Unreleased] +## Changed + +- 🚸(backend) improve users similarity search and sort results #391 + ## [1.7.0] - 2024-10-24 ## Added diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 98560eb8..25d56c62 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -6,6 +6,7 @@ from urllib.parse import urlparse from django.conf import settings from django.contrib.postgres.aggregates import ArrayAgg +from django.contrib.postgres.search import TrigramSimilarity from django.core.exceptions import ValidationError from django.core.files.storage import default_storage from django.db.models import ( @@ -156,8 +157,21 @@ class UserViewSet( # Filter users by email similarity if query := self.request.GET.get("q", ""): + # For performance reasons we filter first by similarity, which relies on an index, + # then only calculate precise similarity scores for sorting purposes queryset = queryset.filter(email__trigram_word_similar=query) + queryset = queryset.annotate( + similarity=TrigramSimilarity("email", query) + ) + # When the query only is on the name part, we should try to make many proposals + # But when the query looks like an email we should only propose serious matches + threshold = 0.6 if "@" in query else 0.1 + + queryset = queryset.filter(similarity__gt=threshold).order_by( + "-similarity" + ) + return queryset @decorators.action( diff --git a/src/backend/core/tests/test_api_users.py b/src/backend/core/tests/test_api_users.py index d95153f2..e739d4d1 100644 --- a/src/backend/core/tests/test_api_users.py +++ b/src/backend/core/tests/test_api_users.py @@ -69,6 +69,48 @@ def test_api_users_list_query_email(): assert user_ids == [str(nicole.id), str(frank.id)] +def test_api_users_list_query_email_matching(): + """While filtering by email, results should be filtered and sorted by similarity""" + user = factories.UserFactory() + + client = APIClient() + client.force_login(user) + + alice = factories.UserFactory(email="alice.johnson@example.gouv.fr") + factories.UserFactory(email="jane.smith@example.gouv.fr") + michael_wilson = factories.UserFactory(email="michael.wilson@example.gouv.fr") + factories.UserFactory(email="david.jones@example.gouv.fr") + michael_brown = factories.UserFactory(email="michael.brown@example.gouv.fr") + factories.UserFactory(email="sophia.taylor@example.gouv.fr") + + response = client.get( + "/api/v1.0/users/?q=michael.johnson@example.gouv.f", + ) + assert response.status_code == 200 + user_ids = [user["id"] for user in response.json()["results"]] + assert user_ids == [str(michael_wilson.id)] + + response = client.get("/api/v1.0/users/?q=michael.johnson@example.gouv.fr") + + assert response.status_code == 200 + user_ids = [user["id"] for user in response.json()["results"]] + assert user_ids == [str(michael_wilson.id), str(alice.id), str(michael_brown.id)] + + response = client.get( + "/api/v1.0/users/?q=ajohnson@example.gouv.f", + ) + assert response.status_code == 200 + user_ids = [user["id"] for user in response.json()["results"]] + assert user_ids == [str(alice.id)] + + response = client.get( + "/api/v1.0/users/?q=michael.wilson@example.gouv.f", + ) + assert response.status_code == 200 + user_ids = [user["id"] for user in response.json()["results"]] + assert user_ids == [str(michael_wilson.id)] + + def test_api_users_list_query_email_exclude_doc_user(): """ Authenticated users should be able to list users