🚸(backend) use unaccented full name for user search
We have the user's full name through OIDC in the database, but the search only used the email field. This change allows searching for a user by their first and/or last name (fix #929). Given that user names are more likely than emails to include diacritics, it unaccents both the query and the database entry for search (fix #1091). It also unaccents emails so that internationalized domain names are matched whether or not the accent is included in the search. An unaccented GIN index is added on the users' full_name and email fields. A manual migration is used because a wrapper around unaccent is necessary to make it IMMUTABLE (cf. https://stackoverflow.com/questions/9063402/ )
This commit is contained in:
@@ -64,6 +64,7 @@ and this project adheres to
|
||||
- ♻️(frontend) preserve @ character when esc is pressed after typing it #1512
|
||||
- ♻️(frontend) make summary button fixed to remain visible during scroll #1581
|
||||
- ♻️(frontend) pdf embed use full width #1526
|
||||
- 🚸(backend) use unaccented full name for user search #1637
|
||||
|
||||
### Fixed
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
"""API endpoints"""
|
||||
|
||||
# pylint: disable=too-many-lines
|
||||
|
||||
import base64
|
||||
@@ -18,7 +19,7 @@ from django.core.validators import URLValidator
|
||||
from django.db import connection, transaction
|
||||
from django.db import models as db
|
||||
from django.db.models.expressions import RawSQL
|
||||
from django.db.models.functions import Left, Length
|
||||
from django.db.models.functions import Greatest, Left, Length
|
||||
from django.http import Http404, StreamingHttpResponse
|
||||
from django.urls import reverse
|
||||
from django.utils import timezone
|
||||
@@ -37,6 +38,7 @@ from rest_framework import response as drf_response
|
||||
from rest_framework.permissions import AllowAny
|
||||
|
||||
from core import authentication, choices, enums, models
|
||||
from core.api.filters import remove_accents
|
||||
from core.services.ai_services import AIService
|
||||
from core.services.collaboration_services import CollaborationService
|
||||
from core.services.converter_services import (
|
||||
@@ -188,13 +190,15 @@ class UserViewSet(
|
||||
queryset = queryset.exclude(documentaccess__document_id=document_id)
|
||||
|
||||
filter_data = filterset.form.cleaned_data
|
||||
query = filter_data["q"]
|
||||
query = remove_accents(filter_data["q"])
|
||||
|
||||
# For emails, match emails by Levenshtein distance to prevent typing errors
|
||||
if "@" in query:
|
||||
return (
|
||||
queryset.annotate(
|
||||
distance=RawSQL("levenshtein(email::text, %s::text)", (query,))
|
||||
distance=RawSQL(
|
||||
"levenshtein(unaccent(email::text), %s::text)", (query,)
|
||||
)
|
||||
)
|
||||
.filter(distance__lte=3)
|
||||
.order_by("distance", "email")[: settings.API_USERS_LIST_LIMIT]
|
||||
@@ -203,11 +207,15 @@ class UserViewSet(
|
||||
# Use trigram similarity for non-email-like queries
|
||||
# For performance reasons we filter first by similarity, which relies on an
|
||||
# index, then only calculate precise similarity scores for sorting purposes
|
||||
|
||||
return (
|
||||
queryset.filter(email__trigram_word_similar=query)
|
||||
.annotate(similarity=TrigramSimilarity("email", query))
|
||||
queryset.annotate(
|
||||
sim_email=TrigramSimilarity("email", query),
|
||||
sim_name=TrigramSimilarity("full_name", query),
|
||||
)
|
||||
.annotate(similarity=Greatest("sim_email", "sim_name"))
|
||||
.filter(similarity__gt=0.2)
|
||||
.order_by("-similarity", "email")[: settings.API_USERS_LIST_LIMIT]
|
||||
.order_by("-similarity")[: settings.API_USERS_LIST_LIMIT]
|
||||
)
|
||||
|
||||
@drf.decorators.action(
|
||||
|
||||
37
src/backend/core/migrations/0027_auto_20251120_0956.py
Normal file
37
src/backend/core/migrations/0027_auto_20251120_0956.py
Normal file
@@ -0,0 +1,37 @@
|
||||
# Generated by Django 5.2.8 on 2025-11-20 09:56
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """
    Add unaccented trigram GIN indexes on the user email and full_name columns.

    PostgreSQL only accepts IMMUTABLE functions in index expressions, so the
    forward SQL first declares an IMMUTABLE wrapper (``f_unaccent``) around the
    unaccent dictionary function, then builds both indexes on that wrapper.
    """

    dependencies = [
        ("core", "0026_comments"),
    ]

    operations = [
        migrations.RunSQL(
            # All statements are idempotent (CREATE OR REPLACE / IF NOT EXISTS)
            # so the migration can be re-applied safely.
            sql="""
            CREATE OR REPLACE FUNCTION public.immutable_unaccent(regdictionary, text)
              RETURNS text
              LANGUAGE c IMMUTABLE PARALLEL SAFE STRICT AS
            '$libdir/unaccent', 'unaccent_dict';

            CREATE OR REPLACE FUNCTION public.f_unaccent(text)
              RETURNS text
              LANGUAGE sql IMMUTABLE PARALLEL SAFE STRICT
            RETURN public.immutable_unaccent(regdictionary 'public.unaccent', $1);

            CREATE INDEX IF NOT EXISTS user_email_unaccent_trgm_idx
              ON impress_user
              USING gin (f_unaccent(email) gin_trgm_ops);

            CREATE INDEX IF NOT EXISTS user_full_name_unaccent_trgm_idx
              ON impress_user
              USING gin (f_unaccent(full_name) gin_trgm_ops);
            """,
            # Undo everything the forward SQL created, in reverse dependency
            # order: the indexes depend on f_unaccent, which itself depends on
            # immutable_unaccent.
            reverse_sql="""
            DROP INDEX IF EXISTS user_email_unaccent_trgm_idx;
            DROP INDEX IF EXISTS user_full_name_unaccent_trgm_idx;
            DROP FUNCTION IF EXISTS public.f_unaccent(text);
            DROP FUNCTION IF EXISTS public.immutable_unaccent(regdictionary, text);
            """,
        ),
    ]
|
||||
@@ -76,6 +76,131 @@ def test_api_users_list_query_email():
|
||||
assert user_ids == []
|
||||
|
||||
|
||||
def test_api_users_list_query_email_with_internationalized_domain_names():
    """
    Filtering users by email must keep working when the address uses an
    internationalized domain name, whether or not the query carries the accent.
    """
    requester = factories.UserFactory()

    api_client = APIClient()
    api_client.force_login(requester)

    jean = factories.UserFactory(email="jean.martin@éducation.fr")
    marie = factories.UserFactory(email="marie.durand@education.fr")
    kurokawa = factories.UserFactory(email="contact@黒川.日本")

    # Each request is paired with the single user it is expected to return.
    expectations = (
        ("/api/v1.0/users/?q=jean.martin@education.fr", jean),
        ("/api/v1.0/users/?q=jean.martin@éducation.fr", jean),
        ("/api/v1.0/users/?q=marie.durand@education.fr", marie),
        ("/api/v1.0/users/?q=marie.durand@éducation.fr", marie),
        ("/api/v1.0/users/?q=contact@黒川.日本", kurokawa),
    )

    for url, expected_user in expectations:
        response = api_client.get(url)
        assert response.status_code == 200
        found_ids = [entry["id"] for entry in response.json()]
        assert found_ids == [str(expected_user.id)]
|
||||
|
||||
|
||||
def test_api_users_list_query_full_name():
    """
    Filtering users by full name must match on first or last name regardless
    of letter case, and return nothing for a query that is not similar enough.
    """
    requester = factories.UserFactory()

    api_client = APIClient()
    api_client.force_login(requester)

    dave = factories.UserFactory(email="contact@work.com", full_name="David Bowman")

    # Queries that must all resolve to the single matching user.
    matching_urls = (
        "/api/v1.0/users/?q=David",
        "/api/v1.0/users/?q=Bowman",
        "/api/v1.0/users/?q=bowman",
        "/api/v1.0/users/?q=BOWMAN",
        "/api/v1.0/users/?q=BoWmAn",
    )

    for url in matching_urls:
        response = api_client.get(url)
        assert response.status_code == 200
        found_ids = [entry["id"] for entry in response.json()]
        assert found_ids == [str(dave.id)]

    # A query that is too dissimilar from the stored name returns no user.
    response = api_client.get("/api/v1.0/users/?q=Bovin")
    assert response.status_code == 200
    found_ids = [entry["id"] for entry in response.json()]
    assert found_ids == []
|
||||
|
||||
|
||||
def test_api_users_list_query_accented_full_name():
    """
    Filtering users by full name must find an accented name whether the query
    is typed with or without the accents, and return nothing for a query that
    is not similar enough.
    """
    requester = factories.UserFactory()

    api_client = APIClient()
    api_client.force_login(requester)

    fred = factories.UserFactory(
        email="contact@work.com", full_name="Frédérique Lefèvre"
    )

    # Accented and unaccented spellings must resolve to the same user.
    matching_urls = (
        "/api/v1.0/users/?q=Frédérique",
        "/api/v1.0/users/?q=Frederique",
        "/api/v1.0/users/?q=Lefèvre",
        "/api/v1.0/users/?q=Lefevre",
    )

    for url in matching_urls:
        response = api_client.get(url)
        assert response.status_code == 200
        found_ids = [entry["id"] for entry in response.json()]
        assert found_ids == [str(fred.id)]

    # A different name must not be returned as a match.
    response = api_client.get("/api/v1.0/users/?q=François Lorfebvre")
    assert response.status_code == 200
    found_names = [entry["full_name"] for entry in response.json()]
    assert found_names == []
|
||||
|
||||
|
||||
def test_api_users_list_limit(settings):
|
||||
"""
|
||||
Authenticated users should be able to list users and the number of results
|
||||
|
||||
Reference in New Issue
Block a user