🔥(teams) remove search users by trigram

This feature is not necessary for our users now
and we got some strange results so we decided
to remove this feature.
This commit is contained in:
Sabrina Demagny
2024-10-30 01:11:28 +01:00
committed by Nathan Panchout
parent faf8dcc7e5
commit ababcde0d6
3 changed files with 35 additions and 54 deletions

View File

@@ -12,6 +12,10 @@ and this project adheres to
- ✨(teams) register contacts on admin views - ✨(teams) register contacts on admin views
### Changed
- 🔥(teams) remove search users by trigram
### Fixed ### Fixed
- 💚(ci) improve E2E tests #492 - 💚(ci) improve E2E tests #492

View File

@@ -182,9 +182,7 @@ class UserViewSet(
User viewset for all interactions with user infos and teams. User viewset for all interactions with user infos and teams.
GET /api/users/&q=query GET /api/users/&q=query
Return a list of users whose email matches the query. Similarity is Return a list of users whose email or name matches the query.
calculated using trigram similarity, allowing for partial,
case-insensitive matches and accented queries.
""" """
permission_classes = [permissions.IsSelf] permission_classes = [permissions.IsSelf]
@@ -207,22 +205,11 @@ class UserViewSet(
if team_id := self.request.GET.get("team_id", ""): if team_id := self.request.GET.get("team_id", ""):
queryset = queryset.exclude(teams__id=team_id) queryset = queryset.exclude(teams__id=team_id)
# Search by case-insensitive and accent-insensitive trigram similarity # Search by case-insensitive and accent-insensitive
if query := self.request.GET.get("q", ""): if query := self.request.GET.get("q", ""):
similarity = Max( queryset = queryset.filter(
TrigramSimilarity( Q(name__unaccent__icontains=query)
Coalesce(Func("email", function="unaccent"), Value("")), | Q(email__unaccent__icontains=query)
Func(Value(query), function="unaccent"),
)
+ TrigramSimilarity(
Coalesce(Func("name", function="unaccent"), Value("")),
Func(Value(query), function="unaccent"),
)
)
queryset = (
queryset.annotate(similarity=similarity)
.filter(similarity__gte=SIMILARITY_THRESHOLD)
.order_by("-similarity")
) )
return queryset return queryset

View File

@@ -78,28 +78,16 @@ def test_api_users_authenticated_list_by_email():
user_ids = [user["id"] for user in response.json()["results"]] user_ids = [user["id"] for user in response.json()["results"]]
assert user_ids[0] == str(frank.id) assert user_ids[0] == str(frank.id)
# Result that matches a trigram twice ranks better than result that matches once
response = client.get("/api/v1.0/users/?q=ole") response = client.get("/api/v1.0/users/?q=ole")
assert response.status_code == HTTP_200_OK assert response.status_code == HTTP_200_OK
user_ids = [user["id"] for user in response.json()["results"]] user_ids = [user["id"] for user in response.json()["results"]]
# "Nicole Foole" matches twice on "ole" assert user_ids == [str(frank.id), str(nicole.id)]
assert user_ids == [str(nicole.id), str(frank.id)]
# Even with a low similarity threshold, query should match expected emails
response = client.get("/api/v1.0/users/?q=ool") response = client.get("/api/v1.0/users/?q=ool")
assert response.status_code == HTTP_200_OK assert response.status_code == HTTP_200_OK
assert response.json()["results"] == [ assert response.json()["results"] == [
{
"id": str(nicole.id),
"email": nicole.email,
"name": nicole.name,
"is_device": nicole.is_device,
"is_staff": nicole.is_staff,
"language": nicole.language,
"timezone": str(nicole.timezone),
},
{ {
"id": str(frank.id), "id": str(frank.id),
"email": frank.email, "email": frank.email,
@@ -109,6 +97,15 @@ def test_api_users_authenticated_list_by_email():
"language": frank.language, "language": frank.language,
"timezone": str(frank.timezone), "timezone": str(frank.timezone),
}, },
{
"id": str(nicole.id),
"email": nicole.email,
"name": nicole.name,
"is_device": nicole.is_device,
"is_staff": nicole.is_staff,
"language": nicole.language,
"timezone": str(nicole.timezone),
},
] ]
@@ -118,9 +115,9 @@ def test_api_users_authenticated_list_by_name():
partial query on the name. partial query on the name.
""" """
user = factories.UserFactory(email="tester@ministry.fr", name="john doe") user = factories.UserFactory(email="tester@ministry.fr", name="john doe")
dave = factories.UserFactory(name="dave bowman", email=None) dave = factories.UserFactory(name="Dave bowman", email=None)
nicole = factories.UserFactory(name="nicole foole", email=None) nicole = factories.UserFactory(name="nicole foole", email=None)
frank = factories.UserFactory(name="frank poole", email=None) frank = factories.UserFactory(name="frank poolé", email=None)
factories.UserFactory(name="heywood floyd", email=None) factories.UserFactory(name="heywood floyd", email=None)
client = APIClient() client = APIClient()
@@ -128,7 +125,7 @@ def test_api_users_authenticated_list_by_name():
# Full query should work # Full query should work
response = client.get( response = client.get(
"/api/v1.0/users/?q=david.bowman@work.com", "/api/v1.0/users/?q=dave",
) )
assert response.status_code == HTTP_200_OK assert response.status_code == HTTP_200_OK
@@ -142,28 +139,16 @@ def test_api_users_authenticated_list_by_name():
user_ids = [user["id"] for user in response.json()["results"]] user_ids = [user["id"] for user in response.json()["results"]]
assert user_ids[0] == str(frank.id) assert user_ids[0] == str(frank.id)
# Result that matches a trigram twice ranks better than result that matches once
response = client.get("/api/v1.0/users/?q=ole") response = client.get("/api/v1.0/users/?q=ole")
assert response.status_code == HTTP_200_OK assert response.status_code == HTTP_200_OK
user_ids = [user["id"] for user in response.json()["results"]] user_ids = [user["id"] for user in response.json()["results"]]
# "Nicole Foole" matches twice on "ole" assert user_ids == [str(frank.id), str(nicole.id)]
assert user_ids == [str(nicole.id), str(frank.id)]
# Even with a low similarity threshold, query should match expected user response = client.get("/api/v1.0/users/?q=oole")
response = client.get("/api/v1.0/users/?q=ool")
assert response.status_code == HTTP_200_OK assert response.status_code == HTTP_200_OK
assert response.json()["results"] == [ assert response.json()["results"] == [
{
"id": str(nicole.id),
"email": nicole.email,
"name": nicole.name,
"is_device": nicole.is_device,
"is_staff": nicole.is_staff,
"language": nicole.language,
"timezone": str(nicole.timezone),
},
{ {
"id": str(frank.id), "id": str(frank.id),
"email": frank.email, "email": frank.email,
@@ -173,6 +158,15 @@ def test_api_users_authenticated_list_by_name():
"language": frank.language, "language": frank.language,
"timezone": str(frank.timezone), "timezone": str(frank.timezone),
}, },
{
"id": str(nicole.id),
"email": nicole.email,
"name": nicole.name,
"is_device": nicole.is_device,
"is_staff": nicole.is_staff,
"language": nicole.language,
"timezone": str(nicole.timezone),
},
] ]
@@ -184,22 +178,18 @@ def test_api_users_authenticated_list_by_name_and_email():
user = factories.UserFactory(email="tester@ministry.fr", name="john doe") user = factories.UserFactory(email="tester@ministry.fr", name="john doe")
nicole = factories.UserFactory(email="nicole_foole@work.com", name="nicole foole") nicole = factories.UserFactory(email="nicole_foole@work.com", name="nicole foole")
frank = factories.UserFactory(email="oleg_poole@work.com", name=None) oleg = factories.UserFactory(email="oleg_poole@work.com", name=None)
david = factories.UserFactory(email=None, name="david role") david = factories.UserFactory(email=None, name="david role")
client = APIClient() client = APIClient()
client.force_login(user) client.force_login(user)
# Result that matches a trigram in name and email ranks better than result that matches once
response = client.get("/api/v1.0/users/?q=ole") response = client.get("/api/v1.0/users/?q=ole")
assert response.status_code == HTTP_200_OK assert response.status_code == HTTP_200_OK
user_ids = [user["id"] for user in response.json()["results"]] user_ids = [user["id"] for user in response.json()["results"]]
# "Nicole Foole" matches twice on "ole" in her name and twice on "ole" in her email assert user_ids == [str(david.id), str(oleg.id), str(nicole.id)]
# "Oleg poole" matches twice on "ole" in her email
# "David role" matches once on "ole" in his name
assert user_ids == [str(nicole.id), str(frank.id), str(david.id)]
def test_api_users_authenticated_list_exclude_users_already_in_team( def test_api_users_authenticated_list_exclude_users_already_in_team(