From 019ce99a8623eafeef3899ee9bf94a361bd74de0 Mon Sep 17 00:00:00 2001 From: Quentin BEY Date: Thu, 5 Dec 2024 13:49:41 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8(contacts)=20filter=20list=20API=20wit?= =?UTF-8?q?h=20email?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows to lookup onto emails for the "magic filter" on the API list endpoint. --- CHANGELOG.md | 1 + src/backend/core/api/client/viewsets.py | 14 +++- .../0009_contacts_add_index_on_data_emails.py | 18 +++++ .../contacts/test_core_api_contacts_list.py | 72 +++++++++++++++++++ src/backend/core/tests/test_utils_raw_sql.py | 19 +++++ src/backend/core/utils/raw_sql.py | 41 +++++++++++ 6 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 src/backend/core/migrations/0009_contacts_add_index_on_data_emails.py create mode 100644 src/backend/core/tests/test_utils_raw_sql.py create mode 100644 src/backend/core/utils/raw_sql.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 2243cac..c91628a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to ### Added +- ✨(contacts) allow filter list API with email - ✨(contacts) return profile contact from same organization - ✨(dimail) automate allows requests to dimail - ✨(contacts) add notes & force full_name #565 diff --git a/src/backend/core/api/client/viewsets.py b/src/backend/core/api/client/viewsets.py index f98f9d9..600101f 100644 --- a/src/backend/core/api/client/viewsets.py +++ b/src/backend/core/api/client/viewsets.py @@ -19,6 +19,7 @@ from rest_framework.permissions import AllowAny from core import models from core.api import permissions from core.api.client import serializers +from core.utils.raw_sql import gen_sql_filter_json_array SIMILARITY_THRESHOLD = 0.04 @@ -157,11 +158,22 @@ class ContactViewSet( overridden_by__isnull=True, ) - # Search by case-insensitive and accent-insensitive similarity + # Search by case-insensitive and accent-insensitive on: + # - full name + # - short name + # - email (from data `emails` field) if query := self.request.GET.get("q", ""): queryset = queryset.filter( Q(full_name__unaccent__icontains=query) | Q(short_name__unaccent__icontains=query) + | Q( + id__in=gen_sql_filter_json_array( + queryset.model, + "data->'emails'", + "value", + query, + ) + ) ) serializer = self.get_serializer(queryset, many=True) diff --git a/src/backend/core/migrations/0009_contacts_add_index_on_data_emails.py b/src/backend/core/migrations/0009_contacts_add_index_on_data_emails.py new file mode 100644 index 0000000..a157636 --- /dev/null +++ b/src/backend/core/migrations/0009_contacts_add_index_on_data_emails.py @@ -0,0 +1,18 @@ +# Generated by Django 5.1.3 on 2024-12-05 13:19 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0008_change_user_profile_to_contact'), + ] + + operations = [ + # From 100ms to 78ms for 5 000 contacts on my computer + # This is not a big improvement and may need further investigation + migrations.RunSQL( + "CREATE INDEX json_field_index_emails_value ON people_contact USING GIN ((data->'emails'->'value') jsonb_path_ops);" + ) + ] diff --git a/src/backend/core/tests/contacts/test_core_api_contacts_list.py b/src/backend/core/tests/contacts/test_core_api_contacts_list.py index 99b614a..3613861 100644 --- a/src/backend/core/tests/contacts/test_core_api_contacts_list.py +++ b/src/backend/core/tests/contacts/test_core_api_contacts_list.py @@ -133,6 +133,78 @@ def test_api_contacts_list_authenticated_by_full_name(): assert contact_ids == [str(frank.id), str(nicole.id)] +def test_api_contacts_list_authenticated_by_email(): + """ + Authenticated users should be able to search users with a case insensitive and + partial query on the email. + """ + user = factories.UserFactory() + + dave = factories.BaseContactFactory( + full_name="0", # don't match on full name but allow ordering + data={ + "emails": [ + {"type": "Home", "value": "dave@personal.com"}, + {"type": "Work", "value": "david.bowman@example.com"}, + ], + }, + ) + nicole = factories.BaseContactFactory( + full_name="1", # don't match on full name but allow ordering + data={ + "emails": [ + {"type": "Work", "value": "nicole.foole@example.com"}, + ], + }, + ) + frank = factories.BaseContactFactory( + full_name="2", # don't match on full name but allow ordering + data={ + "emails": [ + {"type": "Home", "value": "francky@personal.com"}, + {"type": "Work", "value": "franck.poole@example.com"}, + ], + }, + ) + factories.BaseContactFactory( + full_name="3", # don't match on full name but allow ordering + data={ + "emails": [ + {"type": "Work", "value": "heywood.floyd@example.com"}, + ], + }, + ) + + # Full query should work + client = APIClient() + client.force_login(user) + + response = client.get("/api/v1.0/contacts/?q=david.bowman@example.com") + + assert response.status_code == 200 + contact_ids = [contact["id"] for contact in response.json()] + assert contact_ids == [str(dave.pk)] + + # Partial query should work + response = client.get("/api/v1.0/contacts/?q=anc") + + assert response.status_code == 200 + contact_ids = [contact["id"] for contact in response.json()] + assert contact_ids == [str(frank.pk)] + + response = client.get("/api/v1.0/contacts/?q=olé") # accented + + assert response.status_code == 200 + contact_ids = [contact["id"] for contact in response.json()] + assert contact_ids == [str(nicole.pk), str(frank.pk)] + + response = client.get("/api/v1.0/contacts/?q=oOl") # mixed case + + assert response.status_code == 200 + contact_ids = [contact["id"] for contact in response.json()] + assert contact_ids == [str(nicole.pk), str(frank.pk)] + + def test_api_contacts_list_authenticated_uppercase_content(): """Upper case content should be found by lower case query.""" user = factories.UserFactory() diff --git a/src/backend/core/tests/test_utils_raw_sql.py b/src/backend/core/tests/test_utils_raw_sql.py new file mode 100644 index 0000000..773adf9 --- /dev/null +++ b/src/backend/core/tests/test_utils_raw_sql.py @@ -0,0 +1,19 @@ +"""Tests for the raw SQL utility functions.""" + +from django.db.models.expressions import RawSQL + +from core.models import Contact +from core.utils.raw_sql import gen_sql_filter_json_array + + +def test_gen_sql_filter_json_array(): + """Test the generation of a raw SQL query to filter on a JSON array.""" + raw_sql = gen_sql_filter_json_array(Contact, "data->'emails'", "value", "blah") + + assert isinstance(raw_sql, RawSQL) + assert raw_sql.sql == ( + "SELECT people_contact.id FROM people_contact, " + "jsonb_array_elements(data->'emails') AS temp_filter_table WHERE " + "unaccent(temp_filter_table->>'value') ILIKE unaccent(%s)" + ) + assert raw_sql.params == ["%blah%"] diff --git a/src/backend/core/utils/raw_sql.py b/src/backend/core/utils/raw_sql.py new file mode 100644 index 0000000..00b1f1a --- /dev/null +++ b/src/backend/core/utils/raw_sql.py @@ -0,0 +1,41 @@ +"""Helper functions to generate raw SQL queries for Django models.""" + +from typing import Type + +from django.db import models +from django.db.models.expressions import RawSQL + + +def gen_sql_filter_json_array( + model: Type[models.Model], + lookup_path: str, + nested_key: str, + lookup_value: str, +) -> RawSQL: + """ + Filter a queryset on a nested JSON key in an array field with + a case-insensitive and unaccent match. + + Highly inspired from + https://gist.github.com/TobeTek/df2e9783a64e431c228c513441eaa8df#file-utils-py + + :param Type[models.Model] model: Your Django model to filter on + :param str lookup_path: The lookup path of the array field/key in + Postgres format e.g `data->"sub-key1"->"sub-key2"` + :param str nested_key: The name of the nested key to filter on + :param str lookup_value: The value to match/filter the queryset on + """ + # Disabling S608 Possible SQL injection vector through string-based query construction + # because we are using Django's RawSQL with parameters. + # Disabling S611 Use of `RawSQL` can lead to SQL injection vulnerabilities + # for the same reason. + + table_name = model._meta.db_table # noqa: SLF001 + + query = ( + f"SELECT {table_name}.id FROM {table_name}, " # noqa: S608 + f"jsonb_array_elements({lookup_path}) AS temp_filter_table " + f"WHERE unaccent(temp_filter_table->>'{nested_key}') ILIKE unaccent(%s)" + ) + + return RawSQL(sql=query, params=[f"%{lookup_value}%"]) # noqa: S611