diff --git a/CHANGELOG.md b/CHANGELOG.md index f4f1a643..40e36bf1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to ## Added +- 🚸(backend) make document search on title accent-insensitive #874 - 🚩 add homepage feature flag #861 diff --git a/src/backend/core/api/filters.py b/src/backend/core/api/filters.py index 731deb15..5b0721fb 100644 --- a/src/backend/core/api/filters.py +++ b/src/backend/core/api/filters.py @@ -1,5 +1,7 @@ """API filters for Impress' core application.""" +import unicodedata + from django.utils.translation import gettext_lazy as _ import django_filters @@ -7,13 +9,42 @@ import django_filters from core import models -class DocumentFilter(django_filters.FilterSet): +def remove_accents(value): + """Remove accents from a string (vélo -> velo).""" + return "".join( + c + for c in unicodedata.normalize("NFD", value) + if unicodedata.category(c) != "Mn" + ) + + +class AccentInsensitiveCharFilter(django_filters.CharFilter): """ - Custom filter for filtering documents. + A custom CharFilter that filters on the accent-insensitive value searched. """ - title = django_filters.CharFilter( - field_name="title", lookup_expr="icontains", label=_("Title") + def filter(self, qs, value): + """ + Apply the filter to the queryset using the unaccented version of the field. + + Args: + qs: The queryset to filter. + value: The value to search for in the unaccented field. + Returns: + A filtered queryset. + """ + if value: + value = remove_accents(value) + return super().filter(qs, value) + + +class DocumentFilter(django_filters.FilterSet): + """ + Custom filter for filtering documents on title (accent and case insensitive). + """ + + title = AccentInsensitiveCharFilter( + field_name="title", lookup_expr="unaccent__icontains", label=_("Title") ) class Meta: diff --git a/src/backend/core/migrations/0021_activate_unaccent_extension.py b/src/backend/core/migrations/0021_activate_unaccent_extension.py new file mode 100644 index 00000000..b3bd5ec4 --- /dev/null +++ b/src/backend/core/migrations/0021_activate_unaccent_extension.py @@ -0,0 +1,10 @@ +from django.contrib.postgres.operations import UnaccentExtension +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("core", "0020_remove_is_public_add_field_attachments_and_duplicated_from"), + ] + + operations = [UnaccentExtension()] diff --git a/src/backend/core/tests/documents/test_api_documents_descendants_filters.py b/src/backend/core/tests/documents/test_api_documents_descendants_filters.py index dec34895..342ead70 100644 --- a/src/backend/core/tests/documents/test_api_documents_descendants_filters.py +++ b/src/backend/core/tests/documents/test_api_documents_descendants_filters.py @@ -7,6 +7,7 @@ from faker import Faker from rest_framework.test import APIClient from core import factories +from core.api.filters import remove_accents fake = Faker() pytestmark = pytest.mark.django_db @@ -49,14 +50,16 @@ def test_api_documents_descendants_filter_unknown_field(): [ ("Project Alpha", 1), # Exact match ("project", 2), # Partial match (case-insensitive) - ("Guide", 1), # Word match within a title + ("Guide", 2), # Word match within a title ("Special", 0), # No match (nonexistent keyword) ("2024", 2), # Match by numeric keyword - ("", 5), # Empty string + ("", 6), # Empty string + ("velo", 1), # Accent-insensitive match (velo vs vélo) + ("bêta", 1), # Accent-insensitive match (bêta vs beta) ], ) def test_api_documents_descendants_filter_title(query, nb_results): - """Authenticated users should be able to search documents by their title.""" + """Authenticated users should be able to search documents by their unaccented title.""" user = factories.UserFactory() client = APIClient() client.force_login(user) @@ -70,6 +73,7 @@ def test_api_documents_descendants_filter_title(query, nb_results): "User Guide", "Financial Report 2024", "Annual Review 2024", + "Guide du vélo urbain", # <-- Title with accent for accent-insensitive test ] for title in titles: factories.DocumentFactory(title=title, parent=document) @@ -85,4 +89,7 @@ def test_api_documents_descendants_filter_title(query, nb_results): # Ensure all results contain the query in their title for result in results: - assert query.lower().strip() in result["title"].lower() + assert ( + remove_accents(query).lower().strip() + in remove_accents(result["title"]).lower() + )