🚸(backend) make document search on title accent-insensitive

This should work in both cases:
- search for "vélo" when the document title contains "velo"
- search for "velo" when the document title contains "vélo"
2025-04-17 18:36:29 +02:00
parent ecd06560c6
commit 419079ac69
4 changed files with 57 additions and 8 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to
 ## Added
 - 🚸(backend) make document search on title accent-insensitive #874
 - 🚩 add homepage feature flag #861
--- a/src/backend/core/api/filters.py
+++ b/src/backend/core/api/filters.py
@@ -1,5 +1,7 @@
 """API filters for Impress' core application."""
 import unicodedata
 from django.utils.translation import gettext_lazy as _
 import django_filters
@@ -7,13 +9,42 @@ import django_filters
 from core import models
-class DocumentFilter(django_filters.FilterSet):
+def remove_accents(value):
    """Remove accents from a string (vélo -> velo)."""
    return "".join(
        c
        for c in unicodedata.normalize("NFD", value)
        if unicodedata.category(c) != "Mn"
    )
 class AccentInsensitiveCharFilter(django_filters.CharFilter):
    """
-    Custom filter for filtering documents.
+    A custom CharFilter that removes accents from the searched value before filtering.
    """
-    title = django_filters.CharFilter(
+    def filter(self, qs, value):
-        field_name="title", lookup_expr="icontains", label=_("Title")
+        """
        Apply the filter to the queryset using the unaccented version of the field.
        Args:
            qs: The queryset to filter.
            value: The value to search for in the unaccented field.
        Returns:
            A filtered queryset.
        """
        if value:
            value = remove_accents(value)
        return super().filter(qs, value)
 class DocumentFilter(django_filters.FilterSet):
    """
    Custom filter for filtering documents on title (accent and case insensitive).
    """
    title = AccentInsensitiveCharFilter(
        field_name="title", lookup_expr="unaccent__icontains", label=_("Title")
    )
    class Meta:
--- a/src/backend/core/migrations/0021_activate_unaccent_extension.py
+++ b/src/backend/core/migrations/0021_activate_unaccent_extension.py
@@ -0,0 +1,10 @@
 from django.contrib.postgres.operations import UnaccentExtension
 from django.db import migrations
 class Migration(migrations.Migration):
    dependencies = [
        ("core", "0020_remove_is_public_add_field_attachments_and_duplicated_from"),
    ]
    operations = [UnaccentExtension()]
--- a/src/backend/core/tests/documents/test_api_documents_descendants_filters.py
+++ b/src/backend/core/tests/documents/test_api_documents_descendants_filters.py
@@ -7,6 +7,7 @@ from faker import Faker
 from rest_framework.test import APIClient
 from core import factories
 from core.api.filters import remove_accents
 fake = Faker()
 pytestmark = pytest.mark.django_db
@@ -49,14 +50,16 @@ def test_api_documents_descendants_filter_unknown_field():
    [
        ("Project Alpha", 1),  # Exact match
        ("project", 2),  # Partial match (case-insensitive)
-        ("Guide", 1),  # Word match within a title
+        ("Guide", 2),  # Word match within a title
        ("Special", 0),  # No match (nonexistent keyword)
        ("2024", 2),  # Match by numeric keyword
-        ("", 5),  # Empty string
+        ("", 6),  # Empty string
        ("velo", 1),  # Accent-insensitive match (velo vs vélo)
        ("bêta", 1),  # Accent-insensitive match (bêta vs beta)
    ],
 )
 def test_api_documents_descendants_filter_title(query, nb_results):
-    """Authenticated users should be able to search documents by their title."""
+    """Authenticated users should be able to search documents by their unaccented title."""
    user = factories.UserFactory()
    client = APIClient()
    client.force_login(user)
@@ -70,6 +73,7 @@ def test_api_documents_descendants_filter_title(query, nb_results):
        "User Guide",
        "Financial Report 2024",
        "Annual Review 2024",
        "Guide du vélo urbain",  # <-- Title with accent for accent-insensitive test
    ]
    for title in titles:
        factories.DocumentFactory(title=title, parent=document)
@@ -85,4 +89,7 @@ def test_api_documents_descendants_filter_title(query, nb_results):
    # Ensure all results contain the query in their title
    for result in results:
-        assert query.lower().strip() in result["title"].lower()
+        assert (
            remove_accents(query).lower().strip()
            in remove_accents(result["title"]).lower()
        )