🚸(backend) make document search on title accent-insensitive

This should work in both cases:
- search for "vélo" when the document title contains "velo"
- search for "velo" when the document title contains "vélo"
This commit is contained in:
Samuel Paccoud - DINUM
2025-04-17 18:36:29 +02:00
parent ecd06560c6
commit 419079ac69
4 changed files with 57 additions and 8 deletions

View File

@@ -10,6 +10,7 @@ and this project adheres to
## Added
- 🚸(backend) make document search on title accent-insensitive #874
- 🚩 add homepage feature flag #861

View File

@@ -1,5 +1,7 @@
"""API filters for Impress' core application."""
import unicodedata
from django.utils.translation import gettext_lazy as _
import django_filters
@@ -7,13 +9,42 @@ import django_filters
from core import models
class DocumentFilter(django_filters.FilterSet):
def remove_accents(value):
"""Remove accents from a string (vélo -> velo)."""
return "".join(
c
for c in unicodedata.normalize("NFD", value)
if unicodedata.category(c) != "Mn"
)
class AccentInsensitiveCharFilter(django_filters.CharFilter):
"""
Custom filter for filtering documents.
A custom CharFilter that filters on the accent-insensitive value searched.
"""
title = django_filters.CharFilter(
field_name="title", lookup_expr="icontains", label=_("Title")
def filter(self, qs, value):
"""
Apply the filter to the queryset using the unaccented version of the field.
Args:
qs: The queryset to filter.
value: The value to search for in the unaccented field.
Returns:
A filtered queryset.
"""
if value:
value = remove_accents(value)
return super().filter(qs, value)
class DocumentFilter(django_filters.FilterSet):
"""
Custom filter for filtering documents on title (accent and case insensitive).
"""
title = AccentInsensitiveCharFilter(
field_name="title", lookup_expr="unaccent__icontains", label=_("Title")
)
class Meta:

View File

@@ -0,0 +1,10 @@
from django.contrib.postgres.operations import UnaccentExtension
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("core", "0020_remove_is_public_add_field_attachments_and_duplicated_from"),
]
operations = [UnaccentExtension()]

View File

@@ -7,6 +7,7 @@ from faker import Faker
from rest_framework.test import APIClient
from core import factories
from core.api.filters import remove_accents
fake = Faker()
pytestmark = pytest.mark.django_db
@@ -49,14 +50,16 @@ def test_api_documents_descendants_filter_unknown_field():
[
("Project Alpha", 1), # Exact match
("project", 2), # Partial match (case-insensitive)
("Guide", 1), # Word match within a title
("Guide", 2), # Word match within a title
("Special", 0), # No match (nonexistent keyword)
("2024", 2), # Match by numeric keyword
("", 5), # Empty string
("", 6), # Empty string
("velo", 1), # Accent-insensitive match (velo vs vélo)
("bêta", 1), # Accent-insensitive match (bêta vs beta)
],
)
def test_api_documents_descendants_filter_title(query, nb_results):
"""Authenticated users should be able to search documents by their title."""
"""Authenticated users should be able to search documents by their unaccented title."""
user = factories.UserFactory()
client = APIClient()
client.force_login(user)
@@ -70,6 +73,7 @@ def test_api_documents_descendants_filter_title(query, nb_results):
"User Guide",
"Financial Report 2024",
"Annual Review 2024",
"Guide du vélo urbain", # <-- Title with accent for accent-insensitive test
]
for title in titles:
factories.DocumentFactory(title=title, parent=document)
@@ -85,4 +89,7 @@ def test_api_documents_descendants_filter_title(query, nb_results):
# Ensure all results contain the query in their title
for result in results:
assert query.lower().strip() in result["title"].lower()
assert (
remove_accents(query).lower().strip()
in remove_accents(result["title"]).lower()
)