🚸(backend) make document search on title accent-insensitive
This should work in both cases: - search for "vélo" when the document title contains "velo" - search for "velo" when the document title contains "vélo"
This commit is contained in:
@@ -10,6 +10,7 @@ and this project adheres to
|
||||
|
||||
## Added
|
||||
|
||||
- 🚸(backend) make document search on title accent-insensitive #874
|
||||
- 🚩 add homepage feature flag #861
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
"""API filters for Impress' core application."""
|
||||
|
||||
import unicodedata
|
||||
|
||||
from django.utils.translation import gettext_lazy as _
|
||||
|
||||
import django_filters
|
||||
@@ -7,13 +9,42 @@ import django_filters
|
||||
from core import models
|
||||
|
||||
|
||||
class DocumentFilter(django_filters.FilterSet):
|
||||
def remove_accents(value):
|
||||
"""Remove accents from a string (vélo -> velo)."""
|
||||
return "".join(
|
||||
c
|
||||
for c in unicodedata.normalize("NFD", value)
|
||||
if unicodedata.category(c) != "Mn"
|
||||
)
|
||||
|
||||
|
||||
class AccentInsensitiveCharFilter(django_filters.CharFilter):
|
||||
"""
|
||||
Custom filter for filtering documents.
|
||||
A custom CharFilter that filters on the accent-insensitive value searched.
|
||||
"""
|
||||
|
||||
title = django_filters.CharFilter(
|
||||
field_name="title", lookup_expr="icontains", label=_("Title")
|
||||
def filter(self, qs, value):
|
||||
"""
|
||||
Apply the filter to the queryset using the unaccented version of the field.
|
||||
|
||||
Args:
|
||||
qs: The queryset to filter.
|
||||
value: The value to search for in the unaccented field.
|
||||
Returns:
|
||||
A filtered queryset.
|
||||
"""
|
||||
if value:
|
||||
value = remove_accents(value)
|
||||
return super().filter(qs, value)
|
||||
|
||||
|
||||
class DocumentFilter(django_filters.FilterSet):
|
||||
"""
|
||||
Custom filter for filtering documents on title (accent and case insensitive).
|
||||
"""
|
||||
|
||||
title = AccentInsensitiveCharFilter(
|
||||
field_name="title", lookup_expr="unaccent__icontains", label=_("Title")
|
||||
)
|
||||
|
||||
class Meta:
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
from django.contrib.postgres.operations import UnaccentExtension
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("core", "0020_remove_is_public_add_field_attachments_and_duplicated_from"),
|
||||
]
|
||||
|
||||
operations = [UnaccentExtension()]
|
||||
@@ -7,6 +7,7 @@ from faker import Faker
|
||||
from rest_framework.test import APIClient
|
||||
|
||||
from core import factories
|
||||
from core.api.filters import remove_accents
|
||||
|
||||
fake = Faker()
|
||||
pytestmark = pytest.mark.django_db
|
||||
@@ -49,14 +50,16 @@ def test_api_documents_descendants_filter_unknown_field():
|
||||
[
|
||||
("Project Alpha", 1), # Exact match
|
||||
("project", 2), # Partial match (case-insensitive)
|
||||
("Guide", 1), # Word match within a title
|
||||
("Guide", 2), # Word match within a title
|
||||
("Special", 0), # No match (nonexistent keyword)
|
||||
("2024", 2), # Match by numeric keyword
|
||||
("", 5), # Empty string
|
||||
("", 6), # Empty string
|
||||
("velo", 1), # Accent-insensitive match (velo vs vélo)
|
||||
("bêta", 1), # Accent-insensitive match (bêta vs beta)
|
||||
],
|
||||
)
|
||||
def test_api_documents_descendants_filter_title(query, nb_results):
|
||||
"""Authenticated users should be able to search documents by their title."""
|
||||
"""Authenticated users should be able to search documents by their unaccented title."""
|
||||
user = factories.UserFactory()
|
||||
client = APIClient()
|
||||
client.force_login(user)
|
||||
@@ -70,6 +73,7 @@ def test_api_documents_descendants_filter_title(query, nb_results):
|
||||
"User Guide",
|
||||
"Financial Report 2024",
|
||||
"Annual Review 2024",
|
||||
"Guide du vélo urbain", # <-- Title with accent for accent-insensitive test
|
||||
]
|
||||
for title in titles:
|
||||
factories.DocumentFactory(title=title, parent=document)
|
||||
@@ -85,4 +89,7 @@ def test_api_documents_descendants_filter_title(query, nb_results):
|
||||
|
||||
# Ensure all results contain the query in their title
|
||||
for result in results:
|
||||
assert query.lower().strip() in result["title"].lower()
|
||||
assert (
|
||||
remove_accents(query).lower().strip()
|
||||
in remove_accents(result["title"]).lower()
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user