🚸(backend) make document search on title accent-insensitive
This should work in both directions: searching for "vélo" matches a document whose title contains "velo", and searching for "velo" matches a document whose title contains "vélo".
This commit is contained in:
@@ -10,6 +10,7 @@ and this project adheres to
|
|||||||
|
|
||||||
## Added
|
## Added
|
||||||
|
|
||||||
|
- 🚸(backend) make document search on title accent-insensitive #874
|
||||||
- 🚩 add homepage feature flag #861
|
- 🚩 add homepage feature flag #861
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
"""API filters for Impress' core application."""
|
"""API filters for Impress' core application."""
|
||||||
|
|
||||||
|
import unicodedata
|
||||||
|
|
||||||
from django.utils.translation import gettext_lazy as _
|
from django.utils.translation import gettext_lazy as _
|
||||||
|
|
||||||
import django_filters
|
import django_filters
|
||||||
@@ -7,13 +9,42 @@ import django_filters
|
|||||||
from core import models
|
from core import models
|
||||||
|
|
||||||
|
|
||||||
class DocumentFilter(django_filters.FilterSet):
|
def remove_accents(value):
    """Remove accents from a string (vélo -> velo).

    The string is decomposed (NFD) so that each accent becomes a separate
    combining character, the nonspacing marks (Unicode category "Mn") are
    dropped, and the remainder is recomposed (NFC).

    The final recomposition matters for text that NFD splits into parts that
    are not nonspacing marks, such as Hangul syllables: without it they would
    be returned as decomposed jamo and would no longer compare equal to the
    original, composed text.

    Note that every "Mn" character is removed, whatever its script.

    Args:
        value: The string to strip accents from.

    Returns:
        The string without nonspacing combining marks, in NFC form.
    """
    decomposed = unicodedata.normalize("NFD", value)
    stripped = "".join(
        char for char in decomposed if unicodedata.category(char) != "Mn"
    )
    # Recompose whatever NFD split apart that was not an accent.
    return unicodedata.normalize("NFC", stripped)
|
||||||
|
|
||||||
|
|
||||||
|
class AccentInsensitiveCharFilter(django_filters.CharFilter):
    """
    CharFilter variant that strips accents from the searched value before
    delegating to the regular CharFilter filtering.
    """

    def filter(self, qs, value):
        """
        Filter the queryset with the accent-free form of the searched value.

        Args:
            qs: The queryset to filter.
            value: The searched value; accents are removed when it is truthy,
                otherwise it is forwarded untouched.

        Returns:
            The queryset returned by the parent CharFilter.
        """
        # Empty values (None, "") are handed to the parent class unchanged.
        searched = remove_accents(value) if value else value
        return super().filter(qs, searched)
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentFilter(django_filters.FilterSet):
|
||||||
|
"""
|
||||||
|
Custom filter for filtering documents on title (accent and case insensitive).
|
||||||
|
"""
|
||||||
|
|
||||||
|
title = AccentInsensitiveCharFilter(
|
||||||
|
field_name="title", lookup_expr="unaccent__icontains", label=_("Title")
|
||||||
)
|
)
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
|
|||||||
@@ -0,0 +1,10 @@
|
|||||||
|
from django.contrib.postgres.operations import UnaccentExtension
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Run the ``UnaccentExtension`` operation from django.contrib.postgres."""

    dependencies = [
        ("core", "0020_remove_is_public_add_field_attachments_and_duplicated_from"),
    ]

    operations = [
        UnaccentExtension(),
    ]
|
||||||
@@ -7,6 +7,7 @@ from faker import Faker
|
|||||||
from rest_framework.test import APIClient
|
from rest_framework.test import APIClient
|
||||||
|
|
||||||
from core import factories
|
from core import factories
|
||||||
|
from core.api.filters import remove_accents
|
||||||
|
|
||||||
fake = Faker()
|
fake = Faker()
|
||||||
pytestmark = pytest.mark.django_db
|
pytestmark = pytest.mark.django_db
|
||||||
@@ -49,14 +50,16 @@ def test_api_documents_descendants_filter_unknown_field():
|
|||||||
[
|
[
|
||||||
("Project Alpha", 1), # Exact match
|
("Project Alpha", 1), # Exact match
|
||||||
("project", 2), # Partial match (case-insensitive)
|
("project", 2), # Partial match (case-insensitive)
|
||||||
("Guide", 1), # Word match within a title
|
("Guide", 2), # Word match within a title
|
||||||
("Special", 0), # No match (nonexistent keyword)
|
("Special", 0), # No match (nonexistent keyword)
|
||||||
("2024", 2), # Match by numeric keyword
|
("2024", 2), # Match by numeric keyword
|
||||||
("", 5), # Empty string
|
("", 6), # Empty string
|
||||||
|
("velo", 1), # Accent-insensitive match (velo vs vélo)
|
||||||
|
("bêta", 1), # Accent-insensitive match (bêta vs beta)
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_api_documents_descendants_filter_title(query, nb_results):
|
def test_api_documents_descendants_filter_title(query, nb_results):
|
||||||
"""Authenticated users should be able to search documents by their title."""
|
"""Authenticated users should be able to search documents by their unaccented title."""
|
||||||
user = factories.UserFactory()
|
user = factories.UserFactory()
|
||||||
client = APIClient()
|
client = APIClient()
|
||||||
client.force_login(user)
|
client.force_login(user)
|
||||||
@@ -70,6 +73,7 @@ def test_api_documents_descendants_filter_title(query, nb_results):
|
|||||||
"User Guide",
|
"User Guide",
|
||||||
"Financial Report 2024",
|
"Financial Report 2024",
|
||||||
"Annual Review 2024",
|
"Annual Review 2024",
|
||||||
|
"Guide du vélo urbain", # <-- Title with accent for accent-insensitive test
|
||||||
]
|
]
|
||||||
for title in titles:
|
for title in titles:
|
||||||
factories.DocumentFactory(title=title, parent=document)
|
factories.DocumentFactory(title=title, parent=document)
|
||||||
@@ -85,4 +89,7 @@ def test_api_documents_descendants_filter_title(query, nb_results):
|
|||||||
|
|
||||||
# Ensure all results contain the query in their title
|
# Ensure all results contain the query in their title
|
||||||
for result in results:
|
for result in results:
|
||||||
assert query.lower().strip() in result["title"].lower()
|
assert (
|
||||||
|
remove_accents(query).lower().strip()
|
||||||
|
in remove_accents(result["title"]).lower()
|
||||||
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user