From c24f46067b047321c324952501043f5afedd1c12 Mon Sep 17 00:00:00 2001 From: Fabre Florian Date: Fri, 14 Nov 2025 13:31:09 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8(backend)=20adapt=20to=20Find=20new=20?= =?UTF-8?q?search=20pagination?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use nb_results instead of page/page_size argument for /search API. Signed-off-by: Fabre Florian --- docs/env.md | 5 ++- docs/search.md | 2 + src/backend/core/api/viewsets.py | 2 - src/backend/core/services/search_indexers.py | 14 +++---- src/backend/core/tasks/search.py | 8 ++-- .../documents/test_api_documents_search.py | 3 +- .../tests/test_services_search_indexers.py | 42 ++++++++++++++++++- src/backend/impress/settings.py | 3 ++ 8 files changed, 59 insertions(+), 20 deletions(-) diff --git a/docs/env.md b/docs/env.md index f11e6c62..58e9cd92 100644 --- a/docs/env.md +++ b/docs/env.md @@ -90,14 +90,17 @@ These are the environment variables you can set for the `impress-backend` contai | OIDC_RP_SCOPES | Scopes requested for OIDC | openid email | | OIDC_RP_SIGN_ALGO | verification algorithm used OIDC tokens | RS256 | | OIDC_STORE_ID_TOKEN | Store OIDC token | true | -| OIDC_USE_NONCE | Use nonce for OIDC | true | | OIDC_USERINFO_FULLNAME_FIELDS | OIDC token claims to create full name | ["first_name", "last_name"] | | OIDC_USERINFO_SHORTNAME_FIELD | OIDC token claims to create shortname | first_name | +| OIDC_USE_NONCE | Use nonce for OIDC | true | | POSTHOG_KEY | Posthog key for analytics | | | REDIS_URL | Cache url | redis://redis:6379/1 | | SEARCH_INDEXER_BATCH_SIZE | Size of each batch for indexation of all documents | 100000 | +| SEARCH_INDEXER_CLASS | Class of the backend for document indexation & search | | | SEARCH_INDEXER_COUNTDOWN | Minimum debounce delay of indexation jobs (in seconds) | 1 | +| SEARCH_INDEXER_QUERY_LIMIT | Maximum number of results expected from the search endpoint | 50 | | SEARCH_INDEXER_SECRET | Token for indexation 
queries | | +| SEARCH_INDEXER_URL | Find application endpoint for indexation | | | SENTRY_DSN | Sentry host | | | SESSION_COOKIE_AGE | duration of the cookie session | 60*60*12 | | SIGNUP_NEW_USER_TO_MARKETING_EMAIL | Register new user to the marketing onboarding. If True, see env LASUITE_MARKETING_* system | False | diff --git a/docs/search.md b/docs/search.md index 63581081..416f972b 100644 --- a/docs/search.md +++ b/docs/search.md @@ -27,6 +27,8 @@ SEARCH_INDEXER_URL="http://find:8000/api/v1.0/documents/index/" # Search endpoint. Uses the OIDC token for authentication SEARCH_INDEXER_QUERY_URL="http://find:8000/api/v1.0/documents/search/" +# Maximum number of results expected from the search endpoint +SEARCH_INDEXER_QUERY_LIMIT=50 ``` We also need to enable the **OIDC Token** refresh or the authentication will fail quickly. diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 43a39f56..eccbf495 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -1119,8 +1119,6 @@ class DocumentViewSet( text=text, token=access_token, visited=get_visited_document_ids_of(queryset, user), - page=1, - page_size=100, ) docs_by_uuid = {str(d.pk): d for d in queryset.filter(pk__in=results)} diff --git a/src/backend/core/services/search_indexers.py b/src/backend/core/services/search_indexers.py index 424897c9..a4bb9eec 100644 --- a/src/backend/core/services/search_indexers.py +++ b/src/backend/core/services/search_indexers.py @@ -110,6 +110,7 @@ class BaseDocumentIndexer(ABC): self.indexer_url = settings.SEARCH_INDEXER_URL self.indexer_secret = settings.SEARCH_INDEXER_SECRET self.search_url = settings.SEARCH_INDEXER_QUERY_URL + self.search_limit = settings.SEARCH_INDEXER_QUERY_LIMIT if not self.indexer_url: raise ImproperlyConfigured( @@ -184,7 +185,7 @@ class BaseDocumentIndexer(ABC): """ # pylint: disable-next=too-many-arguments,too-many-positional-arguments - def search(self, text, token, visited=(), page=1, 
page_size=50): + def search(self, text, token, visited=(), nb_results=None): """ Search for documents in Find app. Ensure the same default ordering as "Docs" list : -updated_at @@ -197,20 +198,17 @@ class BaseDocumentIndexer(ABC): visited (list, optional): List of ids of active public documents with LinkTrace Defaults to settings.SEARCH_INDEXER_BATCH_SIZE. - page (int, optional): - The page number to retrieve. - Defaults to 1 if not specified. - page_size (int, optional): - The number of results to return per page. + nb_results (int, optional): + The number of results to return. Defaults to 50 if not specified. """ + nb_results = nb_results or self.search_limit response = self.search_query( data={ "q": text, "visited": visited, "services": ["docs"], - "page_number": page, - "page_size": page_size, + "nb_results": nb_results, "order_by": "updated_at", "order_direction": "desc", }, diff --git a/src/backend/core/tasks/search.py b/src/backend/core/tasks/search.py index ebdf6c15..4b30c6a7 100644 --- a/src/backend/core/tasks/search.py +++ b/src/backend/core/tasks/search.py @@ -23,11 +23,9 @@ def document_indexer_task(document_id): """Celery Task : Sends indexation query for a document.""" indexer = get_document_indexer() - if indexer is None: - return - - logger.info("Start document %s indexation", document_id) - indexer.index(models.Document.objects.filter(pk=document_id)) + if indexer: + logger.info("Start document %s indexation", document_id) + indexer.index(models.Document.objects.filter(pk=document_id)) def batch_indexer_throttle_acquire(timeout: int = 0, atomic: bool = True): diff --git a/src/backend/core/tests/documents/test_api_documents_search.py b/src/backend/core/tests/documents/test_api_documents_search.py index 34766178..c6d0d8e3 100644 --- a/src/backend/core/tests/documents/test_api_documents_search.py +++ b/src/backend/core/tests/documents/test_api_documents_search.py @@ -326,8 +326,7 @@ def test_api_documents_search_pagination( "q": "alpha", "visited": 
[], "services": ["docs"], - "page_number": 1, - "page_size": 100, + "nb_results": 50, "order_by": "updated_at", "order_direction": "desc", } diff --git a/src/backend/core/tests/test_services_search_indexers.py b/src/backend/core/tests/test_services_search_indexers.py index 0a6daf40..61488a92 100644 --- a/src/backend/core/tests/test_services_search_indexers.py +++ b/src/backend/core/tests/test_services_search_indexers.py @@ -588,10 +588,48 @@ def test_services_search_indexers_search(mock_post, indexer_settings): assert query_data["q"] == "alpha" assert sorted(query_data["visited"]) == sorted([str(doc1.pk), str(doc2.pk)]) assert query_data["services"] == ["docs"] - assert query_data["page_number"] == 1 - assert query_data["page_size"] == 50 + assert query_data["nb_results"] == 50 assert query_data["order_by"] == "updated_at" assert query_data["order_direction"] == "desc" assert kwargs.get("headers") == {"Authorization": "Bearer mytoken"} assert kwargs.get("timeout") == 10 + + +@patch("requests.post") +def test_services_search_indexers_search_nb_results(mock_post, indexer_settings): + """ + Find API call should have nb_results == SEARCH_INDEXER_QUERY_LIMIT + or the given nb_results argument. 
+ """ + indexer_settings.SEARCH_INDEXER_QUERY_LIMIT = 25 + + user = factories.UserFactory() + indexer = SearchIndexer() + + mock_response = mock_post.return_value + mock_response.raise_for_status.return_value = None # No error + + doc1, doc2, _ = factories.DocumentFactory.create_batch(3) + + create_link = partial(models.LinkTrace.objects.create, user=user, is_masked=False) + + create_link(document=doc1) + create_link(document=doc2) + + visited = get_visited_document_ids_of(models.Document.objects.all(), user) + + indexer.search("alpha", visited=visited, token="mytoken") + + args, kwargs = mock_post.call_args + + assert args[0] == indexer_settings.SEARCH_INDEXER_QUERY_URL + assert kwargs.get("json")["nb_results"] == 25 + + # The argument overrides the setting value + indexer.search("alpha", visited=visited, token="mytoken", nb_results=109) + + args, kwargs = mock_post.call_args + + assert args[0] == indexer_settings.SEARCH_INDEXER_QUERY_URL + assert kwargs.get("json")["nb_results"] == 109 diff --git a/src/backend/impress/settings.py b/src/backend/impress/settings.py index 1f88d38e..4b456ebb 100755 --- a/src/backend/impress/settings.py +++ b/src/backend/impress/settings.py @@ -120,6 +120,9 @@ class Base(Configuration): SEARCH_INDEXER_QUERY_URL = values.Value( default=None, environ_name="SEARCH_INDEXER_QUERY_URL", environ_prefix=None ) + SEARCH_INDEXER_QUERY_LIMIT = values.PositiveIntegerValue( + default=50, environ_name="SEARCH_INDEXER_QUERY_LIMIT", environ_prefix=None + ) # Static files (CSS, JavaScript, Images) STATIC_URL = "/static/"