✨(backend) add document search view
New API view that calls the indexed documents search view (resource server) of app "Find". Signed-off-by: Fabre Florian <ffabre@hybird.org>
This commit is contained in:
committed by
Quentin BEY
parent
3228f65092
commit
d721b97f68
@@ -49,6 +49,11 @@ LOGOUT_REDIRECT_URL=http://localhost:3000
|
||||
OIDC_REDIRECT_ALLOWED_HOSTS=["http://localhost:8083", "http://localhost:3000"]
|
||||
OIDC_AUTH_REQUEST_EXTRA_PARAMS={"acr_values": "eidas1"}
|
||||
|
||||
# Store OIDC tokens in the session
|
||||
OIDC_STORE_ACCESS_TOKEN = True # Store the access token in the session
|
||||
OIDC_STORE_REFRESH_TOKEN = True # Store the encrypted refresh token in the session
|
||||
OIDC_STORE_REFRESH_TOKEN_KEY = AnExampleKeyForDevPurposeOnly
|
||||
|
||||
# AI
|
||||
AI_FEATURE_ENABLED=true
|
||||
AI_BASE_URL=https://openaiendpoint.com
|
||||
|
||||
@@ -1013,3 +1013,17 @@ class ThreadSerializer(serializers.ModelSerializer):
|
||||
if request:
|
||||
return thread.get_abilities(request.user)
|
||||
return {}
|
||||
|
||||
|
||||
class FindDocumentSerializer(serializers.Serializer):
|
||||
"""Serializer for Find search requests"""
|
||||
|
||||
q = serializers.CharField(required=True)
|
||||
|
||||
def validate_q(self, value):
|
||||
"""Ensure the text field is not empty."""
|
||||
|
||||
if len(value.strip()) == 0:
|
||||
raise serializers.ValidationError("Text field cannot be empty.")
|
||||
|
||||
return value
|
||||
|
||||
@@ -23,6 +23,7 @@ from django.db.models.functions import Greatest, Left, Length
|
||||
from django.http import Http404, StreamingHttpResponse
|
||||
from django.urls import reverse
|
||||
from django.utils import timezone
|
||||
from django.utils.decorators import method_decorator
|
||||
from django.utils.functional import cached_property
|
||||
from django.utils.text import capfirst, slugify
|
||||
from django.utils.translation import gettext_lazy as _
|
||||
@@ -33,6 +34,7 @@ from botocore.exceptions import ClientError
|
||||
from csp.constants import NONE
|
||||
from csp.decorators import csp_update
|
||||
from lasuite.malware_detection import malware_detection
|
||||
from lasuite.oidc_login.decorators import refresh_oidc_access_token
|
||||
from rest_framework import filters, status, viewsets
|
||||
from rest_framework import response as drf_response
|
||||
from rest_framework.permissions import AllowAny
|
||||
@@ -50,6 +52,7 @@ from core.services.converter_services import (
|
||||
from core.services.converter_services import (
|
||||
YdocConverter,
|
||||
)
|
||||
from core.services.search_indexers import FindDocumentIndexer
|
||||
from core.tasks.mail import send_ask_for_access_mail
|
||||
from core.utils import extract_attachments, filter_descendants
|
||||
|
||||
@@ -387,6 +390,7 @@ class DocumentViewSet(
|
||||
list_serializer_class = serializers.ListDocumentSerializer
|
||||
trashbin_serializer_class = serializers.ListDocumentSerializer
|
||||
tree_serializer_class = serializers.ListDocumentSerializer
|
||||
search_serializer_class = serializers.ListDocumentSerializer
|
||||
|
||||
def get_queryset(self):
|
||||
"""Get queryset performing all annotation and filtering on the document tree structure."""
|
||||
@@ -1078,10 +1082,37 @@ class DocumentViewSet(
|
||||
{"id": str(duplicated_document.id)}, status=status.HTTP_201_CREATED
|
||||
)
|
||||
|
||||
# TODO
|
||||
# @drf.decorators.action(detail=False, methods=["get"])
|
||||
# def search(self, request, *args, **kwargs):
|
||||
# index.search()
|
||||
@drf.decorators.action(detail=False, methods=["get"], url_path="search")
|
||||
@method_decorator(refresh_oidc_access_token)
|
||||
def search(self, request, *args, **kwargs):
|
||||
"""
|
||||
Returns a DRF response containing the filtered, annotated and ordered document list.
|
||||
The filtering allows full text search through the opensearch indexation app "find".
|
||||
"""
|
||||
access_token = request.session.get("oidc_access_token")
|
||||
|
||||
serializer = serializers.FindDocumentSerializer(data=request.query_params)
|
||||
serializer.is_valid(raise_exception=True)
|
||||
|
||||
try:
|
||||
indexer = FindDocumentIndexer()
|
||||
queryset = indexer.search(
|
||||
text=serializer.validated_data.get("q", ""),
|
||||
user=request.user,
|
||||
token=access_token,
|
||||
)
|
||||
except RuntimeError:
|
||||
return drf.response.Response(
|
||||
{"detail": "The service is not configured properly."},
|
||||
status=status.HTTP_401_UNAUTHORIZED,
|
||||
)
|
||||
|
||||
return self.get_response_for_queryset(
|
||||
queryset,
|
||||
context={
|
||||
"request": request,
|
||||
},
|
||||
)
|
||||
|
||||
@drf.decorators.action(detail=True, methods=["get"], url_path="versions")
|
||||
def versions_list(self, request, *args, **kwargs):
|
||||
|
||||
@@ -41,8 +41,8 @@ from .choices import (
|
||||
RoleChoices,
|
||||
get_equivalent_link_definition,
|
||||
)
|
||||
from .validators import sub_validator
|
||||
from .tasks.find import trigger_document_indexer
|
||||
from .validators import sub_validator
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
@@ -48,19 +48,19 @@ def get_batch_accesses_by_users_and_teams(paths):
|
||||
|
||||
|
||||
def get_visited_document_ids_of(user):
|
||||
"""
|
||||
Returns the ids of the documents that have a linktrace to the user and NOT owned.
|
||||
It will be use to limit the opensearch responses to the public documents already
|
||||
"visited" by the user.
|
||||
"""
|
||||
if isinstance(user, AnonymousUser):
|
||||
return []
|
||||
|
||||
# TODO : exclude links when user already have a specific access to the doc
|
||||
qs = models.LinkTrace.objects.filter(
|
||||
user=user
|
||||
).exclude(
|
||||
qs = models.LinkTrace.objects.filter(user=user).exclude(
|
||||
document__accesses__user=user,
|
||||
)
|
||||
|
||||
return list({
|
||||
str(id) for id in qs.values_list("document_id", flat=True)
|
||||
})
|
||||
return list({str(id) for id in qs.values_list("document_id", flat=True)})
|
||||
|
||||
|
||||
class BaseDocumentIndexer(ABC):
|
||||
@@ -129,13 +129,14 @@ class BaseDocumentIndexer(ABC):
|
||||
"""
|
||||
visited_ids = get_visited_document_ids_of(user)
|
||||
|
||||
response = self.search_query(data={
|
||||
"q": text,
|
||||
"visited": visited_ids,
|
||||
"services": ["docs"],
|
||||
}, token=token)
|
||||
|
||||
print(response)
|
||||
response = self.search_query(
|
||||
data={
|
||||
"q": text,
|
||||
"visited": visited_ids,
|
||||
"services": ["docs"],
|
||||
},
|
||||
token=token,
|
||||
)
|
||||
|
||||
return self.format_response(response)
|
||||
|
||||
@@ -207,7 +208,7 @@ class FindDocumentIndexer(BaseDocumentIndexer):
|
||||
|
||||
if not url:
|
||||
raise RuntimeError(
|
||||
"SEARCH_INDEXER_QUERY_URL must be set in Django settings before indexing."
|
||||
"SEARCH_INDEXER_QUERY_URL must be set in Django settings before search."
|
||||
)
|
||||
|
||||
try:
|
||||
@@ -228,9 +229,7 @@ class FindDocumentIndexer(BaseDocumentIndexer):
|
||||
"""
|
||||
Retrieve documents ids from Find app response and return a queryset.
|
||||
"""
|
||||
return models.Document.objects.filter(pk__in=[
|
||||
d['_id'] for d in data
|
||||
])
|
||||
return models.Document.objects.filter(pk__in=[d["_id"] for d in data])
|
||||
|
||||
def push(self, data):
|
||||
"""
|
||||
|
||||
@@ -86,7 +86,8 @@ def trigger_document_indexer(document, on_commit=False):
|
||||
|
||||
logger.info(
|
||||
"Add task for document %s indexation in %.2f seconds",
|
||||
document.pk, countdown
|
||||
document.pk,
|
||||
countdown,
|
||||
)
|
||||
|
||||
# Each time this method is called during the countdown, we increment the
|
||||
|
||||
@@ -21,7 +21,7 @@ def test_index():
|
||||
|
||||
with transaction.atomic():
|
||||
doc = factories.DocumentFactory()
|
||||
empty_doc = factories.DocumentFactory(title=None, content='')
|
||||
empty_doc = factories.DocumentFactory(title=None, content="")
|
||||
no_title_doc = factories.DocumentFactory(title=None)
|
||||
|
||||
factories.UserDocumentAccessFactory(document=doc, user=user)
|
||||
@@ -43,7 +43,10 @@ def test_index():
|
||||
push_call_args = [call.args[0] for call in mock_push.call_args_list]
|
||||
|
||||
assert len(push_call_args) == 1 # called once but with a batch of docs
|
||||
assert sorted(push_call_args[0], key=sortkey) == sorted([
|
||||
indexer.serialize_document(doc, accesses),
|
||||
indexer.serialize_document(no_title_doc, accesses),
|
||||
], key=sortkey)
|
||||
assert sorted(push_call_args[0], key=sortkey) == sorted(
|
||||
[
|
||||
indexer.serialize_document(doc, accesses),
|
||||
indexer.serialize_document(no_title_doc, accesses),
|
||||
],
|
||||
key=sortkey,
|
||||
)
|
||||
|
||||
137
src/backend/core/tests/documents/test_api_documents_search.py
Normal file
137
src/backend/core/tests/documents/test_api_documents_search.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""
|
||||
Tests for Documents API endpoint in impress's core app: list
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import responses
|
||||
from faker import Faker
|
||||
from rest_framework.test import APIClient
|
||||
|
||||
from core import factories, models
|
||||
|
||||
fake = Faker()
|
||||
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
@pytest.mark.parametrize("role", models.LinkRoleChoices.values)
|
||||
@pytest.mark.parametrize("reach", models.LinkReachChoices.values)
|
||||
@responses.activate
|
||||
def test_api_documents_search_anonymous(reach, role, settings):
|
||||
"""
|
||||
Anonymous users should not be allowed to search documents whatever the
|
||||
link reach and link role
|
||||
"""
|
||||
factories.DocumentFactory(link_reach=reach, link_role=role)
|
||||
settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
|
||||
|
||||
factories.DocumentFactory(link_reach=reach, link_role=role)
|
||||
|
||||
# Find response
|
||||
responses.add(
|
||||
responses.POST,
|
||||
"http://find/api/v1.0/search",
|
||||
json=[],
|
||||
status=200,
|
||||
)
|
||||
|
||||
response = APIClient().get("/api/v1.0/documents/search/", data={"q": "alpha"})
|
||||
|
||||
assert response.status_code == 200
|
||||
assert response.json() == {
|
||||
"count": 0,
|
||||
"next": None,
|
||||
"previous": None,
|
||||
"results": [],
|
||||
}
|
||||
|
||||
|
||||
def test_api_documents_search_endpoint_is_none(settings):
|
||||
"""Missing SEARCH_INDEXER_QUERY_URL should throw an error"""
|
||||
settings.SEARCH_INDEXER_QUERY_URL = None
|
||||
|
||||
user = factories.UserFactory()
|
||||
|
||||
client = APIClient()
|
||||
client.force_login(user)
|
||||
|
||||
response = APIClient().get("/api/v1.0/documents/search/", data={"q": "alpha"})
|
||||
|
||||
assert response.status_code == 401
|
||||
assert response.json() == {"detail": "The service is not configured properly."}
|
||||
|
||||
|
||||
@responses.activate
|
||||
def test_api_documents_search_invalid_params(settings):
|
||||
"""Validate the format of documents as returned by the search view."""
|
||||
settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
|
||||
|
||||
user = factories.UserFactory()
|
||||
|
||||
client = APIClient()
|
||||
client.force_login(user)
|
||||
|
||||
response = APIClient().get("/api/v1.0/documents/search/")
|
||||
|
||||
assert response.status_code == 400
|
||||
assert response.json() == {"q": ["This field is required."]}
|
||||
|
||||
|
||||
@responses.activate
|
||||
def test_api_documents_search_format(settings):
|
||||
"""Validate the format of documents as returned by the search view."""
|
||||
settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
|
||||
|
||||
user = factories.UserFactory()
|
||||
|
||||
client = APIClient()
|
||||
client.force_login(user)
|
||||
|
||||
user_a, user_b, user_c = factories.UserFactory.create_batch(3)
|
||||
document = factories.DocumentFactory(
|
||||
title="alpha",
|
||||
users=(user_a, user_c),
|
||||
link_traces=(user, user_b),
|
||||
)
|
||||
access = factories.UserDocumentAccessFactory(document=document, user=user)
|
||||
|
||||
# Find response
|
||||
responses.add(
|
||||
responses.POST,
|
||||
"http://find/api/v1.0/search",
|
||||
json=[
|
||||
{"_id": str(document.pk)},
|
||||
],
|
||||
status=200,
|
||||
)
|
||||
response = client.get("/api/v1.0/documents/search/", data={"q": "alpha"})
|
||||
|
||||
assert response.status_code == 200
|
||||
content = response.json()
|
||||
results = content.pop("results")
|
||||
assert content == {
|
||||
"count": 1,
|
||||
"next": None,
|
||||
"previous": None,
|
||||
}
|
||||
assert len(results) == 1
|
||||
assert results[0] == {
|
||||
"id": str(document.id),
|
||||
"abilities": document.get_abilities(user),
|
||||
"ancestors_link_reach": None,
|
||||
"ancestors_link_role": None,
|
||||
"computed_link_reach": document.computed_link_reach,
|
||||
"computed_link_role": document.computed_link_role,
|
||||
"created_at": document.created_at.isoformat().replace("+00:00", "Z"),
|
||||
"creator": str(document.creator.id),
|
||||
"depth": 1,
|
||||
"excerpt": document.excerpt,
|
||||
"link_reach": document.link_reach,
|
||||
"link_role": document.link_role,
|
||||
"nb_accesses_ancestors": 3,
|
||||
"nb_accesses_direct": 3,
|
||||
"numchild": 0,
|
||||
"path": document.path,
|
||||
"title": document.title,
|
||||
"updated_at": document.updated_at.isoformat().replace("+00:00", "Z"),
|
||||
"user_role": access.role,
|
||||
}
|
||||
@@ -1,11 +1,17 @@
|
||||
"""Tests for Documents search indexers"""
|
||||
|
||||
from functools import partial
|
||||
from unittest.mock import patch
|
||||
|
||||
from django.contrib.auth.models import AnonymousUser
|
||||
|
||||
import pytest
|
||||
|
||||
from core import factories, utils
|
||||
from core.services.search_indexers import FindDocumentIndexer
|
||||
from core import factories, models, utils
|
||||
from core.services.search_indexers import (
|
||||
FindDocumentIndexer,
|
||||
get_visited_document_ids_of,
|
||||
)
|
||||
|
||||
pytestmark = pytest.mark.django_db
|
||||
|
||||
@@ -187,7 +193,6 @@ def test_services_search_indexers_ancestors_link_reach(mock_push):
|
||||
|
||||
FindDocumentIndexer().index()
|
||||
|
||||
seen_doc_ids = set()
|
||||
results = {doc["id"]: doc for doc in mock_push.call_args[0][0]}
|
||||
assert len(results) == 4
|
||||
assert results[str(great_grand_parent.id)]["reach"] == "restricted"
|
||||
@@ -207,7 +212,6 @@ def test_services_search_indexers_ancestors_users(mock_push):
|
||||
|
||||
FindDocumentIndexer().index()
|
||||
|
||||
seen_doc_ids = set()
|
||||
results = {doc["id"]: doc for doc in mock_push.call_args[0][0]}
|
||||
assert len(results) == 3
|
||||
assert results[str(grand_parent.id)]["users"] == [str(user_gp.sub)]
|
||||
@@ -228,7 +232,6 @@ def test_services_search_indexers_ancestors_teams(mock_push):
|
||||
|
||||
FindDocumentIndexer().index()
|
||||
|
||||
seen_doc_ids = set()
|
||||
results = {doc["id"]: doc for doc in mock_push.call_args[0][0]}
|
||||
assert len(results) == 3
|
||||
assert results[str(grand_parent.id)]["groups"] == ["team_gp"]
|
||||
@@ -258,3 +261,85 @@ def test_push_uses_correct_url_and_data(mock_post, settings):
|
||||
assert args[0] == settings.SEARCH_INDEXER_URL
|
||||
assert kwargs.get("json") == sample_data
|
||||
assert kwargs.get("timeout") == 10
|
||||
|
||||
|
||||
def test_get_visited_document_ids_of():
|
||||
"""
|
||||
get_visited_document_ids_of() returns the ids of the documents viewed
|
||||
by the user BUT without specific access configuration (like public ones)
|
||||
"""
|
||||
user = factories.UserFactory()
|
||||
other = factories.UserFactory()
|
||||
anonymous = AnonymousUser()
|
||||
|
||||
assert not get_visited_document_ids_of(anonymous)
|
||||
assert not get_visited_document_ids_of(user)
|
||||
|
||||
doc1, doc2, _ = factories.DocumentFactory.create_batch(3)
|
||||
|
||||
create_link = partial(models.LinkTrace.objects.create, user=user, is_masked=False)
|
||||
|
||||
create_link(document=doc1)
|
||||
create_link(document=doc2)
|
||||
|
||||
# The third document is not visited
|
||||
assert sorted(get_visited_document_ids_of(user)) == sorted(
|
||||
[str(doc1.pk), str(doc2.pk)]
|
||||
)
|
||||
|
||||
factories.UserDocumentAccessFactory(user=other, document=doc1)
|
||||
factories.UserDocumentAccessFactory(user=user, document=doc2)
|
||||
|
||||
# The second document have an access for the user
|
||||
assert get_visited_document_ids_of(user) == [str(doc1.pk)]
|
||||
|
||||
|
||||
@patch("requests.post")
|
||||
def test_services_search_indexers_search(mock_post, settings):
|
||||
"""
|
||||
search() should call requests.post to SEARCH_INDEXER_QUERY_URL with the
|
||||
document ids from linktraces.
|
||||
"""
|
||||
user = factories.UserFactory()
|
||||
indexer = FindDocumentIndexer()
|
||||
|
||||
mock_response = mock_post.return_value
|
||||
mock_response.raise_for_status.return_value = None # No error
|
||||
|
||||
doc1, doc2, _ = factories.DocumentFactory.create_batch(3)
|
||||
|
||||
create_link = partial(models.LinkTrace.objects.create, user=user, is_masked=False)
|
||||
|
||||
create_link(document=doc1)
|
||||
create_link(document=doc2)
|
||||
|
||||
indexer.search("alpha", user=user, token="mytoken")
|
||||
|
||||
args, kwargs = mock_post.call_args
|
||||
|
||||
assert args[0] == settings.SEARCH_INDEXER_QUERY_URL
|
||||
|
||||
query_data = kwargs.get("json")
|
||||
assert query_data["q"] == "alpha"
|
||||
assert sorted(query_data["visited"]) == sorted([str(doc1.pk), str(doc2.pk)])
|
||||
assert query_data["services"] == ["docs"]
|
||||
|
||||
assert kwargs.get("headers") == {"Authorization": "Bearer mytoken"}
|
||||
assert kwargs.get("timeout") == 10
|
||||
|
||||
|
||||
def test_search_query_raises_error_if_search_endpoint_is_none(settings):
|
||||
"""
|
||||
Indexer should raise RuntimeError if SEARCH_INDEXER_QUERY_URL is None or empty.
|
||||
"""
|
||||
settings.SEARCH_INDEXER_QUERY_URL = None
|
||||
indexer = FindDocumentIndexer()
|
||||
user = factories.UserFactory()
|
||||
|
||||
with pytest.raises(RuntimeError) as exc_info:
|
||||
indexer.search("alpha", user=user, token="mytoken")
|
||||
|
||||
assert (
|
||||
"SEARCH_INDEXER_QUERY_URL must be set in Django settings before search."
|
||||
in str(exc_info.value)
|
||||
)
|
||||
|
||||
@@ -109,6 +109,9 @@ class Base(Configuration):
|
||||
SEARCH_INDEXER_SECRET = values.Value(
|
||||
default=None, environ_name="SEARCH_INDEXER_SECRET", environ_prefix=None
|
||||
)
|
||||
SEARCH_INDEXER_QUERY_URL = values.Value(
|
||||
default=None, environ_name="SEARCH_INDEXER_QUERY_URL", environ_prefix=None
|
||||
)
|
||||
|
||||
# Static files (CSS, JavaScript, Images)
|
||||
STATIC_URL = "/static/"
|
||||
|
||||
Reference in New Issue
Block a user