From a2a184bb9316070ccd6795d2d7d5cd16d13cb0b0 Mon Sep 17 00:00:00 2001 From: Samuel Paccoud - DINUM Date: Mon, 16 Sep 2024 19:27:48 +0200 Subject: [PATCH] =?UTF-8?q?=E2=99=BB=EF=B8=8F(api)=20refactor=20getting=20?= =?UTF-8?q?versions=20to=20expose=20pagination?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Getting versions was not working properly. Some versions returned were not accessible by the user requesting the list of available versions. We refactor the code to make it simpler and let the frontend handle pagination (load more style). --- CHANGELOG.md | 4 +- src/backend/core/api/serializers.py | 12 +- src/backend/core/api/viewsets.py | 45 +++-- src/backend/core/models.py | 95 ++++----- .../documents/test_api_document_versions.py | 189 +++++++++++++++--- .../core/tests/test_models_documents.py | 31 ++- 6 files changed, 269 insertions(+), 107 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fef5ecd..ec30b59b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,8 +16,8 @@ and this project adheres to ## Fixed -- 🐛(backend) gitlab oicd userinfo endpoint #232 - +- 🐛 (backend) gitlab oicd userinfo endpoint #232 +- ♻️ (backend) getting list of document versions available for a user #258 ## [1.4.0] - 2024-09-17 diff --git a/src/backend/core/api/serializers.py b/src/backend/core/api/serializers.py index 3ef1ebad..e09bfef4 100644 --- a/src/backend/core/api/serializers.py +++ b/src/backend/core/api/serializers.py @@ -343,10 +343,10 @@ class InvitationSerializer(serializers.ModelSerializer): return attrs -class DocumentVersionSerializer(serializers.Serializer): - """Serialize Versions.""" +class VersionFilterSerializer(serializers.Serializer): + """Validate version filters applied to the list endpoint.""" - etag = serializers.CharField() - is_latest = serializers.BooleanField() - last_modified = serializers.DateTimeField() - version_id = serializers.CharField() + version_id = serializers.CharField(required=False, allow_blank=True) + page_size = serializers.IntegerField( + required=False, min_value=1, max_value=50, default=20 + ) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 26093007..e3a0003d 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -10,6 +10,7 @@ from django.contrib.postgres.aggregates import ArrayAgg from django.core.exceptions import ValidationError from django.core.files.storage import default_storage from django.db.models import ( + Min, OuterRef, Q, Subquery, @@ -373,28 +374,36 @@ class DocumentViewSet( Return the document's versions but only those created after the user got access to the document """ - if not request.user.is_authenticated: + user = request.user + if not user.is_authenticated: raise exceptions.PermissionDenied("Authentication required.") + # Validate query parameters using dedicated serializer + serializer = serializers.VersionFilterSerializer(data=request.query_params) + serializer.is_valid(raise_exception=True) + document = self.get_object() - user = request.user - from_datetime = min( - access.created_at - for access in document.accesses.filter( - Q(user=user) | Q(team__in=user.teams), + + # Users should not see version history dating from before they gained access to the + # document. Filter to get the minimum access date for the logged-in user + access_queryset = document.accesses.filter( + Q(user=user) | Q(team__in=user.teams) + ).aggregate(min_date=Min("created_at")) + + # Handle the case where the user has no accesses + min_datetime = access_queryset["min_date"] + if not min_datetime: + return exceptions.PermissionDenied( + "Only users with specific access can see version history" ) + + versions_data = document.get_versions_slice( + from_version_id=serializer.validated_data.get("version_id"), + min_datetime=min_datetime, + page_size=serializer.validated_data.get("page_size"), ) - versions_data = document.get_versions_slice(from_datetime=from_datetime)[ - "versions" - ] - paginator = pagination.PageNumberPagination() - paginated_versions = paginator.paginate_queryset(versions_data, request) - serialized_versions = serializers.DocumentVersionSerializer( - paginated_versions, many=True - ) - - return paginator.get_paginated_response(serialized_versions.data) + return drf_response.Response(versions_data) @decorators.action( detail=True, @@ -414,13 +423,13 @@ class DocumentViewSet( # Don't let users access versions that were created before they were given access # to the document user = request.user - from_datetime = min( + min_datetime = min( access.created_at for access in document.accesses.filter( Q(user=user) | Q(team__in=user.teams), ) ) - if response["LastModified"] < from_datetime: + if response["LastModified"] < min_datetime: raise Http404 if request.method == "DELETE": diff --git a/src/backend/core/models.py b/src/backend/core/models.py index 203c57ab..c4390f97 100644 --- a/src/backend/core/models.py +++ b/src/backend/core/models.py @@ -416,73 +416,62 @@ class Document(BaseModel): Bucket=default_storage.bucket_name, Key=self.file_key, VersionId=version_id ) - def get_versions_slice( - self, from_version_id="", from_datetime=None, page_size=None - ): + def get_versions_slice(self, from_version_id="", min_datetime=None, page_size=None): """Get document versions from object storage with pagination and starting conditions""" # /!\ Trick here /!\ # The "KeyMarker" and "VersionIdMarker" fields must either be both set or both not set. # The error we get otherwise is not helpful at all. - token = {} + markers = {} if from_version_id: - token.update( + markers.update( {"KeyMarker": self.file_key, "VersionIdMarker": from_version_id} ) - if from_datetime: - response = default_storage.connection.meta.client.list_object_versions( - Bucket=default_storage.bucket_name, - Prefix=self.file_key, - MaxKeys=settings.DOCUMENT_VERSIONS_PAGE_SIZE, - **token, - ) - - # Find the first version after the given datetime - version = None - for version in response.get("Versions", []): - if version["LastModified"] >= from_datetime: - token = { - "KeyMarker": self.file_key, - "VersionIdMarker": version["VersionId"], - } - break - else: - if version is None or version["LastModified"] < from_datetime: - if response["NextVersionIdMarker"]: - return self.get_versions_slice( - from_version_id=response["NextVersionIdMarker"], - page_size=settings.DOCUMENT_VERSIONS_PAGE_SIZE, - from_datetime=from_datetime, - ) - return { - "next_version_id_marker": "", - "is_truncated": False, - "versions": [], - } + real_page_size = ( + min(page_size, settings.DOCUMENT_VERSIONS_PAGE_SIZE) + if page_size + else settings.DOCUMENT_VERSIONS_PAGE_SIZE + ) response = default_storage.connection.meta.client.list_object_versions( Bucket=default_storage.bucket_name, Prefix=self.file_key, - MaxKeys=min(page_size, settings.DOCUMENT_VERSIONS_PAGE_SIZE) - if page_size - else settings.DOCUMENT_VERSIONS_PAGE_SIZE, - **token, + # compensate the latest version that we exclude below and get one more to + # know if there are more pages + MaxKeys=real_page_size + 2, + **markers, ) + + min_last_modified = min_datetime or self.created_at + versions = [ + { + key_snake: version[key_camel] + for key_snake, key_camel in [ + ("etag", "ETag"), + ("is_latest", "IsLatest"), + ("last_modified", "LastModified"), + ("version_id", "VersionId"), + ] + } + for version in response.get("Versions", []) + if version["LastModified"] >= min_last_modified + and version["IsLatest"] is False + ] + results = versions[:real_page_size] + + count = len(results) + if count == len(versions): + is_truncated = False + next_version_id_marker = "" + else: + is_truncated = True + next_version_id_marker = versions[count - 1]["version_id"] + return { - "next_version_id_marker": response["NextVersionIdMarker"], - "is_truncated": response["IsTruncated"], - "versions": [ - { - key_snake: version[key_camel] - for key_camel, key_snake in [ - ("ETag", "etag"), - ("IsLatest", "is_latest"), - ("LastModified", "last_modified"), - ("VersionId", "version_id"), - ] - } - for version in response.get("Versions", []) - ], + "next_version_id_marker": next_version_id_marker, + "is_truncated": is_truncated, + "versions": results, + "count": count, } def delete_version(self, version_id): diff --git a/src/backend/core/tests/documents/test_api_document_versions.py b/src/backend/core/tests/documents/test_api_document_versions.py index 1fce440c..69181218 100644 --- a/src/backend/core/tests/documents/test_api_document_versions.py +++ b/src/backend/core/tests/documents/test_api_document_versions.py @@ -60,7 +60,7 @@ def test_api_document_versions_list_authenticated_unrelated(reach): @pytest.mark.parametrize("via", VIA) -def test_api_document_versions_list_authenticated_related(via, mock_user_teams): +def test_api_document_versions_list_authenticated_related_success(via, mock_user_teams): """ Authenticated users should be able to list document versions for a document to which they are directly related, whatever their role in the document. @@ -95,12 +95,12 @@ def test_api_document_versions_list_authenticated_related(via, mock_user_teams): assert response.status_code == 200 content = response.json() - assert len(content["results"]) == 0 assert content["count"] == 0 # Add a new version to the document - document.content = "new content" - document.save() + for i in range(3): + document.content = f"new content {i:d}" + document.save() response = client.get( f"/api/v1.0/documents/{document.id!s}/versions/", @@ -108,8 +108,100 @@ def test_api_document_versions_list_authenticated_related(via, mock_user_teams): assert response.status_code == 200 content = response.json() - assert len(content["results"]) == 1 + # The current version is not listed + assert content["count"] == 2 + + +@pytest.mark.parametrize("via", VIA) +def test_api_document_versions_list_authenticated_related_pagination( + via, mock_user_teams +): + """ + The list of versions should be paginated and exclude versions that were created prior to the + user gaining access to the document. + """ + user = factories.UserFactory() + + client = APIClient() + client.force_login(user) + + document = factories.DocumentFactory() + for i in range(3): + document.content = f"before {i:d}" + document.save() + + if via == USER: + models.DocumentAccess.objects.create( + document=document, + user=user, + role=random.choice(models.RoleChoices.choices)[0], + ) + elif via == TEAM: + mock_user_teams.return_value = ["lasuite", "unknown"] + models.DocumentAccess.objects.create( + document=document, + team="lasuite", + role=random.choice(models.RoleChoices.choices)[0], + ) + + for i in range(4): + document.content = f"after {i:d}" + document.save() + + response = client.get( + f"/api/v1.0/documents/{document.id!s}/versions/", + ) + + content = response.json() + assert content["is_truncated"] is False + # The current version is not listed + assert content["count"] == 3 + assert content["next_version_id_marker"] == "" + all_version_ids = [version["version_id"] for version in content["versions"]] + + # - set page size + response = client.get( + f"/api/v1.0/documents/{document.id!s}/versions/?page_size=2", + ) + + content = response.json() + assert content["count"] == 2 + assert content["is_truncated"] is True + marker = content["next_version_id_marker"] + assert marker == all_version_ids[1] + assert [ + version["version_id"] for version in content["versions"] + ] == all_version_ids[:2] + + # - get page 2 + response = client.get( + f"/api/v1.0/documents/{document.id!s}/versions/?page_size=2&version_id={marker:s}", + ) + + content = response.json() assert content["count"] == 1 + assert content["is_truncated"] is False + assert content["next_version_id_marker"] == "" + assert content["versions"][0]["version_id"] == all_version_ids[2] + + +def test_api_document_versions_list_exceeds_max_page_size(): + """Page size should not exceed the limit set on the serializer""" + user = factories.UserFactory() + + client = APIClient() + client.force_login(user) + + document = factories.DocumentFactory(users=[user]) + document.content = "version 2" + document.save() + + response = client.get(f"/api/v1.0/documents/{document.id!s}/versions/?page_size=51") + + assert response.status_code == 400 + assert response.json() == { + "page_size": ["Ensure this value is less than or equal to 50."] + } @pytest.mark.parametrize("reach", models.LinkReachChoices.values) @@ -119,6 +211,9 @@ def test_api_document_versions_retrieve_anonymous(reach): restricted or authenticated link reach. """ document = factories.DocumentFactory(link_reach=reach) + document.content = "new content" + document.save() + version_id = document.get_versions_slice()["versions"][0]["version_id"] url = f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/" @@ -142,6 +237,9 @@ def test_api_document_versions_retrieve_authenticated_unrelated(reach): client.force_login(user) document = factories.DocumentFactory(link_reach=reach) + document.content = "new content" + document.save() + version_id = document.get_versions_slice()["versions"][0]["version_id"] response = client.get( @@ -157,7 +255,7 @@ def test_api_document_versions_retrieve_authenticated_unrelated(reach): def test_api_document_versions_retrieve_authenticated_related(via, mock_user_teams): """ A user who is related to a document should be allowed to retrieve the - associated document user accesses. + associated document versions. """ user = factories.UserFactory() @@ -165,6 +263,10 @@ def test_api_document_versions_retrieve_authenticated_related(via, mock_user_tea client.force_login(user) document = factories.DocumentFactory() + document.content = "new content" + document.save() + + assert len(document.get_versions_slice()["versions"]) == 1 version_id = document.get_versions_slice()["versions"][0]["version_id"] if via == USER: @@ -173,6 +275,8 @@ def test_api_document_versions_retrieve_authenticated_related(via, mock_user_tea mock_user_teams.return_value = ["lasuite", "unknown"] factories.TeamDocumentAccessFactory(document=document, team="lasuite") + time.sleep(1) # minio stores datetimes with the precision of a second + # Versions created before the document was shared should not be seen by the user response = client.get( f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/", @@ -180,11 +284,26 @@ def test_api_document_versions_retrieve_authenticated_related(via, mock_user_tea assert response.status_code == 404 - # Create a new version should make it available to the user - time.sleep(1) # minio stores datetimes with the precision of a second - document.content = "new content" + # Create a new version should not make it available to the user because + # only the current version is available to the user but it is excluded + # from the list + document.content = "new content 1" document.save() + assert len(document.get_versions_slice()["versions"]) == 2 + version_id = document.get_versions_slice()["versions"][0]["version_id"] + + response = client.get( + f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/", + ) + + assert response.status_code == 404 + + # Adding one more version should make the previous version available to the user + document.content = "new content 2" + document.save() + + assert len(document.get_versions_slice()["versions"]) == 3 version_id = document.get_versions_slice()["versions"][0]["version_id"] response = client.get( @@ -192,7 +311,7 @@ def test_api_document_versions_retrieve_authenticated_related(via, mock_user_tea ) assert response.status_code == 200 - assert response.json()["content"] == "new content" + assert response.json()["content"] == "new content 1" def test_api_document_versions_create_anonymous(): @@ -260,10 +379,15 @@ def test_api_document_versions_create_authenticated_related(via, mock_user_teams def test_api_document_versions_update_anonymous(): """Anonymous users should not be allowed to update a document version.""" access = factories.UserDocumentAccessFactory() - version_id = access.document.get_versions_slice()["versions"][0]["version_id"] + document = access.document + document.content = "new content" + document.save() + + assert len(document.get_versions_slice()["versions"]) == 1 + version_id = document.get_versions_slice()["versions"][0]["version_id"] response = APIClient().put( - f"/api/v1.0/documents/{access.document_id!s}/versions/{version_id:s}/", + f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/", {"foo": "bar"}, format="json", ) @@ -281,7 +405,12 @@ def test_api_document_versions_update_authenticated_unrelated(): client.force_login(user) access = factories.UserDocumentAccessFactory() - version_id = access.document.get_versions_slice()["versions"][0]["version_id"] + document = access.document + document.content = "new content" + document.save() + + assert len(document.get_versions_slice()["versions"]) == 1 + version_id = document.get_versions_slice()["versions"][0]["version_id"] response = client.put( f"/api/v1.0/documents/{access.document_id!s}/versions/{version_id:s}/", @@ -303,7 +432,6 @@ def test_api_document_versions_update_authenticated_related(via, mock_user_teams client.force_login(user) document = factories.DocumentFactory() - version_id = document.get_versions_slice()["versions"][0]["version_id"] if via == USER: factories.UserDocumentAccessFactory(document=document, user=user) @@ -311,6 +439,14 @@ def test_api_document_versions_update_authenticated_related(via, mock_user_teams mock_user_teams.return_value = ["lasuite", "unknown"] factories.TeamDocumentAccessFactory(document=document, team="lasuite") + time.sleep(1) # minio stores datetimes with the precision of a second + + document.content = "new content" + document.save() + + assert len(document.get_versions_slice()["versions"]) == 1 + version_id = document.get_versions_slice()["versions"][0]["version_id"] + response = client.put( f"/api/v1.0/documents/{document.id!s}/versions/{version_id!s}/", {"foo": "bar"}, @@ -345,6 +481,9 @@ def test_api_document_versions_delete_authenticated(reach): client.force_login(user) document = factories.DocumentFactory(link_reach=reach) + document.content = "new content" + document.save() + version_id = document.get_versions_slice()["versions"][0]["version_id"] response = client.delete( @@ -381,13 +520,7 @@ def test_api_document_versions_delete_reader_or_editor(via, role, mock_user_team document.save() versions = document.get_versions_slice()["versions"] - assert len(versions) == 2 - - version_id = versions[1]["version_id"] - response = client.delete( - f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/", - ) - assert response.status_code == 403 + assert len(versions) == 1 version_id = versions[0]["version_id"] response = client.delete( @@ -396,7 +529,7 @@ def test_api_document_versions_delete_reader_or_editor(via, role, mock_user_team assert response.status_code == 403 versions = document.get_versions_slice()["versions"] - assert len(versions) == 2 + assert len(versions) == 1 @pytest.mark.parametrize("via", VIA) @@ -421,19 +554,25 @@ def test_api_document_versions_delete_administrator_or_owner(via, mock_user_team # Create a new version should make it available to the user time.sleep(1) # minio stores datetimes with the precision of a second - document.content = "new content" + document.content = "new content 1" document.save() versions = document.get_versions_slice()["versions"] - assert len(versions) == 2 + assert len(versions) == 1 - version_id = versions[1]["version_id"] + version_id = versions[0]["version_id"] response = client.delete( f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/", ) # 404 because the version was created before the user was given access to the document assert response.status_code == 404 + document.content = "new content 2" + document.save() + + versions = document.get_versions_slice()["versions"] + assert len(versions) == 2 + version_id = versions[0]["version_id"] response = client.delete( f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/", diff --git a/src/backend/core/tests/test_models_documents.py b/src/backend/core/tests/test_models_documents.py index fcb63e52..ed76321f 100644 --- a/src/backend/core/tests/test_models_documents.py +++ b/src/backend/core/tests/test_models_documents.py @@ -10,6 +10,7 @@ from django.contrib.auth.models import AnonymousUser from django.core import mail from django.core.exceptions import ValidationError from django.core.files.storage import default_storage +from django.utils import timezone import pytest @@ -260,7 +261,7 @@ def test_models_documents_get_abilities_preset_role(django_assert_num_queries): } -def test_models_documents_get_versions_slice(settings): +def test_models_documents_get_versions_slice_pagination(settings): """ The "get_versions_slice" method should allow navigating all versions of the document with pagination. @@ -273,7 +274,7 @@ def test_models_documents_get_versions_slice(settings): document.content = f"bar{i:d}" document.save() - # Add a version not related to the first document + # Add a document version not related to the first document factories.DocumentFactory() # - Get default max versions @@ -291,7 +292,7 @@ def test_models_documents_get_versions_slice(settings): from_version_id=response["next_version_id_marker"] ) assert response["is_truncated"] is False - assert len(response["versions"]) == 3 + assert len(response["versions"]) == 2 assert response["next_version_id_marker"] == "" # - Get custom max versions @@ -301,6 +302,30 @@ def test_models_documents_get_versions_slice(settings): assert response["next_version_id_marker"] != "" +def test_models_documents_get_versions_slice_min_datetime(): + """ + The "get_versions_slice" method should filter out versions anterior to + the from_datetime passed in argument and the current version. + """ + document = factories.DocumentFactory() + from_dt = [] + for i in range(6): + from_dt.append(timezone.now()) + document.content = f"bar{i:d}" + document.save() + + response = document.get_versions_slice(min_datetime=from_dt[2]) + + assert len(response["versions"]) == 3 + for version in response["versions"]: + assert version["last_modified"] > from_dt[2] + + response = document.get_versions_slice(min_datetime=from_dt[4]) + + assert len(response["versions"]) == 1 + assert response["versions"][0]["last_modified"] > from_dt[4] + + def test_models_documents_version_duplicate(): """A new version should be created in object storage only if the content has changed.""" document = factories.DocumentFactory()