diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fef5ecd..ec30b59b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,8 +16,8 @@ and this project adheres to ## Fixed -- 🐛(backend) gitlab oicd userinfo endpoint #232 - +- 🐛 (backend) gitlab oicd userinfo endpoint #232 +- ♻️ (backend) getting list of document versions available for a user #258 ## [1.4.0] - 2024-09-17 diff --git a/src/backend/core/api/serializers.py b/src/backend/core/api/serializers.py index 3ef1ebad..e09bfef4 100644 --- a/src/backend/core/api/serializers.py +++ b/src/backend/core/api/serializers.py @@ -343,10 +343,10 @@ class InvitationSerializer(serializers.ModelSerializer): return attrs -class DocumentVersionSerializer(serializers.Serializer): - """Serialize Versions.""" +class VersionFilterSerializer(serializers.Serializer): + """Validate version filters applied to the list endpoint.""" - etag = serializers.CharField() - is_latest = serializers.BooleanField() - last_modified = serializers.DateTimeField() - version_id = serializers.CharField() + version_id = serializers.CharField(required=False, allow_blank=True) + page_size = serializers.IntegerField( + required=False, min_value=1, max_value=50, default=20 + ) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 26093007..e3a0003d 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -10,6 +10,7 @@ from django.contrib.postgres.aggregates import ArrayAgg from django.core.exceptions import ValidationError from django.core.files.storage import default_storage from django.db.models import ( + Min, OuterRef, Q, Subquery, @@ -373,28 +374,36 @@ class DocumentViewSet( Return the document's versions but only those created after the user got access to the document """ - if not request.user.is_authenticated: + user = request.user + if not user.is_authenticated: raise exceptions.PermissionDenied("Authentication required.") + # Validate query parameters using dedicated serializer + serializer = serializers.VersionFilterSerializer(data=request.query_params) + serializer.is_valid(raise_exception=True) + document = self.get_object() - user = request.user - from_datetime = min( - access.created_at - for access in document.accesses.filter( - Q(user=user) | Q(team__in=user.teams), + + # Users should not see version history dating from before they gained access to the + # document. Filter to get the minimum access date for the logged-in user + access_queryset = document.accesses.filter( + Q(user=user) | Q(team__in=user.teams) + ).aggregate(min_date=Min("created_at")) + + # Handle the case where the user has no accesses + min_datetime = access_queryset["min_date"] + if not min_datetime: + return exceptions.PermissionDenied( + "Only users with specific access can see version history" ) + + versions_data = document.get_versions_slice( + from_version_id=serializer.validated_data.get("version_id"), + min_datetime=min_datetime, + page_size=serializer.validated_data.get("page_size"), ) - versions_data = document.get_versions_slice(from_datetime=from_datetime)[ - "versions" - ] - paginator = pagination.PageNumberPagination() - paginated_versions = paginator.paginate_queryset(versions_data, request) - serialized_versions = serializers.DocumentVersionSerializer( - paginated_versions, many=True - ) - - return paginator.get_paginated_response(serialized_versions.data) + return drf_response.Response(versions_data) @decorators.action( detail=True, @@ -414,13 +423,13 @@ class DocumentViewSet( # Don't let users access versions that were created before they were given access # to the document user = request.user - from_datetime = min( + min_datetime = min( access.created_at for access in document.accesses.filter( Q(user=user) | Q(team__in=user.teams), ) ) - if response["LastModified"] < from_datetime: + if response["LastModified"] < min_datetime: raise Http404 if request.method == "DELETE": diff --git a/src/backend/core/models.py b/src/backend/core/models.py index 203c57ab..c4390f97 100644 --- a/src/backend/core/models.py +++ b/src/backend/core/models.py @@ -416,73 +416,62 @@ class Document(BaseModel): Bucket=default_storage.bucket_name, Key=self.file_key, VersionId=version_id ) - def get_versions_slice( - self, from_version_id="", from_datetime=None, page_size=None - ): + def get_versions_slice(self, from_version_id="", min_datetime=None, page_size=None): """Get document versions from object storage with pagination and starting conditions""" # /!\ Trick here /!\ # The "KeyMarker" and "VersionIdMarker" fields must either be both set or both not set. # The error we get otherwise is not helpful at all. - token = {} + markers = {} if from_version_id: - token.update( + markers.update( {"KeyMarker": self.file_key, "VersionIdMarker": from_version_id} ) - if from_datetime: - response = default_storage.connection.meta.client.list_object_versions( - Bucket=default_storage.bucket_name, - Prefix=self.file_key, - MaxKeys=settings.DOCUMENT_VERSIONS_PAGE_SIZE, - **token, - ) - - # Find the first version after the given datetime - version = None - for version in response.get("Versions", []): - if version["LastModified"] >= from_datetime: - token = { - "KeyMarker": self.file_key, - "VersionIdMarker": version["VersionId"], - } - break - else: - if version is None or version["LastModified"] < from_datetime: - if response["NextVersionIdMarker"]: - return self.get_versions_slice( - from_version_id=response["NextVersionIdMarker"], - page_size=settings.DOCUMENT_VERSIONS_PAGE_SIZE, - from_datetime=from_datetime, - ) - return { - "next_version_id_marker": "", - "is_truncated": False, - "versions": [], - } + real_page_size = ( + min(page_size, settings.DOCUMENT_VERSIONS_PAGE_SIZE) + if page_size + else settings.DOCUMENT_VERSIONS_PAGE_SIZE + ) response = default_storage.connection.meta.client.list_object_versions( Bucket=default_storage.bucket_name, Prefix=self.file_key, - MaxKeys=min(page_size, settings.DOCUMENT_VERSIONS_PAGE_SIZE) - if page_size - else settings.DOCUMENT_VERSIONS_PAGE_SIZE, - **token, + # compensate the latest version that we exclude below and get one more to + # know if there are more pages + MaxKeys=real_page_size + 2, + **markers, ) + + min_last_modified = min_datetime or self.created_at + versions = [ + { + key_snake: version[key_camel] + for key_snake, key_camel in [ + ("etag", "ETag"), + ("is_latest", "IsLatest"), + ("last_modified", "LastModified"), + ("version_id", "VersionId"), + ] + } + for version in response.get("Versions", []) + if version["LastModified"] >= min_last_modified + and version["IsLatest"] is False + ] + results = versions[:real_page_size] + + count = len(results) + if count == len(versions): + is_truncated = False + next_version_id_marker = "" + else: + is_truncated = True + next_version_id_marker = versions[count - 1]["version_id"] + return { - "next_version_id_marker": response["NextVersionIdMarker"], - "is_truncated": response["IsTruncated"], - "versions": [ - { - key_snake: version[key_camel] - for key_camel, key_snake in [ - ("ETag", "etag"), - ("IsLatest", "is_latest"), - ("LastModified", "last_modified"), - ("VersionId", "version_id"), - ] - } - for version in response.get("Versions", []) - ], + "next_version_id_marker": next_version_id_marker, + "is_truncated": is_truncated, + "versions": results, + "count": count, } def delete_version(self, version_id): diff --git a/src/backend/core/tests/documents/test_api_document_versions.py b/src/backend/core/tests/documents/test_api_document_versions.py index 1fce440c..69181218 100644 --- a/src/backend/core/tests/documents/test_api_document_versions.py +++ b/src/backend/core/tests/documents/test_api_document_versions.py @@ -60,7 +60,7 @@ def test_api_document_versions_list_authenticated_unrelated(reach): @pytest.mark.parametrize("via", VIA) -def test_api_document_versions_list_authenticated_related(via, mock_user_teams): +def test_api_document_versions_list_authenticated_related_success(via, mock_user_teams): """ Authenticated users should be able to list document versions for a document to which they are directly related, whatever their role in the document. @@ -95,12 +95,12 @@ def test_api_document_versions_list_authenticated_related(via, mock_user_teams): assert response.status_code == 200 content = response.json() - assert len(content["results"]) == 0 assert content["count"] == 0 # Add a new version to the document - document.content = "new content" - document.save() + for i in range(3): + document.content = f"new content {i:d}" + document.save() response = client.get( f"/api/v1.0/documents/{document.id!s}/versions/", @@ -108,8 +108,100 @@ def test_api_document_versions_list_authenticated_related(via, mock_user_teams): assert response.status_code == 200 content = response.json() - assert len(content["results"]) == 1 + # The current version is not listed + assert content["count"] == 2 + + +@pytest.mark.parametrize("via", VIA) +def test_api_document_versions_list_authenticated_related_pagination( + via, mock_user_teams +): + """ + The list of versions should be paginated and exclude versions that were created prior to the + user gaining access to the document. + """ + user = factories.UserFactory() + + client = APIClient() + client.force_login(user) + + document = factories.DocumentFactory() + for i in range(3): + document.content = f"before {i:d}" + document.save() + + if via == USER: + models.DocumentAccess.objects.create( + document=document, + user=user, + role=random.choice(models.RoleChoices.choices)[0], + ) + elif via == TEAM: + mock_user_teams.return_value = ["lasuite", "unknown"] + models.DocumentAccess.objects.create( + document=document, + team="lasuite", + role=random.choice(models.RoleChoices.choices)[0], + ) + + for i in range(4): + document.content = f"after {i:d}" + document.save() + + response = client.get( + f"/api/v1.0/documents/{document.id!s}/versions/", + ) + + content = response.json() + assert content["is_truncated"] is False + # The current version is not listed + assert content["count"] == 3 + assert content["next_version_id_marker"] == "" + all_version_ids = [version["version_id"] for version in content["versions"]] + + # - set page size + response = client.get( + f"/api/v1.0/documents/{document.id!s}/versions/?page_size=2", + ) + + content = response.json() + assert content["count"] == 2 + assert content["is_truncated"] is True + marker = content["next_version_id_marker"] + assert marker == all_version_ids[1] + assert [ + version["version_id"] for version in content["versions"] + ] == all_version_ids[:2] + + # - get page 2 + response = client.get( + f"/api/v1.0/documents/{document.id!s}/versions/?page_size=2&version_id={marker:s}", + ) + + content = response.json() assert content["count"] == 1 + assert content["is_truncated"] is False + assert content["next_version_id_marker"] == "" + assert content["versions"][0]["version_id"] == all_version_ids[2] + + +def test_api_document_versions_list_exceeds_max_page_size(): + """Page size should not exceed the limit set on the serializer""" + user = factories.UserFactory() + + client = APIClient() + client.force_login(user) + + document = factories.DocumentFactory(users=[user]) + document.content = "version 2" + document.save() + + response = client.get(f"/api/v1.0/documents/{document.id!s}/versions/?page_size=51") + + assert response.status_code == 400 + assert response.json() == { + "page_size": ["Ensure this value is less than or equal to 50."] + } @pytest.mark.parametrize("reach", models.LinkReachChoices.values) @@ -119,6 +211,9 @@ def test_api_document_versions_retrieve_anonymous(reach): restricted or authenticated link reach. """ document = factories.DocumentFactory(link_reach=reach) + document.content = "new content" + document.save() + version_id = document.get_versions_slice()["versions"][0]["version_id"] url = f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/" @@ -142,6 +237,9 @@ def test_api_document_versions_retrieve_authenticated_unrelated(reach): client.force_login(user) document = factories.DocumentFactory(link_reach=reach) + document.content = "new content" + document.save() + version_id = document.get_versions_slice()["versions"][0]["version_id"] response = client.get( @@ -157,7 +255,7 @@ def test_api_document_versions_retrieve_authenticated_unrelated(reach): def test_api_document_versions_retrieve_authenticated_related(via, mock_user_teams): """ A user who is related to a document should be allowed to retrieve the - associated document user accesses. + associated document versions. """ user = factories.UserFactory() @@ -165,6 +263,10 @@ def test_api_document_versions_retrieve_authenticated_related(via, mock_user_tea client.force_login(user) document = factories.DocumentFactory() + document.content = "new content" + document.save() + + assert len(document.get_versions_slice()["versions"]) == 1 version_id = document.get_versions_slice()["versions"][0]["version_id"] if via == USER: @@ -173,6 +275,8 @@ def test_api_document_versions_retrieve_authenticated_related(via, mock_user_tea mock_user_teams.return_value = ["lasuite", "unknown"] factories.TeamDocumentAccessFactory(document=document, team="lasuite") + time.sleep(1) # minio stores datetimes with the precision of a second + # Versions created before the document was shared should not be seen by the user response = client.get( f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/", @@ -180,11 +284,26 @@ def test_api_document_versions_retrieve_authenticated_related(via, mock_user_tea assert response.status_code == 404 - # Create a new version should make it available to the user - time.sleep(1) # minio stores datetimes with the precision of a second - document.content = "new content" + # Create a new version should not make it available to the user because + # only the current version is available to the user but it is excluded + # from the list + document.content = "new content 1" document.save() + assert len(document.get_versions_slice()["versions"]) == 2 + version_id = document.get_versions_slice()["versions"][0]["version_id"] + + response = client.get( + f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/", + ) + + assert response.status_code == 404 + + # Adding one more version should make the previous version available to the user + document.content = "new content 2" + document.save() + + assert len(document.get_versions_slice()["versions"]) == 3 version_id = document.get_versions_slice()["versions"][0]["version_id"] response = client.get( @@ -192,7 +311,7 @@ def test_api_document_versions_retrieve_authenticated_related(via, mock_user_tea ) assert response.status_code == 200 - assert response.json()["content"] == "new content" + assert response.json()["content"] == "new content 1" def test_api_document_versions_create_anonymous(): @@ -260,10 +379,15 @@ def test_api_document_versions_create_authenticated_related(via, mock_user_teams def test_api_document_versions_update_anonymous(): """Anonymous users should not be allowed to update a document version.""" access = factories.UserDocumentAccessFactory() - version_id = access.document.get_versions_slice()["versions"][0]["version_id"] + document = access.document + document.content = "new content" + document.save() + + assert len(document.get_versions_slice()["versions"]) == 1 + version_id = document.get_versions_slice()["versions"][0]["version_id"] response = APIClient().put( - f"/api/v1.0/documents/{access.document_id!s}/versions/{version_id:s}/", + f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/", {"foo": "bar"}, format="json", ) @@ -281,7 +405,12 @@ def test_api_document_versions_update_authenticated_unrelated(): client.force_login(user) access = factories.UserDocumentAccessFactory() - version_id = access.document.get_versions_slice()["versions"][0]["version_id"] + document = access.document + document.content = "new content" + document.save() + + assert len(document.get_versions_slice()["versions"]) == 1 + version_id = document.get_versions_slice()["versions"][0]["version_id"] response = client.put( f"/api/v1.0/documents/{access.document_id!s}/versions/{version_id:s}/", @@ -303,7 +432,6 @@ def test_api_document_versions_update_authenticated_related(via, mock_user_teams client.force_login(user) document = factories.DocumentFactory() - version_id = document.get_versions_slice()["versions"][0]["version_id"] if via == USER: factories.UserDocumentAccessFactory(document=document, user=user) @@ -311,6 +439,14 @@ def test_api_document_versions_update_authenticated_related(via, mock_user_teams mock_user_teams.return_value = ["lasuite", "unknown"] factories.TeamDocumentAccessFactory(document=document, team="lasuite") + time.sleep(1) # minio stores datetimes with the precision of a second + + document.content = "new content" + document.save() + + assert len(document.get_versions_slice()["versions"]) == 1 + version_id = document.get_versions_slice()["versions"][0]["version_id"] + response = client.put( f"/api/v1.0/documents/{document.id!s}/versions/{version_id!s}/", {"foo": "bar"}, @@ -345,6 +481,9 @@ def test_api_document_versions_delete_authenticated(reach): client.force_login(user) document = factories.DocumentFactory(link_reach=reach) + document.content = "new content" + document.save() + version_id = document.get_versions_slice()["versions"][0]["version_id"] response = client.delete( @@ -381,13 +520,7 @@ def test_api_document_versions_delete_reader_or_editor(via, role, mock_user_team document.save() versions = document.get_versions_slice()["versions"] - assert len(versions) == 2 - - version_id = versions[1]["version_id"] - response = client.delete( - f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/", - ) - assert response.status_code == 403 + assert len(versions) == 1 version_id = versions[0]["version_id"] response = client.delete( @@ -396,7 +529,7 @@ def test_api_document_versions_delete_reader_or_editor(via, role, mock_user_team assert response.status_code == 403 versions = document.get_versions_slice()["versions"] - assert len(versions) == 2 + assert len(versions) == 1 @pytest.mark.parametrize("via", VIA) @@ -421,19 +554,25 @@ def test_api_document_versions_delete_administrator_or_owner(via, mock_user_team # Create a new version should make it available to the user time.sleep(1) # minio stores datetimes with the precision of a second - document.content = "new content" + document.content = "new content 1" document.save() versions = document.get_versions_slice()["versions"] - assert len(versions) == 2 + assert len(versions) == 1 - version_id = versions[1]["version_id"] + version_id = versions[0]["version_id"] response = client.delete( f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/", ) # 404 because the version was created before the user was given access to the document assert response.status_code == 404 + document.content = "new content 2" + document.save() + + versions = document.get_versions_slice()["versions"] + assert len(versions) == 2 + version_id = versions[0]["version_id"] response = client.delete( f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/", diff --git a/src/backend/core/tests/test_models_documents.py b/src/backend/core/tests/test_models_documents.py index fcb63e52..ed76321f 100644 --- a/src/backend/core/tests/test_models_documents.py +++ b/src/backend/core/tests/test_models_documents.py @@ -10,6 +10,7 @@ from django.contrib.auth.models import AnonymousUser from django.core import mail from django.core.exceptions import ValidationError from django.core.files.storage import default_storage +from django.utils import timezone import pytest @@ -260,7 +261,7 @@ def test_models_documents_get_abilities_preset_role(django_assert_num_queries): } -def test_models_documents_get_versions_slice(settings): +def test_models_documents_get_versions_slice_pagination(settings): """ The "get_versions_slice" method should allow navigating all versions of the document with pagination. @@ -273,7 +274,7 @@ def test_models_documents_get_versions_slice(settings): document.content = f"bar{i:d}" document.save() - # Add a version not related to the first document + # Add a document version not related to the first document factories.DocumentFactory() # - Get default max versions @@ -291,7 +292,7 @@ def test_models_documents_get_versions_slice(settings): from_version_id=response["next_version_id_marker"] ) assert response["is_truncated"] is False - assert len(response["versions"]) == 3 + assert len(response["versions"]) == 2 assert response["next_version_id_marker"] == "" # - Get custom max versions @@ -301,6 +302,30 @@ def test_models_documents_get_versions_slice(settings): assert response["next_version_id_marker"] != "" +def test_models_documents_get_versions_slice_min_datetime(): + """ + The "get_versions_slice" method should filter out versions anterior to + the from_datetime passed in argument and the current version. + """ + document = factories.DocumentFactory() + from_dt = [] + for i in range(6): + from_dt.append(timezone.now()) + document.content = f"bar{i:d}" + document.save() + + response = document.get_versions_slice(min_datetime=from_dt[2]) + + assert len(response["versions"]) == 3 + for version in response["versions"]: + assert version["last_modified"] > from_dt[2] + + response = document.get_versions_slice(min_datetime=from_dt[4]) + + assert len(response["versions"]) == 1 + assert response["versions"][0]["last_modified"] > from_dt[4] + + def test_models_documents_version_duplicate(): """A new version should be created in object storage only if the content has changed.""" document = factories.DocumentFactory()