♻️(api) refactor getting versions to expose pagination

Getting versions was not working properly. Some versions returned
were not accessible by the user requesting the list of available
versions.

We refactor the code to make it simpler and let the frontend handle
pagination (load more style).
This commit is contained in:
Samuel Paccoud - DINUM
2024-09-16 19:27:48 +02:00
committed by Samuel Paccoud
parent 827d8cc8e1
commit a2a184bb93
6 changed files with 269 additions and 107 deletions

View File

@@ -16,8 +16,8 @@ and this project adheres to
## Fixed
- 🐛(backend) gitlab oicd userinfo endpoint #232
- 🐛 (backend) gitlab oicd userinfo endpoint #232
- ♻️ (backend) getting list of document versions available for a user #258
## [1.4.0] - 2024-09-17

View File

@@ -343,10 +343,10 @@ class InvitationSerializer(serializers.ModelSerializer):
return attrs
class DocumentVersionSerializer(serializers.Serializer):
"""Serialize Versions."""
class VersionFilterSerializer(serializers.Serializer):
"""Validate version filters applied to the list endpoint."""
etag = serializers.CharField()
is_latest = serializers.BooleanField()
last_modified = serializers.DateTimeField()
version_id = serializers.CharField()
version_id = serializers.CharField(required=False, allow_blank=True)
page_size = serializers.IntegerField(
required=False, min_value=1, max_value=50, default=20
)

View File

@@ -10,6 +10,7 @@ from django.contrib.postgres.aggregates import ArrayAgg
from django.core.exceptions import ValidationError
from django.core.files.storage import default_storage
from django.db.models import (
Min,
OuterRef,
Q,
Subquery,
@@ -373,28 +374,36 @@ class DocumentViewSet(
Return the document's versions but only those created after the user got access
to the document
"""
if not request.user.is_authenticated:
user = request.user
if not user.is_authenticated:
raise exceptions.PermissionDenied("Authentication required.")
# Validate query parameters using dedicated serializer
serializer = serializers.VersionFilterSerializer(data=request.query_params)
serializer.is_valid(raise_exception=True)
document = self.get_object()
user = request.user
from_datetime = min(
access.created_at
for access in document.accesses.filter(
Q(user=user) | Q(team__in=user.teams),
# Users should not see version history dating from before they gained access to the
# document. Filter to get the minimum access date for the logged-in user
access_queryset = document.accesses.filter(
Q(user=user) | Q(team__in=user.teams)
).aggregate(min_date=Min("created_at"))
# Handle the case where the user has no accesses
min_datetime = access_queryset["min_date"]
if not min_datetime:
return exceptions.PermissionDenied(
"Only users with specific access can see version history"
)
versions_data = document.get_versions_slice(
from_version_id=serializer.validated_data.get("version_id"),
min_datetime=min_datetime,
page_size=serializer.validated_data.get("page_size"),
)
versions_data = document.get_versions_slice(from_datetime=from_datetime)[
"versions"
]
paginator = pagination.PageNumberPagination()
paginated_versions = paginator.paginate_queryset(versions_data, request)
serialized_versions = serializers.DocumentVersionSerializer(
paginated_versions, many=True
)
return paginator.get_paginated_response(serialized_versions.data)
return drf_response.Response(versions_data)
@decorators.action(
detail=True,
@@ -414,13 +423,13 @@ class DocumentViewSet(
# Don't let users access versions that were created before they were given access
# to the document
user = request.user
from_datetime = min(
min_datetime = min(
access.created_at
for access in document.accesses.filter(
Q(user=user) | Q(team__in=user.teams),
)
)
if response["LastModified"] < from_datetime:
if response["LastModified"] < min_datetime:
raise Http404
if request.method == "DELETE":

View File

@@ -416,73 +416,62 @@ class Document(BaseModel):
Bucket=default_storage.bucket_name, Key=self.file_key, VersionId=version_id
)
def get_versions_slice(
self, from_version_id="", from_datetime=None, page_size=None
):
def get_versions_slice(self, from_version_id="", min_datetime=None, page_size=None):
"""Get document versions from object storage with pagination and starting conditions"""
# /!\ Trick here /!\
# The "KeyMarker" and "VersionIdMarker" fields must either be both set or both not set.
# The error we get otherwise is not helpful at all.
token = {}
markers = {}
if from_version_id:
token.update(
markers.update(
{"KeyMarker": self.file_key, "VersionIdMarker": from_version_id}
)
if from_datetime:
response = default_storage.connection.meta.client.list_object_versions(
Bucket=default_storage.bucket_name,
Prefix=self.file_key,
MaxKeys=settings.DOCUMENT_VERSIONS_PAGE_SIZE,
**token,
)
# Find the first version after the given datetime
version = None
for version in response.get("Versions", []):
if version["LastModified"] >= from_datetime:
token = {
"KeyMarker": self.file_key,
"VersionIdMarker": version["VersionId"],
}
break
else:
if version is None or version["LastModified"] < from_datetime:
if response["NextVersionIdMarker"]:
return self.get_versions_slice(
from_version_id=response["NextVersionIdMarker"],
page_size=settings.DOCUMENT_VERSIONS_PAGE_SIZE,
from_datetime=from_datetime,
)
return {
"next_version_id_marker": "",
"is_truncated": False,
"versions": [],
}
real_page_size = (
min(page_size, settings.DOCUMENT_VERSIONS_PAGE_SIZE)
if page_size
else settings.DOCUMENT_VERSIONS_PAGE_SIZE
)
response = default_storage.connection.meta.client.list_object_versions(
Bucket=default_storage.bucket_name,
Prefix=self.file_key,
MaxKeys=min(page_size, settings.DOCUMENT_VERSIONS_PAGE_SIZE)
if page_size
else settings.DOCUMENT_VERSIONS_PAGE_SIZE,
**token,
# compensate the latest version that we exclude below and get one more to
# know if there are more pages
MaxKeys=real_page_size + 2,
**markers,
)
min_last_modified = min_datetime or self.created_at
versions = [
{
key_snake: version[key_camel]
for key_snake, key_camel in [
("etag", "ETag"),
("is_latest", "IsLatest"),
("last_modified", "LastModified"),
("version_id", "VersionId"),
]
}
for version in response.get("Versions", [])
if version["LastModified"] >= min_last_modified
and version["IsLatest"] is False
]
results = versions[:real_page_size]
count = len(results)
if count == len(versions):
is_truncated = False
next_version_id_marker = ""
else:
is_truncated = True
next_version_id_marker = versions[count - 1]["version_id"]
return {
"next_version_id_marker": response["NextVersionIdMarker"],
"is_truncated": response["IsTruncated"],
"versions": [
{
key_snake: version[key_camel]
for key_camel, key_snake in [
("ETag", "etag"),
("IsLatest", "is_latest"),
("LastModified", "last_modified"),
("VersionId", "version_id"),
]
}
for version in response.get("Versions", [])
],
"next_version_id_marker": next_version_id_marker,
"is_truncated": is_truncated,
"versions": results,
"count": count,
}
def delete_version(self, version_id):

View File

@@ -60,7 +60,7 @@ def test_api_document_versions_list_authenticated_unrelated(reach):
@pytest.mark.parametrize("via", VIA)
def test_api_document_versions_list_authenticated_related(via, mock_user_teams):
def test_api_document_versions_list_authenticated_related_success(via, mock_user_teams):
"""
Authenticated users should be able to list document versions for a document
to which they are directly related, whatever their role in the document.
@@ -95,12 +95,12 @@ def test_api_document_versions_list_authenticated_related(via, mock_user_teams):
assert response.status_code == 200
content = response.json()
assert len(content["results"]) == 0
assert content["count"] == 0
# Add a new version to the document
document.content = "new content"
document.save()
for i in range(3):
document.content = f"new content {i:d}"
document.save()
response = client.get(
f"/api/v1.0/documents/{document.id!s}/versions/",
@@ -108,8 +108,100 @@ def test_api_document_versions_list_authenticated_related(via, mock_user_teams):
assert response.status_code == 200
content = response.json()
assert len(content["results"]) == 1
# The current version is not listed
assert content["count"] == 2
@pytest.mark.parametrize("via", VIA)
def test_api_document_versions_list_authenticated_related_pagination(
via, mock_user_teams
):
"""
The list of versions should be paginated and exclude versions that were created prior to the
user gaining access to the document.
"""
user = factories.UserFactory()
client = APIClient()
client.force_login(user)
document = factories.DocumentFactory()
for i in range(3):
document.content = f"before {i:d}"
document.save()
if via == USER:
models.DocumentAccess.objects.create(
document=document,
user=user,
role=random.choice(models.RoleChoices.choices)[0],
)
elif via == TEAM:
mock_user_teams.return_value = ["lasuite", "unknown"]
models.DocumentAccess.objects.create(
document=document,
team="lasuite",
role=random.choice(models.RoleChoices.choices)[0],
)
for i in range(4):
document.content = f"after {i:d}"
document.save()
response = client.get(
f"/api/v1.0/documents/{document.id!s}/versions/",
)
content = response.json()
assert content["is_truncated"] is False
# The current version is not listed
assert content["count"] == 3
assert content["next_version_id_marker"] == ""
all_version_ids = [version["version_id"] for version in content["versions"]]
# - set page size
response = client.get(
f"/api/v1.0/documents/{document.id!s}/versions/?page_size=2",
)
content = response.json()
assert content["count"] == 2
assert content["is_truncated"] is True
marker = content["next_version_id_marker"]
assert marker == all_version_ids[1]
assert [
version["version_id"] for version in content["versions"]
] == all_version_ids[:2]
# - get page 2
response = client.get(
f"/api/v1.0/documents/{document.id!s}/versions/?page_size=2&version_id={marker:s}",
)
content = response.json()
assert content["count"] == 1
assert content["is_truncated"] is False
assert content["next_version_id_marker"] == ""
assert content["versions"][0]["version_id"] == all_version_ids[2]
def test_api_document_versions_list_exceeds_max_page_size():
"""Page size should not exceed the limit set on the serializer"""
user = factories.UserFactory()
client = APIClient()
client.force_login(user)
document = factories.DocumentFactory(users=[user])
document.content = "version 2"
document.save()
response = client.get(f"/api/v1.0/documents/{document.id!s}/versions/?page_size=51")
assert response.status_code == 400
assert response.json() == {
"page_size": ["Ensure this value is less than or equal to 50."]
}
@pytest.mark.parametrize("reach", models.LinkReachChoices.values)
@@ -119,6 +211,9 @@ def test_api_document_versions_retrieve_anonymous(reach):
restricted or authenticated link reach.
"""
document = factories.DocumentFactory(link_reach=reach)
document.content = "new content"
document.save()
version_id = document.get_versions_slice()["versions"][0]["version_id"]
url = f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/"
@@ -142,6 +237,9 @@ def test_api_document_versions_retrieve_authenticated_unrelated(reach):
client.force_login(user)
document = factories.DocumentFactory(link_reach=reach)
document.content = "new content"
document.save()
version_id = document.get_versions_slice()["versions"][0]["version_id"]
response = client.get(
@@ -157,7 +255,7 @@ def test_api_document_versions_retrieve_authenticated_unrelated(reach):
def test_api_document_versions_retrieve_authenticated_related(via, mock_user_teams):
"""
A user who is related to a document should be allowed to retrieve the
associated document user accesses.
associated document versions.
"""
user = factories.UserFactory()
@@ -165,6 +263,10 @@ def test_api_document_versions_retrieve_authenticated_related(via, mock_user_tea
client.force_login(user)
document = factories.DocumentFactory()
document.content = "new content"
document.save()
assert len(document.get_versions_slice()["versions"]) == 1
version_id = document.get_versions_slice()["versions"][0]["version_id"]
if via == USER:
@@ -173,6 +275,8 @@ def test_api_document_versions_retrieve_authenticated_related(via, mock_user_tea
mock_user_teams.return_value = ["lasuite", "unknown"]
factories.TeamDocumentAccessFactory(document=document, team="lasuite")
time.sleep(1) # minio stores datetimes with the precision of a second
# Versions created before the document was shared should not be seen by the user
response = client.get(
f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/",
@@ -180,11 +284,26 @@ def test_api_document_versions_retrieve_authenticated_related(via, mock_user_tea
assert response.status_code == 404
# Create a new version should make it available to the user
time.sleep(1) # minio stores datetimes with the precision of a second
document.content = "new content"
# Create a new version should not make it available to the user because
# only the current version is available to the user but it is excluded
# from the list
document.content = "new content 1"
document.save()
assert len(document.get_versions_slice()["versions"]) == 2
version_id = document.get_versions_slice()["versions"][0]["version_id"]
response = client.get(
f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/",
)
assert response.status_code == 404
# Adding one more version should make the previous version available to the user
document.content = "new content 2"
document.save()
assert len(document.get_versions_slice()["versions"]) == 3
version_id = document.get_versions_slice()["versions"][0]["version_id"]
response = client.get(
@@ -192,7 +311,7 @@ def test_api_document_versions_retrieve_authenticated_related(via, mock_user_tea
)
assert response.status_code == 200
assert response.json()["content"] == "new content"
assert response.json()["content"] == "new content 1"
def test_api_document_versions_create_anonymous():
@@ -260,10 +379,15 @@ def test_api_document_versions_create_authenticated_related(via, mock_user_teams
def test_api_document_versions_update_anonymous():
"""Anonymous users should not be allowed to update a document version."""
access = factories.UserDocumentAccessFactory()
version_id = access.document.get_versions_slice()["versions"][0]["version_id"]
document = access.document
document.content = "new content"
document.save()
assert len(document.get_versions_slice()["versions"]) == 1
version_id = document.get_versions_slice()["versions"][0]["version_id"]
response = APIClient().put(
f"/api/v1.0/documents/{access.document_id!s}/versions/{version_id:s}/",
f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/",
{"foo": "bar"},
format="json",
)
@@ -281,7 +405,12 @@ def test_api_document_versions_update_authenticated_unrelated():
client.force_login(user)
access = factories.UserDocumentAccessFactory()
version_id = access.document.get_versions_slice()["versions"][0]["version_id"]
document = access.document
document.content = "new content"
document.save()
assert len(document.get_versions_slice()["versions"]) == 1
version_id = document.get_versions_slice()["versions"][0]["version_id"]
response = client.put(
f"/api/v1.0/documents/{access.document_id!s}/versions/{version_id:s}/",
@@ -303,7 +432,6 @@ def test_api_document_versions_update_authenticated_related(via, mock_user_teams
client.force_login(user)
document = factories.DocumentFactory()
version_id = document.get_versions_slice()["versions"][0]["version_id"]
if via == USER:
factories.UserDocumentAccessFactory(document=document, user=user)
@@ -311,6 +439,14 @@ def test_api_document_versions_update_authenticated_related(via, mock_user_teams
mock_user_teams.return_value = ["lasuite", "unknown"]
factories.TeamDocumentAccessFactory(document=document, team="lasuite")
time.sleep(1) # minio stores datetimes with the precision of a second
document.content = "new content"
document.save()
assert len(document.get_versions_slice()["versions"]) == 1
version_id = document.get_versions_slice()["versions"][0]["version_id"]
response = client.put(
f"/api/v1.0/documents/{document.id!s}/versions/{version_id!s}/",
{"foo": "bar"},
@@ -345,6 +481,9 @@ def test_api_document_versions_delete_authenticated(reach):
client.force_login(user)
document = factories.DocumentFactory(link_reach=reach)
document.content = "new content"
document.save()
version_id = document.get_versions_slice()["versions"][0]["version_id"]
response = client.delete(
@@ -381,13 +520,7 @@ def test_api_document_versions_delete_reader_or_editor(via, role, mock_user_team
document.save()
versions = document.get_versions_slice()["versions"]
assert len(versions) == 2
version_id = versions[1]["version_id"]
response = client.delete(
f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/",
)
assert response.status_code == 403
assert len(versions) == 1
version_id = versions[0]["version_id"]
response = client.delete(
@@ -396,7 +529,7 @@ def test_api_document_versions_delete_reader_or_editor(via, role, mock_user_team
assert response.status_code == 403
versions = document.get_versions_slice()["versions"]
assert len(versions) == 2
assert len(versions) == 1
@pytest.mark.parametrize("via", VIA)
@@ -421,19 +554,25 @@ def test_api_document_versions_delete_administrator_or_owner(via, mock_user_team
# Create a new version should make it available to the user
time.sleep(1) # minio stores datetimes with the precision of a second
document.content = "new content"
document.content = "new content 1"
document.save()
versions = document.get_versions_slice()["versions"]
assert len(versions) == 2
assert len(versions) == 1
version_id = versions[1]["version_id"]
version_id = versions[0]["version_id"]
response = client.delete(
f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/",
)
# 404 because the version was created before the user was given access to the document
assert response.status_code == 404
document.content = "new content 2"
document.save()
versions = document.get_versions_slice()["versions"]
assert len(versions) == 2
version_id = versions[0]["version_id"]
response = client.delete(
f"/api/v1.0/documents/{document.id!s}/versions/{version_id:s}/",

View File

@@ -10,6 +10,7 @@ from django.contrib.auth.models import AnonymousUser
from django.core import mail
from django.core.exceptions import ValidationError
from django.core.files.storage import default_storage
from django.utils import timezone
import pytest
@@ -260,7 +261,7 @@ def test_models_documents_get_abilities_preset_role(django_assert_num_queries):
}
def test_models_documents_get_versions_slice(settings):
def test_models_documents_get_versions_slice_pagination(settings):
"""
The "get_versions_slice" method should allow navigating all versions of
the document with pagination.
@@ -273,7 +274,7 @@ def test_models_documents_get_versions_slice(settings):
document.content = f"bar{i:d}"
document.save()
# Add a version not related to the first document
# Add a document version not related to the first document
factories.DocumentFactory()
# - Get default max versions
@@ -291,7 +292,7 @@ def test_models_documents_get_versions_slice(settings):
from_version_id=response["next_version_id_marker"]
)
assert response["is_truncated"] is False
assert len(response["versions"]) == 3
assert len(response["versions"]) == 2
assert response["next_version_id_marker"] == ""
# - Get custom max versions
@@ -301,6 +302,30 @@ def test_models_documents_get_versions_slice(settings):
assert response["next_version_id_marker"] != ""
def test_models_documents_get_versions_slice_min_datetime():
"""
The "get_versions_slice" method should filter out versions anterior to
the from_datetime passed in argument and the current version.
"""
document = factories.DocumentFactory()
from_dt = []
for i in range(6):
from_dt.append(timezone.now())
document.content = f"bar{i:d}"
document.save()
response = document.get_versions_slice(min_datetime=from_dt[2])
assert len(response["versions"]) == 3
for version in response["versions"]:
assert version["last_modified"] > from_dt[2]
response = document.get_versions_slice(min_datetime=from_dt[4])
assert len(response["versions"]) == 1
assert response["versions"][0]["last_modified"] > from_dt[4]
def test_models_documents_version_duplicate():
"""A new version should be created in object storage only if the content has changed."""
document = factories.DocumentFactory()