(backend) add duplicate action to the document API endpoint

We took this opportunity to refactor the way access is controlled on
media attachments. We now add the media key to a list on the document
instance each time a media is uploaded to a document. This list is
passed along when a document is duplicated, allowing us to grant
access to readers on the new document, even if they don't have or
lost access to the original document.

We also propose an option to reproduce the same access rights on the
duplicate document as what was in place on the original document.
This can be requested by passing the "with_accesses=true" option in
the query string.

The tricky point is that we need to extract attachment keys from the
existing documents and set them on the new "attachments" field that is
now used to track access rights on media files.
This commit is contained in:
Samuel Paccoud - DINUM
2025-01-20 10:23:18 +01:00
committed by Manuel Raynaud
parent 6976bb7c78
commit 34a208a80d
19 changed files with 1066 additions and 79 deletions

View File

@@ -16,6 +16,8 @@ from django.db import transaction
from django.db.models.expressions import RawSQL
from django.db.models.functions import Left, Length
from django.http import Http404, StreamingHttpResponse
from django.utils.text import capfirst
from django.utils.translation import gettext_lazy as _
import requests
import rest_framework as drf
@@ -28,26 +30,13 @@ from rest_framework.throttling import UserRateThrottle
from core import authentication, enums, models
from core.services.ai_services import AIService
from core.services.collaboration_services import CollaborationService
from core.utils import extract_attachments, filter_descendants
from . import permissions, serializers, utils
from .filters import DocumentFilter, ListDocumentFilter
logger = logging.getLogger(__name__)
<<<<<<< HEAD
ATTACHMENTS_FOLDER = "attachments"
UUID_REGEX = (
r"[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}"
)
FILE_EXT_REGEX = r"\.[a-zA-Z0-9]{1,10}"
MEDIA_STORAGE_URL_PATTERN = re.compile(
f"{settings.MEDIA_URL:s}(?P<pk>{UUID_REGEX:s})/"
f"(?P<key>{ATTACHMENTS_FOLDER:s}/{UUID_REGEX:s}(?:-unsafe)?{FILE_EXT_REGEX:s})$"
)
COLLABORATION_WS_URL_PATTERN = re.compile(rf"(?:^|&)room=(?P<pk>{UUID_REGEX})(?:&|$)")
=======
>>>>>>> 8076486a ((backend) add missing test on media-auth and collaboration-auth)
# pylint: disable=too-many-ancestors
@@ -904,6 +893,82 @@ class DocumentViewSet(
utils.nest_tree(serializer.data, self.queryset.model.steplen)
)
@drf.decorators.action(
detail=True,
methods=["post"],
permission_classes=[permissions.IsAuthenticated, permissions.AccessPermission],
url_path="duplicate",
)
@transaction.atomic
def duplicate(self, request, *args, **kwargs):
"""
Duplicate a document and store the links to attached files in the duplicated
document to allow cross-access.
Optionally duplicates accesses if `with_accesses` is set to true
in the payload.
"""
# Get document while checking permissions
document = self.get_object()
serializer = serializers.DocumentDuplicationSerializer(
data=request.data, partial=True
)
serializer.is_valid(raise_exception=True)
with_accesses = serializer.validated_data.get("with_accesses", False)
base64_yjs_content = document.content
# Duplicate the document instance
link_kwargs = (
{"link_reach": document.link_reach, "link_role": document.link_role}
if with_accesses
else {}
)
extracted_attachments = set(extract_attachments(document.content))
attachments = list(extracted_attachments & set(document.attachments))
duplicated_document = document.add_sibling(
"right",
title=capfirst(_("copy of {title}").format(title=document.title)),
content=base64_yjs_content,
attachments=attachments,
duplicated_from=document,
creator=request.user,
**link_kwargs,
)
# Always add the logged-in user as OWNER
accesses_to_create = [
models.DocumentAccess(
document=duplicated_document,
user=request.user,
role=models.RoleChoices.OWNER,
)
]
# If accesses should be duplicated, add other users' accesses as per original document
if with_accesses:
original_accesses = models.DocumentAccess.objects.filter(
document=document
).exclude(user=request.user)
accesses_to_create.extend(
models.DocumentAccess(
document=duplicated_document,
user_id=access.user_id,
team=access.team,
role=access.role,
)
for access in original_accesses
)
# Bulk create all the duplicated accesses
models.DocumentAccess.objects.bulk_create(accesses_to_create)
return drf_response.Response(
{"id": str(duplicated_document.id)}, status=status.HTTP_201_CREATED
)
@drf.decorators.action(detail=True, methods=["get"], url_path="versions")
def versions_list(self, request, *args, **kwargs):
"""
@@ -1053,7 +1118,7 @@ class DocumentViewSet(
# Generate a generic yet unique filename to store the image in object storage
file_id = uuid.uuid4()
extension = serializer.validated_data["expected_extension"]
ext = serializer.validated_data["expected_extension"]
# Prepare metadata for storage
extra_args = {
@@ -1065,7 +1130,7 @@ class DocumentViewSet(
extra_args["Metadata"]["is_unsafe"] = "true"
file_unsafe = "-unsafe"
key = f"{document.key_base}/{ATTACHMENTS_FOLDER:s}/{file_id!s}{file_unsafe}.{extension:s}"
key = f"{document.key_base}/{enums.ATTACHMENTS_FOLDER:s}/{file_id!s}{file_unsafe}.{ext:s}"
file_name = serializer.validated_data["file_name"]
if (
@@ -1085,6 +1150,10 @@ class DocumentViewSet(
file, default_storage.bucket_name, key, ExtraArgs=extra_args
)
# Make the attachment readable by document readers
document.attachments.append(key)
document.save()
return drf.response.Response(
{"file": f"{settings.MEDIA_URL:s}{key:s}"},
status=drf.status.HTTP_201_CREATED,
@@ -1152,20 +1221,35 @@ class DocumentViewSet(
url_params = self._auth_get_url_params(
enums.MEDIA_STORAGE_URL_PATTERN, parsed_url.path
)
document = self._auth_get_document(url_params["pk"])
if not document.get_abilities(request.user).get(self.action, False):
logger.debug(
"User '%s' lacks permission for document '%s'",
request.user,
document.pk,
)
user = request.user
key = f"{url_params['pk']:s}/{url_params['attachment']:s}"
# Look for a document to which the user has access and that includes this attachment
# We must look into all descendants of any document to which the user has access per se
readable_per_se_paths = (
self.queryset.readable_per_se(user)
.order_by("path")
.values_list("path", flat=True)
)
attachments_documents = (
self.queryset.filter(attachments__contains=[key])
.only("path")
.order_by("path")
)
readable_attachments_paths = filter_descendants(
[doc.path for doc in attachments_documents],
readable_per_se_paths,
skip_sorting=True,
)
if not readable_attachments_paths:
logger.debug("User '%s' lacks permission for attachment", user)
raise drf.exceptions.PermissionDenied()
# Generate S3 authorization headers using the extracted URL parameters
request = utils.generate_s3_authorization_headers(
f"{url_params['pk']:s}/{url_params['key']:s}"
)
request = utils.generate_s3_authorization_headers(key)
return drf.response.Response("authorized", headers=request.headers, status=200)