(backend) add soft delete to documents and refactor db queryset

Now that we have introduced a document tree structure, it is not
possible to allow deleting documents anymore as it impacts the whole
subtree below the deleted document and the consequences are too big.

We introduce soft delete in order to give a second thought to the
document's owner (who is the only one to be allowed to delete a
document). After a document is soft deleted, the owner can still
see it in the trashbin (/api/v1.0/documents/trashbin).
After a grace period (30 days be default) the document disappears
from the trashbin and can't be restored anymore. Note that even
then it is still kept in database. Cleaning the database to erase
deleted documents after the grace period can be done as a maintenance
script.
This commit is contained in:
Samuel Paccoud - DINUM
2025-01-02 17:20:09 +01:00
committed by Anthony LC
parent 4de03d292a
commit 8ccfdb3c6a
21 changed files with 1373 additions and 252 deletions

View File

@@ -13,15 +13,13 @@ from django.conf import settings
from django.contrib.auth import models as auth_models
from django.contrib.auth.base_user import AbstractBaseUser
from django.contrib.sites.models import Site
from django.core import exceptions, mail, validators
from django.core import mail, validators
from django.core.cache import cache
from django.core.files.base import ContentFile
from django.core.files.storage import default_storage
from django.core.mail import send_mail
from django.db import models
from django.db import models, transaction
from django.db.models.functions import Left, Length
from django.http import FileResponse
from django.template.base import Template as DjangoTemplate
from django.template.context import Context
from django.template.loader import render_to_string
from django.utils import timezone
from django.utils.functional import cached_property, lazy
@@ -29,12 +27,27 @@ from django.utils.translation import get_language, override
from django.utils.translation import gettext_lazy as _
from botocore.exceptions import ClientError
from rest_framework.exceptions import ValidationError
from timezone_field import TimeZoneField
from treebeard.mp_tree import MP_Node
logger = getLogger(__name__)
def get_trashbin_cutoff():
"""
Calculate the cutoff datetime for soft-deleted items based on the retention policy.
The function returns the current datetime minus the number of days specified in
the TRASHBIN_CUTOFF_DAYS setting, indicating the oldest date for items that can
remain in the trash bin.
Returns:
datetime: The cutoff datetime for soft-deleted items.
"""
return timezone.now() - timedelta(days=settings.TRASHBIN_CUTOFF_DAYS)
class LinkRoleChoices(models.TextChoices):
"""Defines the possible roles a link can offer on a document."""
@@ -374,6 +387,8 @@ class Document(MP_Node, BaseModel):
blank=True,
null=True,
)
deleted_at = models.DateTimeField(null=True, blank=True)
ancestors_deleted_at = models.DateTimeField(null=True, blank=True)
_content = None
@@ -389,6 +404,15 @@ class Document(MP_Node, BaseModel):
ordering = ("path",)
verbose_name = _("Document")
verbose_name_plural = _("Documents")
constraints = [
models.CheckConstraint(
check=(
models.Q(deleted_at__isnull=True)
| models.Q(deleted_at=models.F("ancestors_deleted_at"))
),
name="check_deleted_at_matches_ancestors_deleted_at_when_set",
),
]
def __str__(self):
return str(self.title) if self.title else str(_("Untitled Document"))
@@ -527,6 +551,32 @@ class Document(MP_Node, BaseModel):
Bucket=default_storage.bucket_name, Key=self.file_key, VersionId=version_id
)
def get_nb_accesses_cache_key(self):
"""Generate a unique cache key for each document."""
return f"document_{self.id!s}_nb_accesses"
@property
def nb_accesses(self):
"""Calculate the number of accesses."""
cache_key = self.get_nb_accesses_cache_key()
nb_accesses = cache.get(cache_key)
if nb_accesses is None:
nb_accesses = DocumentAccess.objects.filter(
document__path=Left(models.Value(self.path), Length("document__path")),
).count()
cache.set(cache_key, nb_accesses)
return nb_accesses
def invalidate_nb_accesses_cache(self):
"""
Invalidate the cache for number of accesses, including on affected descendants.
"""
for document in Document.objects.filter(path__startswith=self.path).only("id"):
cache_key = document.get_nb_accesses_cache_key()
cache.delete(cache_key)
def get_roles(self, user):
"""Return the roles a user has on a document."""
if not user.is_authenticated:
@@ -546,60 +596,78 @@ class Document(MP_Node, BaseModel):
roles = []
return roles
@cached_property
def links_definitions(self):
"""Get links reach/role definitions for the current document and its ancestors."""
links_definitions = {self.link_reach: {self.link_role}}
# Ancestors links definitions are only interesting if the document is not the highest
# ancestor to which the current user has access. Look for the annotation:
if self.depth > 1 and not getattr(self, "is_highest_ancestor_for_user", False):
for ancestor in self.get_ancestors().values("link_reach", "link_role"):
links_definitions.setdefault(ancestor["link_reach"], set()).add(
ancestor["link_role"]
)
return links_definitions
def get_abilities(self, user):
"""
Compute and return abilities for a given user on the document.
"""
roles = set(self.get_roles(user))
roles = set(
self.get_roles(user)
) # at this point only roles based on specific access
# Compute version roles before adding link roles because we don't
# Characteristics that are based only on specific access
is_owner = RoleChoices.OWNER in roles
is_deleted = self.ancestors_deleted_at and not is_owner
is_owner_or_admin = (is_owner or RoleChoices.ADMIN in roles) and not is_deleted
# Compute access roles before adding link roles because we don't
# want anonymous users to access versions (we wouldn't know from
# which date to allow them anyway)
# Anonymous users should also not see document accesses
has_role = bool(roles)
has_access_role = bool(roles) and not is_deleted
# Add roles provided by the document link, taking into account its ancestors
link_reaches = list(self.get_ancestors().values("link_reach", "link_role"))
link_reaches.append(
{"link_reach": self.link_reach, "link_role": self.link_role}
# Add roles provided by the document link
links_definitions = self.links_definitions
public_roles = links_definitions.get(LinkReachChoices.PUBLIC, set())
authenticated_roles = (
links_definitions.get(LinkReachChoices.AUTHENTICATED, set())
if user.is_authenticated
else set()
)
roles = roles | public_roles | authenticated_roles
for lr in link_reaches:
if lr["link_reach"] == LinkReachChoices.PUBLIC:
roles.add(lr["link_role"])
if user.is_authenticated:
for lr in link_reaches:
if lr["link_reach"] == LinkReachChoices.AUTHENTICATED:
roles.add(lr["link_role"])
is_owner_or_admin = bool(
roles.intersection({RoleChoices.OWNER, RoleChoices.ADMIN})
)
can_get = bool(roles)
can_update = is_owner_or_admin or RoleChoices.EDITOR in roles
can_get = bool(roles) and not is_deleted
can_update = (
is_owner_or_admin or RoleChoices.EDITOR in roles
) and not is_deleted
return {
"accesses_manage": is_owner_or_admin,
"accesses_view": has_role,
"accesses_view": has_access_role,
"ai_transform": can_update,
"ai_translate": can_update,
"attachment_upload": can_update,
"children_list": can_get,
"children_create": can_update and user.is_authenticated,
"collaboration_auth": can_get,
"destroy": RoleChoices.OWNER in roles,
"destroy": is_owner,
"favorite": can_get and user.is_authenticated,
"link_configuration": is_owner_or_admin,
"invite_owner": RoleChoices.OWNER in roles,
"move": is_owner_or_admin,
"invite_owner": is_owner,
"move": is_owner_or_admin and not self.ancestors_deleted_at,
"partial_update": can_update,
"retrieve": can_get,
"media_auth": can_get,
"update": can_update,
"versions_destroy": is_owner_or_admin,
"versions_list": has_role,
"versions_retrieve": has_role,
"versions_list": has_access_role,
"versions_retrieve": has_access_role,
}
def send_email(self, subject, emails, context=None, language=None):
@@ -660,6 +728,31 @@ class Document(MP_Node, BaseModel):
self.send_email(subject, [email], context, language)
@transaction.atomic
def soft_delete(self):
"""
Soft delete the document, marking the deletion on descendants.
We still keep the .delete() method untouched for programmatic purposes.
"""
if self.deleted_at or self.ancestors_deleted_at:
raise RuntimeError(
"This document is already deleted or has deleted ancestors."
)
# Check if any ancestors are deleted
if self.get_ancestors().filter(deleted_at__isnull=False).exists():
raise RuntimeError(
"Cannot delete this document because one or more ancestors are already deleted."
)
self.ancestors_deleted_at = self.deleted_at = timezone.now()
self.save()
# Mark all descendants as soft deleted
self.get_descendants().filter(ancestors_deleted_at__isnull=True).update(
ancestors_deleted_at=self.ancestors_deleted_at
)
class LinkTrace(BaseModel):
"""
@@ -762,6 +855,16 @@ class DocumentAccess(BaseAccess):
def __str__(self):
return f"{self.user!s} is {self.role:s} in document {self.document!s}"
def save(self, *args, **kwargs):
"""Override save to clear the document's cache for number of accesses."""
super().save(*args, **kwargs)
self.document.invalidate_nb_accesses_cache()
def delete(self, *args, **kwargs):
"""Override delete to clear the document's cache for number of accesses."""
super().delete(*args, **kwargs)
self.document.invalidate_nb_accesses_cache()
def get_abilities(self, user):
"""
Compute and return abilities for a given user on the document access.
@@ -792,7 +895,7 @@ class Template(BaseModel):
return self.title
def get_roles(self, user):
"""Return the roles a user has on a resource."""
"""Return the roles a user has on a resource as an iterable."""
if not user.is_authenticated:
return []
@@ -915,8 +1018,8 @@ class Invitation(BaseModel):
User.objects.filter(email=self.email).exists()
and not settings.OIDC_ALLOW_DUPLICATE_EMAILS
):
raise exceptions.ValidationError(
{"email": _("This email is already associated to a registered user.")}
raise ValidationError(
{"email": [_("This email is already associated to a registered user.")]}
)
@property