✨(documents) add content field as an S3 object
The content field is a writable property on the model which is persisted in object storage. We take advantage of the versioning, robustness and scalability of S3.
This commit is contained in:
committed by
Anthony LC
parent
4956beb040
commit
8e262da8f5
24
src/backend/core/api/fields.py
Normal file
24
src/backend/core/api/fields.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""A JSONField for DRF to handle serialization/deserialization."""
|
||||
import json
|
||||
|
||||
from rest_framework import serializers
|
||||
|
||||
|
||||
class JSONField(serializers.Field):
    """
    A DRF field that outputs its value untouched and stores input as JSON text.
    """

    def to_representation(self, value):
        """
        Return the value as-is for serialization.

        No conversion is performed here: whatever the source attribute holds
        is handed back unchanged.
        """
        return value

    def to_internal_value(self, data):
        """
        Encode the incoming data as a JSON string.

        ``None`` is passed through unchanged; any other input is run through
        ``json.dumps``.
        """
        return None if data is None else json.dumps(data)
|
||||
@@ -7,6 +7,8 @@ from timezone_field.rest_framework import TimeZoneSerializerField
|
||||
|
||||
from core import models
|
||||
|
||||
from .fields import JSONField
|
||||
|
||||
|
||||
class UserSerializer(serializers.ModelSerializer):
|
||||
"""Serialize users."""
|
||||
@@ -134,6 +136,8 @@ class BaseResourceSerializer(serializers.ModelSerializer):
|
||||
class DocumentSerializer(BaseResourceSerializer):
|
||||
"""Serialize documents."""
|
||||
|
||||
content = JSONField(required=False)
|
||||
|
||||
class Meta:
|
||||
model = models.Document
|
||||
fields = ["id", "content", "title", "accesses", "abilities", "is_public"]
|
||||
|
||||
@@ -35,6 +35,7 @@ class DocumentFactory(factory.django.DjangoModelFactory):
|
||||
|
||||
title = factory.Sequence(lambda n: f"document{n}")
|
||||
is_public = factory.Faker("boolean")
|
||||
content = factory.LazyFunction(lambda: {"foo": fake.word()})
|
||||
|
||||
@factory.post_generation
|
||||
def users(self, create, extracted, **kwargs):
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
"""
|
||||
Declare and configure the models for the impress core application
|
||||
"""
|
||||
import hashlib
|
||||
import json
|
||||
import textwrap
|
||||
import uuid
|
||||
|
||||
@@ -8,6 +10,8 @@ from django.conf import settings
|
||||
from django.contrib.auth import models as auth_models
|
||||
from django.contrib.auth.base_user import AbstractBaseUser
|
||||
from django.core import mail, validators
|
||||
from django.core.files.base import ContentFile
|
||||
from django.core.files.storage import default_storage
|
||||
from django.db import models
|
||||
from django.template.base import Template as DjangoTemplate
|
||||
from django.template.context import Context
|
||||
@@ -249,6 +253,8 @@ class Document(BaseModel):
|
||||
help_text=_("Whether this document is public for anyone to use."),
|
||||
)
|
||||
|
||||
_content = None
|
||||
|
||||
class Meta:
|
||||
db_table = "impress_document"
|
||||
ordering = ("title",)
|
||||
@@ -258,6 +264,49 @@ class Document(BaseModel):
|
||||
def __str__(self):
|
||||
return self.title
|
||||
|
||||
@property
def content(self):
    """
    Return the json content, loading it from object storage on first access.

    The cached value is returned when one is already set or when the instance
    has no id. Otherwise the object stored under ``str(self.id)`` is read and
    decoded as JSON; a missing object leaves the cache untouched.
    """
    already_cached = self._content is not None
    if already_cached or not self.id:
        return self._content

    try:
        # Fetch the serialized content from object storage
        with default_storage.open(str(self.id)) as stored_file:
            self._content = json.load(stored_file)
    except FileNotFoundError:
        # Nothing stored under this key: keep the cache empty
        pass

    return self._content


@content.setter
def content(self, content):
    """
    Cache the content on the instance without writing to object storage.

    A string is first decoded with ``json.loads``. Raises ``ValueError`` when
    the resulting value is not a dict.
    """
    parsed = json.loads(content) if isinstance(content, str) else content

    if isinstance(parsed, dict):
        self._content = parsed
        return

    raise ValueError("content should be a json object.")
|
||||
|
||||
def save(self, *args, **kwargs):
    """
    Save the instance, then write the cached content to object storage.

    The content is serialized to UTF-8 encoded JSON and stored under the key
    ``str(self.pk)``. When an object already exists under that key, its ETag
    is compared with the MD5 hex digest of the new bytes and the upload is
    skipped when they match. Nothing is written when no content is cached
    (``_content`` is None).
    """
    super().save(*args, **kwargs)

    # Test against None rather than truthiness: an empty object ({}) is valid
    # content accepted by the setter and must be persisted like any other
    # value, otherwise the previously stored content would silently remain.
    if self._content is None:
        return

    file_key = str(self.pk)
    bytes_content = json.dumps(self._content).encode("utf-8")

    if default_storage.exists(file_key):
        response = default_storage.connection.meta.client.head_object(
            Bucket=default_storage.bucket_name, Key=file_key
        )
        # NOTE(review): presumably the ETag is the MD5 of the object body,
        # which S3 does not guarantee for every upload mode - confirm. A false
        # mismatch only costs one extra write.
        has_changed = (
            response["ETag"].strip('"')
            != hashlib.md5(bytes_content).hexdigest()  # noqa
        )
    else:
        has_changed = True

    if has_changed:
        default_storage.save(file_key, ContentFile(bytes_content))
|
||||
|
||||
def get_abilities(self, user):
|
||||
"""
|
||||
Compute and return abilities for a given user on the document.
|
||||
|
||||
@@ -28,6 +28,7 @@ def test_api_documents_retrieve_anonymous_public():
|
||||
"accesses": [],
|
||||
"title": document.title,
|
||||
"is_public": True,
|
||||
"content": {"foo": document.content["foo"]},
|
||||
}
|
||||
|
||||
|
||||
@@ -69,6 +70,7 @@ def test_api_documents_retrieve_authenticated_unrelated_public():
|
||||
"accesses": [],
|
||||
"title": document.title,
|
||||
"is_public": True,
|
||||
"content": {"foo": document.content["foo"]},
|
||||
}
|
||||
|
||||
|
||||
@@ -132,6 +134,7 @@ def test_api_documents_retrieve_authenticated_related_direct():
|
||||
assert response.json() == {
|
||||
"id": str(document.id),
|
||||
"title": document.title,
|
||||
"content": {"foo": document.content["foo"]},
|
||||
"abilities": document.get_abilities(user),
|
||||
"is_public": document.is_public,
|
||||
}
|
||||
@@ -246,6 +249,7 @@ def test_api_documents_retrieve_authenticated_related_team_members(
|
||||
assert response.json() == {
|
||||
"id": str(document.id),
|
||||
"title": document.title,
|
||||
"content": {"foo": document.content["foo"]},
|
||||
"abilities": document.get_abilities(user),
|
||||
"is_public": False,
|
||||
}
|
||||
@@ -343,6 +347,7 @@ def test_api_documents_retrieve_authenticated_related_team_administrators(
|
||||
assert response.json() == {
|
||||
"id": str(document.id),
|
||||
"title": document.title,
|
||||
"content": {"foo": document.content["foo"]},
|
||||
"abilities": document.get_abilities(user),
|
||||
"is_public": False,
|
||||
}
|
||||
@@ -444,6 +449,7 @@ def test_api_documents_retrieve_authenticated_related_team_owners(
|
||||
assert response.json() == {
|
||||
"id": str(document.id),
|
||||
"title": document.title,
|
||||
"content": {"foo": document.content["foo"]},
|
||||
"abilities": document.get_abilities(user),
|
||||
"is_public": False,
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ import random
|
||||
import pytest
|
||||
from rest_framework.test import APIClient
|
||||
|
||||
from core import factories
|
||||
from core import factories, models
|
||||
from core.api import serializers
|
||||
from core.tests.conftest import TEAM, USER, VIA
|
||||
|
||||
@@ -138,7 +138,7 @@ def test_api_documents_update_authenticated_administrator_or_owner(
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
document.refresh_from_db()
|
||||
document = models.Document.objects.get(pk=document.pk)
|
||||
document_values = serializers.DocumentSerializer(instance=document).data
|
||||
for key, value in document_values.items():
|
||||
if key in ["id", "accesses"]:
|
||||
@@ -175,7 +175,7 @@ def test_api_documents_update_authenticated_owners(via, mock_user_get_teams):
|
||||
)
|
||||
|
||||
assert response.status_code == 200
|
||||
document.refresh_from_db()
|
||||
document = models.Document.objects.get(pk=document.pk)
|
||||
document_values = serializers.DocumentSerializer(instance=document).data
|
||||
for key, value in document_values.items():
|
||||
if key in ["id", "accesses"]:
|
||||
|
||||
@@ -3,8 +3,10 @@ Unit tests for the Document model
|
||||
"""
|
||||
from django.contrib.auth.models import AnonymousUser
|
||||
from django.core.exceptions import ValidationError
|
||||
from django.core.files.storage import default_storage
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from core import factories, models
|
||||
|
||||
@@ -159,3 +161,47 @@ def test_models_documents_get_abilities_preset_role(django_assert_num_queries):
|
||||
"manage_accesses": False,
|
||||
"partial_update": False,
|
||||
}
|
||||
|
||||
|
||||
def test_models_documents_file_upload_to_minio():
    """Content saved on a document should be readable back from MinIO."""
    payload = {"foé": "çar"}

    document = factories.DocumentFactory()
    document.content = payload
    document.save()

    storage_key = str(document.pk)

    # The storage backend must report the object as present...
    assert default_storage.exists(storage_key) is True

    # ...and the object must be downloadable straight from MinIO
    download_url = default_storage.url(storage_key)
    http_response = requests.get(download_url, timeout=1)
    assert http_response.json() == {"foé": "çar"}
|
||||
|
||||
|
||||
def test_models_documents_version_duplicate():
    """Object storage should get a new version only when the content changes."""
    document = factories.DocumentFactory()
    file_key = str(document.pk)

    def stored_versions():
        """List the versions currently held in the bucket for this document."""
        listing = default_storage.connection.meta.client.list_object_versions(
            Bucket=default_storage.bucket_name, Prefix=file_key
        )
        return listing["Versions"]

    # Creating the document wrote a first version
    assert len(stored_versions()) == 1

    # Saving identical content must not add a version
    document.save()
    assert len(stored_versions()) == 1

    # Saving different content must add exactly one version
    document.content = {"foo": "spam"}
    document.save()
    assert len(stored_versions()) == 2
|
||||
|
||||
Reference in New Issue
Block a user