(documents) add content field as an S3 object

The content field is a writable property on the model which is persisted
in object storage. We take advantage of the versioning, robustness and
scalability of S3.
This commit is contained in:
Samuel Paccoud - DINUM
2024-04-06 09:09:46 +02:00
committed by Anthony LC
parent 4956beb040
commit 8e262da8f5
11 changed files with 164 additions and 44 deletions

View File

@@ -59,7 +59,6 @@ services:
- "8071:8000" - "8071:8000"
volumes: volumes:
- ./src/backend:/app - ./src/backend:/app
- ./data/media:/data/media
- ./data/static:/data/static - ./data/static:/data/static
depends_on: depends_on:
- postgresql - postgresql
@@ -78,7 +77,6 @@ services:
- env.d/development/postgresql - env.d/development/postgresql
volumes: volumes:
- ./src/backend:/app - ./src/backend:/app
- ./data/media:/data/media
- ./data/static:/data/static - ./data/static:/data/static
depends_on: depends_on:
- app-dev - app-dev
@@ -96,8 +94,6 @@ services:
env_file: env_file:
- env.d/development/common - env.d/development/common
- env.d/development/postgresql - env.d/development/postgresql
volumes:
- ./data/media:/data/media
depends_on: depends_on:
- postgresql - postgresql
- redis - redis

View File

@@ -16,6 +16,12 @@ DJANGO_EMAIL_PORT=1025
# Backend url # Backend url
IMPRESS_BASE_URL="http://localhost:8072" IMPRESS_BASE_URL="http://localhost:8072"
# Media
STORAGES_STATICFILES_BACKEND=django.contrib.staticfiles.storage.StaticFilesStorage
AWS_S3_ENDPOINT_URL=http://minio:9000
AWS_S3_ACCESS_KEY_ID=impress
AWS_S3_SECRET_ACCESS_KEY=password
# OIDC # OIDC
OIDC_OP_JWKS_ENDPOINT=http://nginx:8083/realms/impress/protocol/openid-connect/certs OIDC_OP_JWKS_ENDPOINT=http://nginx:8083/realms/impress/protocol/openid-connect/certs
OIDC_OP_AUTHORIZATION_ENDPOINT=http://localhost:8083/realms/impress/protocol/openid-connect/auth OIDC_OP_AUTHORIZATION_ENDPOINT=http://localhost:8083/realms/impress/protocol/openid-connect/auth

View File

@@ -0,0 +1,24 @@
"""A JSONField for DRF to handle serialization/deserialization."""
import json
from rest_framework import serializers
class JSONField(serializers.Field):
    """
    A DRF field that exchanges a JSON object with the client.

    Outgoing values are handed to the renderer untouched. Incoming values
    must be JSON objects (Python dictionaries once parsed) and are returned
    as a JSON encoded string.
    """

    default_error_messages = {
        "invalid": "Value must be a valid JSON object.",
    }

    def to_representation(self, value):
        """
        Return the value as is: no conversion is applied on the way out.
        """
        return value

    def to_internal_value(self, data):
        """
        Validate the incoming data and dump it to a JSON string.

        Returns None when data is None. Raises a validation error (via
        `self.fail`) when data is not a dictionary or cannot be dumped to
        JSON, so that the client gets a field error instead of an uncaught
        exception further down the line.
        """
        if data is None:
            return None

        # Only JSON objects are accepted; lists, strings or numbers are rejected here.
        if not isinstance(data, dict):
            self.fail("invalid")

        try:
            serialized = json.dumps(data)
        except (TypeError, ValueError):
            self.fail("invalid")

        return serialized

View File

@@ -7,6 +7,8 @@ from timezone_field.rest_framework import TimeZoneSerializerField
from core import models from core import models
from .fields import JSONField
class UserSerializer(serializers.ModelSerializer): class UserSerializer(serializers.ModelSerializer):
"""Serialize users.""" """Serialize users."""
@@ -134,6 +136,8 @@ class BaseResourceSerializer(serializers.ModelSerializer):
class DocumentSerializer(BaseResourceSerializer): class DocumentSerializer(BaseResourceSerializer):
"""Serialize documents.""" """Serialize documents."""
content = JSONField(required=False)
class Meta: class Meta:
model = models.Document model = models.Document
fields = ["id", "content", "title", "accesses", "abilities", "is_public"] fields = ["id", "content", "title", "accesses", "abilities", "is_public"]

View File

@@ -35,6 +35,7 @@ class DocumentFactory(factory.django.DjangoModelFactory):
title = factory.Sequence(lambda n: f"document{n}") title = factory.Sequence(lambda n: f"document{n}")
is_public = factory.Faker("boolean") is_public = factory.Faker("boolean")
content = factory.LazyFunction(lambda: {"foo": fake.word()})
@factory.post_generation @factory.post_generation
def users(self, create, extracted, **kwargs): def users(self, create, extracted, **kwargs):

View File

@@ -1,6 +1,8 @@
""" """
Declare and configure the models for the impress core application Declare and configure the models for the impress core application
""" """
import hashlib
import json
import textwrap import textwrap
import uuid import uuid
@@ -8,6 +10,8 @@ from django.conf import settings
from django.contrib.auth import models as auth_models from django.contrib.auth import models as auth_models
from django.contrib.auth.base_user import AbstractBaseUser from django.contrib.auth.base_user import AbstractBaseUser
from django.core import mail, validators from django.core import mail, validators
from django.core.files.base import ContentFile
from django.core.files.storage import default_storage
from django.db import models from django.db import models
from django.template.base import Template as DjangoTemplate from django.template.base import Template as DjangoTemplate
from django.template.context import Context from django.template.context import Context
@@ -249,6 +253,8 @@ class Document(BaseModel):
help_text=_("Whether this document is public for anyone to use."), help_text=_("Whether this document is public for anyone to use."),
) )
_content = None
class Meta: class Meta:
db_table = "impress_document" db_table = "impress_document"
ordering = ("title",) ordering = ("title",)
@@ -258,6 +264,49 @@ class Document(BaseModel):
def __str__(self): def __str__(self):
return self.title return self.title
@property
def content(self):
    """
    Return the document's json content.

    The value cached on the instance is returned when there is one. Otherwise,
    and only if the document has an id, the content is read from the default
    storage under a key equal to the id. None is returned when nothing is found.
    """
    # Nothing to fetch: content is already cached or the instance has no id yet
    if self._content is not None or not self.id:
        return self._content

    try:
        with default_storage.open(str(self.id)) as stored_file:
            self._content = json.load(stored_file)
    except FileNotFoundError:
        # No object stored under this key: leave the cached value untouched
        pass

    return self._content
@content.setter
def content(self, content):
    """
    Keep the content on the instance; nothing is written to storage here.

    A string is first parsed as JSON. The resulting value must be a
    dictionary, otherwise a ValueError is raised.
    """
    parsed = json.loads(content) if isinstance(content, str) else content

    if isinstance(parsed, dict):
        self._content = parsed
        return

    raise ValueError("content should be a json object.")
def save(self, *args, **kwargs):
    """
    Save the model, then write the content to object storage if it changed.

    Nothing is written when no content is cached on the instance (None).
    An empty dictionary is valid content and is written like any other value.
    When an object already exists under the document's key, its ETag is
    compared to the MD5 hex digest of the new payload and the upload is
    skipped if they are equal.
    """
    super().save(*args, **kwargs)

    # None means content was neither set nor loaded on this instance. Test for
    # None explicitly: a truthiness check would silently drop an empty dict.
    if self._content is None:
        return

    file_key = str(self.pk)
    bytes_content = json.dumps(self._content).encode("utf-8")

    if default_storage.exists(file_key):
        response = default_storage.connection.meta.client.head_object(
            Bucket=default_storage.bucket_name, Key=file_key
        )
        stored_etag = response["ETag"].strip('"')
        # MD5 is only a change-detection checksum here, not a security measure
        content_md5 = hashlib.md5(bytes_content, usedforsecurity=False).hexdigest()
        # Any mismatch results in a new upload
        has_changed = stored_etag != content_md5
    else:
        has_changed = True

    if has_changed:
        default_storage.save(file_key, ContentFile(bytes_content))
def get_abilities(self, user): def get_abilities(self, user):
""" """
Compute and return abilities for a given user on the document. Compute and return abilities for a given user on the document.

View File

@@ -28,6 +28,7 @@ def test_api_documents_retrieve_anonymous_public():
"accesses": [], "accesses": [],
"title": document.title, "title": document.title,
"is_public": True, "is_public": True,
"content": {"foo": document.content["foo"]},
} }
@@ -69,6 +70,7 @@ def test_api_documents_retrieve_authenticated_unrelated_public():
"accesses": [], "accesses": [],
"title": document.title, "title": document.title,
"is_public": True, "is_public": True,
"content": {"foo": document.content["foo"]},
} }
@@ -132,6 +134,7 @@ def test_api_documents_retrieve_authenticated_related_direct():
assert response.json() == { assert response.json() == {
"id": str(document.id), "id": str(document.id),
"title": document.title, "title": document.title,
"content": {"foo": document.content["foo"]},
"abilities": document.get_abilities(user), "abilities": document.get_abilities(user),
"is_public": document.is_public, "is_public": document.is_public,
} }
@@ -246,6 +249,7 @@ def test_api_documents_retrieve_authenticated_related_team_members(
assert response.json() == { assert response.json() == {
"id": str(document.id), "id": str(document.id),
"title": document.title, "title": document.title,
"content": {"foo": document.content["foo"]},
"abilities": document.get_abilities(user), "abilities": document.get_abilities(user),
"is_public": False, "is_public": False,
} }
@@ -343,6 +347,7 @@ def test_api_documents_retrieve_authenticated_related_team_administrators(
assert response.json() == { assert response.json() == {
"id": str(document.id), "id": str(document.id),
"title": document.title, "title": document.title,
"content": {"foo": document.content["foo"]},
"abilities": document.get_abilities(user), "abilities": document.get_abilities(user),
"is_public": False, "is_public": False,
} }
@@ -444,6 +449,7 @@ def test_api_documents_retrieve_authenticated_related_team_owners(
assert response.json() == { assert response.json() == {
"id": str(document.id), "id": str(document.id),
"title": document.title, "title": document.title,
"content": {"foo": document.content["foo"]},
"abilities": document.get_abilities(user), "abilities": document.get_abilities(user),
"is_public": False, "is_public": False,
} }

View File

@@ -6,7 +6,7 @@ import random
import pytest import pytest
from rest_framework.test import APIClient from rest_framework.test import APIClient
from core import factories from core import factories, models
from core.api import serializers from core.api import serializers
from core.tests.conftest import TEAM, USER, VIA from core.tests.conftest import TEAM, USER, VIA
@@ -138,7 +138,7 @@ def test_api_documents_update_authenticated_administrator_or_owner(
) )
assert response.status_code == 200 assert response.status_code == 200
document.refresh_from_db() document = models.Document.objects.get(pk=document.pk)
document_values = serializers.DocumentSerializer(instance=document).data document_values = serializers.DocumentSerializer(instance=document).data
for key, value in document_values.items(): for key, value in document_values.items():
if key in ["id", "accesses"]: if key in ["id", "accesses"]:
@@ -175,7 +175,7 @@ def test_api_documents_update_authenticated_owners(via, mock_user_get_teams):
) )
assert response.status_code == 200 assert response.status_code == 200
document.refresh_from_db() document = models.Document.objects.get(pk=document.pk)
document_values = serializers.DocumentSerializer(instance=document).data document_values = serializers.DocumentSerializer(instance=document).data
for key, value in document_values.items(): for key, value in document_values.items():
if key in ["id", "accesses"]: if key in ["id", "accesses"]:

View File

@@ -3,8 +3,10 @@ Unit tests for the Document model
""" """
from django.contrib.auth.models import AnonymousUser from django.contrib.auth.models import AnonymousUser
from django.core.exceptions import ValidationError from django.core.exceptions import ValidationError
from django.core.files.storage import default_storage
import pytest import pytest
import requests
from core import factories, models from core import factories, models
@@ -159,3 +161,47 @@ def test_models_documents_get_abilities_preset_role(django_assert_num_queries):
"manage_accesses": False, "manage_accesses": False,
"partial_update": False, "partial_update": False,
} }
def test_models_documents_file_upload_to_minio():
    """Content saved on a document should be readable back from object storage."""
    expected_content = {"foé": "çar"}

    document = factories.DocumentFactory()
    document.content = expected_content
    document.save()

    object_key = str(document.pk)

    # The storage backend should report the object as existing
    assert default_storage.exists(object_key) is True

    # Fetching the url given by the storage backend should return the same json
    response = requests.get(default_storage.url(object_key), timeout=1)
    assert response.json() == expected_content
def test_models_documents_version_duplicate():
    """Object storage should get a new version only when the content differs."""
    document = factories.DocumentFactory()
    file_key = str(document.pk)

    def count_versions():
        """Return the number of versions listed for the document's key."""
        listing = default_storage.connection.meta.client.list_object_versions(
            Bucket=default_storage.bucket_name, Prefix=file_key
        )
        return len(listing["Versions"])

    # The factory created the document with content: one version expected
    assert count_versions() == 1

    # Saving again without touching the content should not add a version
    document.save()
    assert count_versions() == 1

    # Saving modified content should add a second version
    document.content = {"foo": "spam"}
    document.save()
    assert count_versions() == 2

View File

@@ -108,13 +108,35 @@ class Base(Configuration):
STORAGES = { STORAGES = {
"default": { "default": {
"BACKEND": "django.core.files.storage.FileSystemStorage", "BACKEND": "storages.backends.s3.S3Storage",
}, },
"staticfiles": { "staticfiles": {
"BACKEND": "django.contrib.staticfiles.storage.StaticFilesStorage", "BACKEND": values.Value(
"whitenoise.storage.CompressedManifestStaticFilesStorage",
environ_name="STORAGES_STATICFILES_BACKEND",
),
}, },
} }
# Media
AWS_S3_ENDPOINT_URL = values.Value(
environ_name="AWS_S3_ENDPOINT_URL", environ_prefix=None
)
AWS_S3_ACCESS_KEY_ID = values.Value(
environ_name="AWS_S3_ACCESS_KEY_ID", environ_prefix=None
)
AWS_S3_SECRET_ACCESS_KEY = values.Value(
environ_name="AWS_S3_SECRET_ACCESS_KEY", environ_prefix=None
)
AWS_S3_REGION_NAME = values.Value(
environ_name="AWS_S3_REGION_NAME", environ_prefix=None
)
AWS_STORAGE_BUCKET_NAME = values.Value(
"impress-media-storage",
environ_name="AWS_STORAGE_BUCKET_NAME",
environ_prefix=None,
)
# Internationalization # Internationalization
# https://docs.djangoproject.com/en/3.1/topics/i18n/ # https://docs.djangoproject.com/en/3.1/topics/i18n/
@@ -451,15 +473,6 @@ class Test(Base):
] ]
USE_SWAGGER = True USE_SWAGGER = True
STORAGES = {
"default": {
"BACKEND": "django.core.files.storage.FileSystemStorage",
},
"staticfiles": {
"BACKEND": "django.contrib.staticfiles.storage.StaticFilesStorage",
},
}
CELERY_TASK_ALWAYS_EAGER = values.BooleanValue(True) CELERY_TASK_ALWAYS_EAGER = values.BooleanValue(True)
def __init__(self): def __init__(self):
@@ -506,34 +519,9 @@ class Production(Base):
CSRF_COOKIE_SECURE = True CSRF_COOKIE_SECURE = True
SESSION_COOKIE_SECURE = True SESSION_COOKIE_SECURE = True
# For static files in production, we want to use a backend that includes a hash in
# the filename, that is calculated from the file content, so that browsers always
# get the updated version of each file.
STORAGES = {
"default": {
"BACKEND": "storages.backends.s3.S3Storage",
},
"staticfiles": {
# For static files in production, we want to use a backend that includes a hash in
# the filename, that is calculated from the file content, so that browsers always
# get the updated version of each file.
"BACKEND": values.Value(
"whitenoise.storage.CompressedManifestStaticFilesStorage",
environ_name="STORAGES_STATICFILES_BACKEND",
)
},
}
# Privacy # Privacy
SECURE_REFERRER_POLICY = "same-origin" SECURE_REFERRER_POLICY = "same-origin"
# Media
AWS_S3_ENDPOINT_URL = values.Value()
AWS_S3_ACCESS_KEY_ID = values.Value()
AWS_S3_SECRET_ACCESS_KEY = values.Value()
AWS_STORAGE_BUCKET_NAME = values.Value("tf-default-impress-media-storage")
AWS_S3_REGION_NAME = values.Value()
CACHES = { CACHES = {
"default": { "default": {
"BACKEND": "django_redis.cache.RedisCache", "BACKEND": "django_redis.cache.RedisCache",

View File

@@ -34,7 +34,7 @@ dependencies = [
"django-parler==2.3", "django-parler==2.3",
"redis==5.0.3", "redis==5.0.3",
"django-redis==5.4.0", "django-redis==5.4.0",
"django-storages==1.14.2", "django-storages[s3]==1.14.2",
"django-timezone-field>=5.1", "django-timezone-field>=5.1",
"django==5.0.3", "django==5.0.3",
"djangorestframework==3.14.0", "djangorestframework==3.14.0",