🚑️(backend) command to update attachment content-type
The files already uploaded to the system are missing their content-type. We add a command that updates the content-type of these existing files. This command is meant to be run once, when we deploy to each environment.
This commit is contained in:
0
src/backend/core/management/__init__.py
Normal file
0
src/backend/core/management/__init__.py
Normal file
0
src/backend/core/management/commands/__init__.py
Normal file
0
src/backend/core/management/commands/__init__.py
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
"""Management command updating the metadata for all the files in the MinIO bucket."""
|
||||||
|
|
||||||
|
from django.core.files.storage import default_storage
|
||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
|
import magic
|
||||||
|
|
||||||
|
from core.models import Document
|
||||||
|
|
||||||
|
# pylint: disable=too-many-locals, broad-exception-caught
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Update the metadata for all the files in the MinIO bucket."""

    # NOTE: the class docstring above doubles as the command's help text
    # (see `help` below), so it is runtime-visible and must stay in sync.
    help = __doc__

    # Standard headers returned by `head_object` that `copy_object` accepts
    # back under the same name. With MetadataDirective="REPLACE" anything not
    # re-sent is dropped, so these are forwarded explicitly when present.
    _PRESERVED_HEADERS = (
        "CacheControl",
        "ContentDisposition",
        "ContentEncoding",
        "ContentLanguage",
    )

    def handle(self, *args, **options):
        """Execute management command.

        For every document, list the objects stored under
        ``<document id>/attachments/`` in the default storage bucket and
        rewrite each one with a ContentType detected from its first bytes.
        A failure on one object is reported on stderr and does not stop the
        processing of the remaining objects.
        """
        s3_client = default_storage.connection.meta.client
        bucket_name = default_storage.bucket_name

        mime_detector = magic.Magic(mime=True)

        # Only the ids are needed to build the prefixes: avoid loading
        # full model instances for every document.
        document_ids = Document.objects.values_list("id", flat=True)
        self.stdout.write(
            f"[INFO] Found {document_ids.count()} documents. Starting ContentType fix..."
        )

        # The paginator follows continuation tokens for us.
        paginator = s3_client.get_paginator("list_objects_v2")

        for document_id in document_ids:
            doc_id_str = str(document_id)
            prefix = f"{doc_id_str}/attachments/"
            self.stdout.write(
                f"[INFO] Processing attachments under prefix '{prefix}' ..."
            )

            total_updated = 0

            for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
                # A page has no "Contents" key when nothing matches the prefix
                for obj in page.get("Contents", []):
                    key = obj["Key"]

                    # Skip if it's a folder
                    if key.endswith("/"):
                        continue

                    try:
                        self._fix_content_type(
                            s3_client, bucket_name, key, mime_detector
                        )
                    except Exception as exc:  # noqa
                        # Best effort: report and carry on with the next object
                        self.stderr.write(
                            f"[ERROR] Could not update ContentType for {key}: {exc}"
                        )
                    else:
                        total_updated += 1

            if total_updated > 0:
                self.stdout.write(
                    f"[INFO] -> Updated {total_updated} objects for Document {doc_id_str}."
                )

    def _fix_content_type(self, s3_client, bucket_name, key, mime_detector):
        """Rewrite the object at ``key`` with a ContentType detected from its content.

        The object is copied onto itself with ``MetadataDirective="REPLACE"``.
        Its user metadata and the headers listed in ``_PRESERVED_HEADERS`` are
        read from ``head_object`` and sent again so the copy keeps them.

        Any exception raised by the storage client or the MIME detector is
        left to propagate to the caller.
        """
        # Get existing metadata
        head_resp = s3_client.head_object(Bucket=bucket_name, Key=key)

        # Read first ~1KB for MIME detection
        partial_obj = s3_client.get_object(
            Bucket=bucket_name, Key=key, Range="bytes=0-1023"
        )
        body = partial_obj["Body"]
        try:
            partial_data = body.read()
        finally:
            # Release the underlying HTTP connection even if the read fails
            body.close()

        # Detect MIME type
        magic_mime_type = mime_detector.from_buffer(partial_data)

        preserved_headers = {
            name: head_resp[name]
            for name in self._PRESERVED_HEADERS
            if head_resp.get(name)
        }

        # Update ContentType
        s3_client.copy_object(
            Bucket=bucket_name,
            CopySource={"Bucket": bucket_name, "Key": key},
            Key=key,
            ContentType=magic_mime_type,
            Metadata=head_resp.get("Metadata", {}),
            MetadataDirective="REPLACE",
            **preserved_headers,
        )
|
||||||
@@ -0,0 +1,50 @@
|
|||||||
|
"""
|
||||||
|
Unit test for `update_files_content_type_metadata` command.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
from django.core.files.storage import default_storage
|
||||||
|
from django.core.management import call_command
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from core import factories
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
def test_update_files_content_type_metadata():
    """
    Running `update_files_content_type_metadata` must replace a wrong
    ContentType on stored attachments and keep their user metadata.
    """
    storage_client = default_storage.connection.meta.client
    bucket = default_storage.bucket_name

    # Upload one PNG attachment per document, deliberately labelled as text
    attachment_keys = []
    for document_id in (uuid.uuid4() for _ in range(10)):
        factories.DocumentFactory(id=document_id)
        attachment_key = f"{document_id}/attachments/testfile.png"
        attachment_keys.append(attachment_key)
        png_payload = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR..."
        upload_kwargs = {
            "Bucket": bucket,
            "Key": attachment_key,
            "Body": png_payload,
            "ContentType": "text/plain",
            "Metadata": {"owner": "None"},
        }
        storage_client.put_object(**upload_kwargs)

    # Run the command under test
    call_command("update_files_content_type_metadata")

    for attachment_key in attachment_keys:
        head_resp = storage_client.head_object(Bucket=bucket, Key=attachment_key)

        # The ContentType must now reflect the actual file content
        detected_type = head_resp["ContentType"]
        assert (
            detected_type == "image/png"
        ), f"ContentType not fixed, got {head_resp['ContentType']!r}"

        # The user metadata set at upload time must survive the rewrite
        stored_owner = head_resp["Metadata"].get("owner")
        assert stored_owner == "None"
|
||||||
Reference in New Issue
Block a user