🚑️(backend) command to update attachment content-type
The uploaded files in the system are missing the content-type. We add a command to update the content-type of the existing uploaded files. This command will run one time when we will deploy to the environments.
This commit is contained in:
0
src/backend/core/management/__init__.py
Normal file
0
src/backend/core/management/__init__.py
Normal file
0
src/backend/core/management/commands/__init__.py
Normal file
0
src/backend/core/management/commands/__init__.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""Management command updating the metadata for all the files in the MinIO bucket."""
|
||||
|
||||
from django.core.files.storage import default_storage
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
import magic
|
||||
|
||||
from core.models import Document
|
||||
|
||||
# pylint: disable=too-many-locals, broad-exception-caught
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
"""Update the metadata for all the files in the MinIO bucket."""
|
||||
|
||||
help = __doc__
|
||||
|
||||
def handle(self, *args, **options):
|
||||
"""Execute management command."""
|
||||
s3_client = default_storage.connection.meta.client
|
||||
bucket_name = default_storage.bucket_name
|
||||
|
||||
mime_detector = magic.Magic(mime=True)
|
||||
|
||||
documents = Document.objects.all()
|
||||
self.stdout.write(
|
||||
f"[INFO] Found {documents.count()} documents. Starting ContentType fix..."
|
||||
)
|
||||
|
||||
for doc in documents:
|
||||
doc_id_str = str(doc.id)
|
||||
prefix = f"{doc_id_str}/attachments/"
|
||||
self.stdout.write(
|
||||
f"[INFO] Processing attachments under prefix '{prefix}' ..."
|
||||
)
|
||||
|
||||
continuation_token = None
|
||||
total_updated = 0
|
||||
|
||||
while True:
|
||||
list_kwargs = {"Bucket": bucket_name, "Prefix": prefix}
|
||||
if continuation_token:
|
||||
list_kwargs["ContinuationToken"] = continuation_token
|
||||
|
||||
response = s3_client.list_objects_v2(**list_kwargs)
|
||||
|
||||
# If no objects found under this prefix, break out of the loop
|
||||
if "Contents" not in response:
|
||||
break
|
||||
|
||||
for obj in response["Contents"]:
|
||||
key = obj["Key"]
|
||||
|
||||
# Skip if it's a folder
|
||||
if key.endswith("/"):
|
||||
continue
|
||||
|
||||
try:
|
||||
# Get existing metadata
|
||||
head_resp = s3_client.head_object(Bucket=bucket_name, Key=key)
|
||||
|
||||
# Read first ~1KB for MIME detection
|
||||
partial_obj = s3_client.get_object(
|
||||
Bucket=bucket_name, Key=key, Range="bytes=0-1023"
|
||||
)
|
||||
partial_data = partial_obj["Body"].read()
|
||||
|
||||
# Detect MIME type
|
||||
magic_mime_type = mime_detector.from_buffer(partial_data)
|
||||
|
||||
# Update ContentType
|
||||
s3_client.copy_object(
|
||||
Bucket=bucket_name,
|
||||
CopySource={"Bucket": bucket_name, "Key": key},
|
||||
Key=key,
|
||||
ContentType=magic_mime_type,
|
||||
Metadata=head_resp.get("Metadata", {}),
|
||||
MetadataDirective="REPLACE",
|
||||
)
|
||||
total_updated += 1
|
||||
|
||||
except Exception as exc: # noqa
|
||||
self.stderr.write(
|
||||
f"[ERROR] Could not update ContentType for {key}: {exc}"
|
||||
)
|
||||
|
||||
if response.get("IsTruncated"):
|
||||
continuation_token = response.get("NextContinuationToken")
|
||||
else:
|
||||
break
|
||||
|
||||
if total_updated > 0:
|
||||
self.stdout.write(
|
||||
f"[INFO] -> Updated {total_updated} objects for Document {doc_id_str}."
|
||||
)
|
||||
@@ -0,0 +1,50 @@
|
||||
"""
|
||||
Unit test for `update_files_content_type_metadata` command.
|
||||
"""
|
||||
|
||||
import uuid
|
||||
|
||||
from django.core.files.storage import default_storage
|
||||
from django.core.management import call_command
|
||||
|
||||
import pytest
|
||||
|
||||
from core import factories
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_update_files_content_type_metadata():
|
||||
"""
|
||||
Test that the command `update_files_content_type_metadata`
|
||||
fixes the ContentType of attachment in the storage.
|
||||
"""
|
||||
s3_client = default_storage.connection.meta.client
|
||||
bucket_name = default_storage.bucket_name
|
||||
|
||||
# Create files with a wrong ContentType
|
||||
keys = []
|
||||
for _ in range(10):
|
||||
doc_id = uuid.uuid4()
|
||||
factories.DocumentFactory(id=doc_id)
|
||||
key = f"{doc_id}/attachments/testfile.png"
|
||||
keys.append(key)
|
||||
fake_png = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR..."
|
||||
s3_client.put_object(
|
||||
Bucket=bucket_name,
|
||||
Key=key,
|
||||
Body=fake_png,
|
||||
ContentType="text/plain",
|
||||
Metadata={"owner": "None"},
|
||||
)
|
||||
|
||||
# Call the command that fixes the ContentType
|
||||
call_command("update_files_content_type_metadata")
|
||||
|
||||
for key in keys:
|
||||
head_resp = s3_client.head_object(Bucket=bucket_name, Key=key)
|
||||
assert (
|
||||
head_resp["ContentType"] == "image/png"
|
||||
), f"ContentType not fixed, got {head_resp['ContentType']!r}"
|
||||
|
||||
# Check that original metadata was preserved
|
||||
assert head_resp["Metadata"].get("owner") == "None"
|
||||
Reference in New Issue
Block a user