🗃️(backend) export to docx

We can now export our document to a docx file.
This is done by converting the html to a docx
file using the pypandoc and pandoc library.
We added the "format" param to the
generate-document endpoint, "format" accept
"pdf" or "docx" as value.
This commit is contained in:
Anthony LC
2024-08-07 14:44:18 +02:00
committed by Anthony LC
parent ffaccad014
commit 4280f0779e
10 changed files with 147 additions and 32 deletions

View File

@@ -198,7 +198,7 @@ jobs:
- name: Install gettext (required to compile messages)
run: |
sudo apt-get update
sudo apt-get install -y gettext
sudo apt-get install -y gettext pandoc
- name: Generate a MO file from strings extracted from the project
run: python manage.py compilemessages

View File

@@ -75,6 +75,7 @@ RUN apt-get update && \
libgdk-pixbuf2.0-0 \
libpango-1.0-0 \
libpangocairo-1.0-0 \
pandoc \
shared-mime-info && \
rm -rf /var/lib/apt/lists/*

View File

@@ -180,6 +180,12 @@ class DocumentGenerationSerializer(serializers.Serializer):
required=False,
default="html",
)
format = serializers.ChoiceField(
choices=["pdf", "docx"],
label=_("Format"),
required=False,
default="pdf",
)
class InvitationSerializer(serializers.ModelSerializer):

View File

@@ -1,13 +1,11 @@
"""API endpoints"""
from io import BytesIO
from django.contrib.postgres.aggregates import ArrayAgg
from django.db.models import (
OuterRef,
Q,
Subquery,
)
from django.http import FileResponse, Http404
from django.http import Http404
from botocore.exceptions import ClientError
from rest_framework import (
@@ -460,7 +458,16 @@ class TemplateViewSet(
# pylint: disable=unused-argument
def generate_document(self, request, pk=None):
"""
Generate and return pdf for this template with the content passed.
Generate and return a document for this template around the
body passed as argument.
2 types of body are accepted:
- HTML: body_type = "html"
- Markdown: body_type = "markdown"
2 types of documents can be generated:
- PDF: format = "pdf"
- Docx: format = "docx"
"""
serializer = serializers.DocumentGenerationSerializer(data=request.data)
@@ -471,13 +478,10 @@ class TemplateViewSet(
body = serializer.validated_data["body"]
body_type = serializer.validated_data["body_type"]
export_format = serializer.validated_data["format"]
template = self.get_object()
pdf_content = template.generate_document(body, body_type)
response = FileResponse(BytesIO(pdf_content), content_type="application/pdf")
response["Content-Disposition"] = f"attachment; filename={template.title}.pdf"
return response
return template.generate_document(body, body_type, export_format)
class TemplateAccessViewSet(

View File

@@ -2,10 +2,13 @@
Declare and configure the models for the impress core application
"""
import hashlib
import os
import smtplib
import tempfile
import textwrap
import uuid
from datetime import timedelta
from io import BytesIO
from logging import getLogger
from django.conf import settings
@@ -16,6 +19,7 @@ from django.core import exceptions, mail, validators
from django.core.files.base import ContentFile
from django.core.files.storage import default_storage
from django.db import models
from django.http import FileResponse
from django.template.base import Template as DjangoTemplate
from django.template.context import Context
from django.template.loader import render_to_string
@@ -26,10 +30,10 @@ from django.utils.translation import override
import frontmatter
import markdown
import pypandoc
import weasyprint
from botocore.exceptions import ClientError
from timezone_field import TimeZoneField
from weasyprint import CSS, HTML
from weasyprint.text.fonts import FontConfiguration
logger = getLogger(__name__)
@@ -564,10 +568,90 @@ class Template(BaseModel):
"retrieve": can_get,
}
def generate_document(self, body, body_type):
def generate_pdf(self, body_html, metadata):
"""
Generate and return a PDF document for this template around the
Generate and return a pdf document wrapped around the current template
"""
document_html = weasyprint.HTML(
string=DjangoTemplate(self.code).render(
Context({"body": html.format_html(body_html), **metadata})
)
)
css = weasyprint.CSS(
string=self.css,
font_config=weasyprint.text.fonts.FontConfiguration(),
)
pdf_content = document_html.write_pdf(stylesheets=[css], zoom=1)
response = FileResponse(BytesIO(pdf_content), content_type="application/pdf")
response["Content-Disposition"] = f"attachment; filename={self.title}.pdf"
return response
def generate_word(self, body_html, metadata):
"""
Generate and return a docx document wrapped around the current template
"""
template_string = DjangoTemplate(self.code).render(
Context({"body": html.format_html(body_html), **metadata})
)
html_string = f"""
<!DOCTYPE html>
<html>
<head>
<style>
{self.css}
</style>
</head>
<body>
{template_string}
</body>
</html>
"""
reference_docx = "core/static/reference.docx"
# Convert the HTML to a temporary docx file
with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp_file:
output_path = tmp_file.name
pypandoc.convert_text(
html_string,
"docx",
format="html",
outputfile=output_path,
extra_args=["--reference-doc", reference_docx],
)
# Create a BytesIO object to store the output of the temporary docx file
with open(output_path, "rb") as f:
output = BytesIO(f.read())
# Remove the temporary docx file
os.remove(output_path)
output.seek(0)
response = FileResponse(
output,
content_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
)
response["Content-Disposition"] = f"attachment; filename={self.title}.docx"
return response
def generate_document(self, body, body_type, export_format):
"""
Generate and return a document for this template around the
body passed as argument.
2 types of body are accepted:
- HTML: body_type = "html"
- Markdown: body_type = "markdown"
2 types of documents can be generated:
- PDF: export_format = "pdf"
- Docx: export_format = "docx"
"""
document = frontmatter.loads(body)
metadata = document.metadata
@@ -580,16 +664,10 @@ class Template(BaseModel):
markdown.markdown(textwrap.dedent(strip_body)) if strip_body else ""
)
document_html = HTML(
string=DjangoTemplate(self.code).render(
Context({"body": html.format_html(body_html), **metadata})
)
)
css = CSS(
string=self.css,
font_config=FontConfiguration(),
)
return document_html.write_pdf(stylesheets=[css], zoom=1)
if export_format == "pdf":
return self.generate_pdf(body_html, metadata)
return self.generate_word(body_html, metadata)
class TemplateAccess(BaseAccess):

Binary file not shown.

View File

@@ -178,3 +178,26 @@ def test_api_templates_generate_document_type_unknown():
'"unknown" is not a valid choice.',
]
}
def test_api_templates_generate_document_export_docx():
"""Generate pdf document with the body type html."""
user = factories.UserFactory()
client = APIClient()
client.force_login(user)
template = factories.TemplateFactory(is_public=True)
data = {"body": "<p>Test body</p>", "body_type": "html", "format": "docx"}
response = client.post(
f"/api/v1.0/templates/{template.id!s}/generate-document/",
data,
format="json",
)
assert response.status_code == 200
assert (
response.headers["content-type"]
== "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
)

View File

@@ -1,7 +1,7 @@
<page size="A4">
<div class="header">
<img
src="https://upload.wikimedia.org/wikipedia/fr/7/72/Logo_du_Gouvernement_de_la_R%C3%A9publique_fran%C3%A7aise_%282020%29.svg"
<img width="200"
src="https://impress-staging.beta.numerique.gouv.fr/assets/logo-gouv.png"
/>
</div>
<div class="content">

View File

@@ -1,18 +1,20 @@
body {
background: white;
font-family: arial
}
.header {
display: flex;
justify-content: space-between;
font-family: arial;
}
.header img {
width: 5cm;
margin-left: -0.4cm;
}
.body{
margin-top: 1.5rem
margin-top: 1.5rem;
}
img {
max-width: 100%;
}
[custom-style="center"] {
text-align: center;
}
[custom-style="right"] {
text-align: right;
}

View File

@@ -49,6 +49,7 @@ dependencies = [
"nested-multipart-parser==1.5.0",
"psycopg[binary]==3.1.14",
"PyJWT==2.8.0",
"pypandoc==1.13",
"python-frontmatter==1.0.1",
"requests==2.32.2",
"sentry-sdk==2.8.0",