🗃️(backend) export to docx
We can now export our document to a docx file. This is done by converting the html to a docx file using the pypandoc and pandoc library. We added the "format" param to the generate-document endpoint, "format" accept "pdf" or "docx" as value.
This commit is contained in:
2
.github/workflows/impress.yml
vendored
2
.github/workflows/impress.yml
vendored
@@ -198,7 +198,7 @@ jobs:
|
||||
- name: Install gettext (required to compile messages)
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y gettext
|
||||
sudo apt-get install -y gettext pandoc
|
||||
|
||||
- name: Generate a MO file from strings extracted from the project
|
||||
run: python manage.py compilemessages
|
||||
|
||||
@@ -75,6 +75,7 @@ RUN apt-get update && \
|
||||
libgdk-pixbuf2.0-0 \
|
||||
libpango-1.0-0 \
|
||||
libpangocairo-1.0-0 \
|
||||
pandoc \
|
||||
shared-mime-info && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
@@ -180,6 +180,12 @@ class DocumentGenerationSerializer(serializers.Serializer):
|
||||
required=False,
|
||||
default="html",
|
||||
)
|
||||
format = serializers.ChoiceField(
|
||||
choices=["pdf", "docx"],
|
||||
label=_("Format"),
|
||||
required=False,
|
||||
default="pdf",
|
||||
)
|
||||
|
||||
|
||||
class InvitationSerializer(serializers.ModelSerializer):
|
||||
|
||||
@@ -1,13 +1,11 @@
|
||||
"""API endpoints"""
|
||||
from io import BytesIO
|
||||
|
||||
from django.contrib.postgres.aggregates import ArrayAgg
|
||||
from django.db.models import (
|
||||
OuterRef,
|
||||
Q,
|
||||
Subquery,
|
||||
)
|
||||
from django.http import FileResponse, Http404
|
||||
from django.http import Http404
|
||||
|
||||
from botocore.exceptions import ClientError
|
||||
from rest_framework import (
|
||||
@@ -460,7 +458,16 @@ class TemplateViewSet(
|
||||
# pylint: disable=unused-argument
|
||||
def generate_document(self, request, pk=None):
|
||||
"""
|
||||
Generate and return pdf for this template with the content passed.
|
||||
Generate and return a document for this template around the
|
||||
body passed as argument.
|
||||
|
||||
2 types of body are accepted:
|
||||
- HTML: body_type = "html"
|
||||
- Markdown: body_type = "markdown"
|
||||
|
||||
2 types of documents can be generated:
|
||||
- PDF: format = "pdf"
|
||||
- Docx: format = "docx"
|
||||
"""
|
||||
serializer = serializers.DocumentGenerationSerializer(data=request.data)
|
||||
|
||||
@@ -471,13 +478,10 @@ class TemplateViewSet(
|
||||
|
||||
body = serializer.validated_data["body"]
|
||||
body_type = serializer.validated_data["body_type"]
|
||||
export_format = serializer.validated_data["format"]
|
||||
|
||||
template = self.get_object()
|
||||
pdf_content = template.generate_document(body, body_type)
|
||||
|
||||
response = FileResponse(BytesIO(pdf_content), content_type="application/pdf")
|
||||
response["Content-Disposition"] = f"attachment; filename={template.title}.pdf"
|
||||
return response
|
||||
return template.generate_document(body, body_type, export_format)
|
||||
|
||||
|
||||
class TemplateAccessViewSet(
|
||||
|
||||
@@ -2,10 +2,13 @@
|
||||
Declare and configure the models for the impress core application
|
||||
"""
|
||||
import hashlib
|
||||
import os
|
||||
import smtplib
|
||||
import tempfile
|
||||
import textwrap
|
||||
import uuid
|
||||
from datetime import timedelta
|
||||
from io import BytesIO
|
||||
from logging import getLogger
|
||||
|
||||
from django.conf import settings
|
||||
@@ -16,6 +19,7 @@ from django.core import exceptions, mail, validators
|
||||
from django.core.files.base import ContentFile
|
||||
from django.core.files.storage import default_storage
|
||||
from django.db import models
|
||||
from django.http import FileResponse
|
||||
from django.template.base import Template as DjangoTemplate
|
||||
from django.template.context import Context
|
||||
from django.template.loader import render_to_string
|
||||
@@ -26,10 +30,10 @@ from django.utils.translation import override
|
||||
|
||||
import frontmatter
|
||||
import markdown
|
||||
import pypandoc
|
||||
import weasyprint
|
||||
from botocore.exceptions import ClientError
|
||||
from timezone_field import TimeZoneField
|
||||
from weasyprint import CSS, HTML
|
||||
from weasyprint.text.fonts import FontConfiguration
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
@@ -564,10 +568,90 @@ class Template(BaseModel):
|
||||
"retrieve": can_get,
|
||||
}
|
||||
|
||||
def generate_document(self, body, body_type):
|
||||
def generate_pdf(self, body_html, metadata):
|
||||
"""
|
||||
Generate and return a PDF document for this template around the
|
||||
Generate and return a pdf document wrapped around the current template
|
||||
"""
|
||||
document_html = weasyprint.HTML(
|
||||
string=DjangoTemplate(self.code).render(
|
||||
Context({"body": html.format_html(body_html), **metadata})
|
||||
)
|
||||
)
|
||||
css = weasyprint.CSS(
|
||||
string=self.css,
|
||||
font_config=weasyprint.text.fonts.FontConfiguration(),
|
||||
)
|
||||
|
||||
pdf_content = document_html.write_pdf(stylesheets=[css], zoom=1)
|
||||
response = FileResponse(BytesIO(pdf_content), content_type="application/pdf")
|
||||
response["Content-Disposition"] = f"attachment; filename={self.title}.pdf"
|
||||
|
||||
return response
|
||||
|
||||
def generate_word(self, body_html, metadata):
|
||||
"""
|
||||
Generate and return a docx document wrapped around the current template
|
||||
"""
|
||||
template_string = DjangoTemplate(self.code).render(
|
||||
Context({"body": html.format_html(body_html), **metadata})
|
||||
)
|
||||
|
||||
html_string = f"""
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<style>
|
||||
{self.css}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
{template_string}
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
reference_docx = "core/static/reference.docx"
|
||||
|
||||
# Convert the HTML to a temporary docx file
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp_file:
|
||||
output_path = tmp_file.name
|
||||
|
||||
pypandoc.convert_text(
|
||||
html_string,
|
||||
"docx",
|
||||
format="html",
|
||||
outputfile=output_path,
|
||||
extra_args=["--reference-doc", reference_docx],
|
||||
)
|
||||
|
||||
# Create a BytesIO object to store the output of the temporary docx file
|
||||
with open(output_path, "rb") as f:
|
||||
output = BytesIO(f.read())
|
||||
|
||||
# Remove the temporary docx file
|
||||
os.remove(output_path)
|
||||
|
||||
output.seek(0)
|
||||
|
||||
response = FileResponse(
|
||||
output,
|
||||
content_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
)
|
||||
response["Content-Disposition"] = f"attachment; filename={self.title}.docx"
|
||||
return response
|
||||
|
||||
def generate_document(self, body, body_type, export_format):
|
||||
"""
|
||||
Generate and return a document for this template around the
|
||||
body passed as argument.
|
||||
|
||||
2 types of body are accepted:
|
||||
- HTML: body_type = "html"
|
||||
- Markdown: body_type = "markdown"
|
||||
|
||||
2 types of documents can be generated:
|
||||
- PDF: export_format = "pdf"
|
||||
- Docx: export_format = "docx"
|
||||
"""
|
||||
document = frontmatter.loads(body)
|
||||
metadata = document.metadata
|
||||
@@ -580,16 +664,10 @@ class Template(BaseModel):
|
||||
markdown.markdown(textwrap.dedent(strip_body)) if strip_body else ""
|
||||
)
|
||||
|
||||
document_html = HTML(
|
||||
string=DjangoTemplate(self.code).render(
|
||||
Context({"body": html.format_html(body_html), **metadata})
|
||||
)
|
||||
)
|
||||
css = CSS(
|
||||
string=self.css,
|
||||
font_config=FontConfiguration(),
|
||||
)
|
||||
return document_html.write_pdf(stylesheets=[css], zoom=1)
|
||||
if export_format == "pdf":
|
||||
return self.generate_pdf(body_html, metadata)
|
||||
|
||||
return self.generate_word(body_html, metadata)
|
||||
|
||||
|
||||
class TemplateAccess(BaseAccess):
|
||||
|
||||
BIN
src/backend/core/static/reference.docx
Normal file
BIN
src/backend/core/static/reference.docx
Normal file
Binary file not shown.
@@ -178,3 +178,26 @@ def test_api_templates_generate_document_type_unknown():
|
||||
'"unknown" is not a valid choice.',
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
def test_api_templates_generate_document_export_docx():
|
||||
"""Generate pdf document with the body type html."""
|
||||
user = factories.UserFactory()
|
||||
|
||||
client = APIClient()
|
||||
client.force_login(user)
|
||||
|
||||
template = factories.TemplateFactory(is_public=True)
|
||||
data = {"body": "<p>Test body</p>", "body_type": "html", "format": "docx"}
|
||||
|
||||
response = client.post(
|
||||
f"/api/v1.0/templates/{template.id!s}/generate-document/",
|
||||
data,
|
||||
format="json",
|
||||
)
|
||||
|
||||
assert response.status_code == 200
|
||||
assert (
|
||||
response.headers["content-type"]
|
||||
== "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
<page size="A4">
|
||||
<div class="header">
|
||||
<img
|
||||
src="https://upload.wikimedia.org/wikipedia/fr/7/72/Logo_du_Gouvernement_de_la_R%C3%A9publique_fran%C3%A7aise_%282020%29.svg"
|
||||
<img width="200"
|
||||
src="https://impress-staging.beta.numerique.gouv.fr/assets/logo-gouv.png"
|
||||
/>
|
||||
</div>
|
||||
<div class="content">
|
||||
|
||||
@@ -1,18 +1,20 @@
|
||||
body {
|
||||
background: white;
|
||||
font-family: arial
|
||||
}
|
||||
.header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
font-family: arial;
|
||||
}
|
||||
.header img {
|
||||
width: 5cm;
|
||||
margin-left: -0.4cm;
|
||||
}
|
||||
.body{
|
||||
margin-top: 1.5rem
|
||||
margin-top: 1.5rem;
|
||||
}
|
||||
img {
|
||||
max-width: 100%;
|
||||
}
|
||||
[custom-style="center"] {
|
||||
text-align: center;
|
||||
}
|
||||
[custom-style="right"] {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
@@ -49,6 +49,7 @@ dependencies = [
|
||||
"nested-multipart-parser==1.5.0",
|
||||
"psycopg[binary]==3.1.14",
|
||||
"PyJWT==2.8.0",
|
||||
"pypandoc==1.13",
|
||||
"python-frontmatter==1.0.1",
|
||||
"requests==2.32.2",
|
||||
"sentry-sdk==2.8.0",
|
||||
|
||||
Reference in New Issue
Block a user