🗃️(backend) export to docx
We can now export our document to a docx file. This is done by converting the HTML to a docx file using the pypandoc library together with the pandoc tool. We added the "format" param to the generate-document endpoint; "format" accepts "pdf" or "docx" as its value.
This commit is contained in:
2
.github/workflows/impress.yml
vendored
2
.github/workflows/impress.yml
vendored
@@ -198,7 +198,7 @@ jobs:
|
|||||||
- name: Install gettext (required to compile messages)
|
- name: Install gettext (required to compile messages)
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install -y gettext
|
sudo apt-get install -y gettext pandoc
|
||||||
|
|
||||||
- name: Generate a MO file from strings extracted from the project
|
- name: Generate a MO file from strings extracted from the project
|
||||||
run: python manage.py compilemessages
|
run: python manage.py compilemessages
|
||||||
|
|||||||
@@ -75,6 +75,7 @@ RUN apt-get update && \
|
|||||||
libgdk-pixbuf2.0-0 \
|
libgdk-pixbuf2.0-0 \
|
||||||
libpango-1.0-0 \
|
libpango-1.0-0 \
|
||||||
libpangocairo-1.0-0 \
|
libpangocairo-1.0-0 \
|
||||||
|
pandoc \
|
||||||
shared-mime-info && \
|
shared-mime-info && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
|||||||
@@ -180,6 +180,12 @@ class DocumentGenerationSerializer(serializers.Serializer):
|
|||||||
required=False,
|
required=False,
|
||||||
default="html",
|
default="html",
|
||||||
)
|
)
|
||||||
|
format = serializers.ChoiceField(
|
||||||
|
choices=["pdf", "docx"],
|
||||||
|
label=_("Format"),
|
||||||
|
required=False,
|
||||||
|
default="pdf",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class InvitationSerializer(serializers.ModelSerializer):
|
class InvitationSerializer(serializers.ModelSerializer):
|
||||||
|
|||||||
@@ -1,13 +1,11 @@
|
|||||||
"""API endpoints"""
|
"""API endpoints"""
|
||||||
from io import BytesIO
|
|
||||||
|
|
||||||
from django.contrib.postgres.aggregates import ArrayAgg
|
from django.contrib.postgres.aggregates import ArrayAgg
|
||||||
from django.db.models import (
|
from django.db.models import (
|
||||||
OuterRef,
|
OuterRef,
|
||||||
Q,
|
Q,
|
||||||
Subquery,
|
Subquery,
|
||||||
)
|
)
|
||||||
from django.http import FileResponse, Http404
|
from django.http import Http404
|
||||||
|
|
||||||
from botocore.exceptions import ClientError
|
from botocore.exceptions import ClientError
|
||||||
from rest_framework import (
|
from rest_framework import (
|
||||||
@@ -460,7 +458,16 @@ class TemplateViewSet(
|
|||||||
# pylint: disable=unused-argument
|
# pylint: disable=unused-argument
|
||||||
def generate_document(self, request, pk=None):
|
def generate_document(self, request, pk=None):
|
||||||
"""
|
"""
|
||||||
Generate and return pdf for this template with the content passed.
|
Generate and return a document for this template around the
|
||||||
|
body passed as argument.
|
||||||
|
|
||||||
|
2 types of body are accepted:
|
||||||
|
- HTML: body_type = "html"
|
||||||
|
- Markdown: body_type = "markdown"
|
||||||
|
|
||||||
|
2 types of documents can be generated:
|
||||||
|
- PDF: format = "pdf"
|
||||||
|
- Docx: format = "docx"
|
||||||
"""
|
"""
|
||||||
serializer = serializers.DocumentGenerationSerializer(data=request.data)
|
serializer = serializers.DocumentGenerationSerializer(data=request.data)
|
||||||
|
|
||||||
@@ -471,13 +478,10 @@ class TemplateViewSet(
|
|||||||
|
|
||||||
body = serializer.validated_data["body"]
|
body = serializer.validated_data["body"]
|
||||||
body_type = serializer.validated_data["body_type"]
|
body_type = serializer.validated_data["body_type"]
|
||||||
|
export_format = serializer.validated_data["format"]
|
||||||
|
|
||||||
template = self.get_object()
|
template = self.get_object()
|
||||||
pdf_content = template.generate_document(body, body_type)
|
return template.generate_document(body, body_type, export_format)
|
||||||
|
|
||||||
response = FileResponse(BytesIO(pdf_content), content_type="application/pdf")
|
|
||||||
response["Content-Disposition"] = f"attachment; filename={template.title}.pdf"
|
|
||||||
return response
|
|
||||||
|
|
||||||
|
|
||||||
class TemplateAccessViewSet(
|
class TemplateAccessViewSet(
|
||||||
|
|||||||
@@ -2,10 +2,13 @@
|
|||||||
Declare and configure the models for the impress core application
|
Declare and configure the models for the impress core application
|
||||||
"""
|
"""
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import os
|
||||||
import smtplib
|
import smtplib
|
||||||
|
import tempfile
|
||||||
import textwrap
|
import textwrap
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
|
from io import BytesIO
|
||||||
from logging import getLogger
|
from logging import getLogger
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
@@ -16,6 +19,7 @@ from django.core import exceptions, mail, validators
|
|||||||
from django.core.files.base import ContentFile
|
from django.core.files.base import ContentFile
|
||||||
from django.core.files.storage import default_storage
|
from django.core.files.storage import default_storage
|
||||||
from django.db import models
|
from django.db import models
|
||||||
|
from django.http import FileResponse
|
||||||
from django.template.base import Template as DjangoTemplate
|
from django.template.base import Template as DjangoTemplate
|
||||||
from django.template.context import Context
|
from django.template.context import Context
|
||||||
from django.template.loader import render_to_string
|
from django.template.loader import render_to_string
|
||||||
@@ -26,10 +30,10 @@ from django.utils.translation import override
|
|||||||
|
|
||||||
import frontmatter
|
import frontmatter
|
||||||
import markdown
|
import markdown
|
||||||
|
import pypandoc
|
||||||
|
import weasyprint
|
||||||
from botocore.exceptions import ClientError
|
from botocore.exceptions import ClientError
|
||||||
from timezone_field import TimeZoneField
|
from timezone_field import TimeZoneField
|
||||||
from weasyprint import CSS, HTML
|
|
||||||
from weasyprint.text.fonts import FontConfiguration
|
|
||||||
|
|
||||||
logger = getLogger(__name__)
|
logger = getLogger(__name__)
|
||||||
|
|
||||||
@@ -564,10 +568,90 @@ class Template(BaseModel):
|
|||||||
"retrieve": can_get,
|
"retrieve": can_get,
|
||||||
}
|
}
|
||||||
|
|
||||||
def generate_document(self, body, body_type):
|
def generate_pdf(self, body_html, metadata):
|
||||||
"""
|
"""
|
||||||
Generate and return a PDF document for this template around the
|
Generate and return a pdf document wrapped around the current template
|
||||||
|
"""
|
||||||
|
document_html = weasyprint.HTML(
|
||||||
|
string=DjangoTemplate(self.code).render(
|
||||||
|
Context({"body": html.format_html(body_html), **metadata})
|
||||||
|
)
|
||||||
|
)
|
||||||
|
css = weasyprint.CSS(
|
||||||
|
string=self.css,
|
||||||
|
font_config=weasyprint.text.fonts.FontConfiguration(),
|
||||||
|
)
|
||||||
|
|
||||||
|
pdf_content = document_html.write_pdf(stylesheets=[css], zoom=1)
|
||||||
|
response = FileResponse(BytesIO(pdf_content), content_type="application/pdf")
|
||||||
|
response["Content-Disposition"] = f"attachment; filename={self.title}.pdf"
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
|
def generate_word(self, body_html, metadata):
|
||||||
|
"""
|
||||||
|
Generate and return a docx document wrapped around the current template
|
||||||
|
"""
|
||||||
|
template_string = DjangoTemplate(self.code).render(
|
||||||
|
Context({"body": html.format_html(body_html), **metadata})
|
||||||
|
)
|
||||||
|
|
||||||
|
html_string = f"""
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<style>
|
||||||
|
{self.css}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
{template_string}
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
reference_docx = "core/static/reference.docx"
|
||||||
|
|
||||||
|
# Convert the HTML to a temporary docx file
|
||||||
|
with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp_file:
|
||||||
|
output_path = tmp_file.name
|
||||||
|
|
||||||
|
pypandoc.convert_text(
|
||||||
|
html_string,
|
||||||
|
"docx",
|
||||||
|
format="html",
|
||||||
|
outputfile=output_path,
|
||||||
|
extra_args=["--reference-doc", reference_docx],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create a BytesIO object to store the output of the temporary docx file
|
||||||
|
with open(output_path, "rb") as f:
|
||||||
|
output = BytesIO(f.read())
|
||||||
|
|
||||||
|
# Remove the temporary docx file
|
||||||
|
os.remove(output_path)
|
||||||
|
|
||||||
|
output.seek(0)
|
||||||
|
|
||||||
|
response = FileResponse(
|
||||||
|
output,
|
||||||
|
content_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
|
)
|
||||||
|
response["Content-Disposition"] = f"attachment; filename={self.title}.docx"
|
||||||
|
return response
|
||||||
|
|
||||||
|
def generate_document(self, body, body_type, export_format):
|
||||||
|
"""
|
||||||
|
Generate and return a document for this template around the
|
||||||
body passed as argument.
|
body passed as argument.
|
||||||
|
|
||||||
|
2 types of body are accepted:
|
||||||
|
- HTML: body_type = "html"
|
||||||
|
- Markdown: body_type = "markdown"
|
||||||
|
|
||||||
|
2 types of documents can be generated:
|
||||||
|
- PDF: export_format = "pdf"
|
||||||
|
- Docx: export_format = "docx"
|
||||||
"""
|
"""
|
||||||
document = frontmatter.loads(body)
|
document = frontmatter.loads(body)
|
||||||
metadata = document.metadata
|
metadata = document.metadata
|
||||||
@@ -580,16 +664,10 @@ class Template(BaseModel):
|
|||||||
markdown.markdown(textwrap.dedent(strip_body)) if strip_body else ""
|
markdown.markdown(textwrap.dedent(strip_body)) if strip_body else ""
|
||||||
)
|
)
|
||||||
|
|
||||||
document_html = HTML(
|
if export_format == "pdf":
|
||||||
string=DjangoTemplate(self.code).render(
|
return self.generate_pdf(body_html, metadata)
|
||||||
Context({"body": html.format_html(body_html), **metadata})
|
|
||||||
)
|
return self.generate_word(body_html, metadata)
|
||||||
)
|
|
||||||
css = CSS(
|
|
||||||
string=self.css,
|
|
||||||
font_config=FontConfiguration(),
|
|
||||||
)
|
|
||||||
return document_html.write_pdf(stylesheets=[css], zoom=1)
|
|
||||||
|
|
||||||
|
|
||||||
class TemplateAccess(BaseAccess):
|
class TemplateAccess(BaseAccess):
|
||||||
|
|||||||
BIN
src/backend/core/static/reference.docx
Normal file
BIN
src/backend/core/static/reference.docx
Normal file
Binary file not shown.
@@ -178,3 +178,26 @@ def test_api_templates_generate_document_type_unknown():
|
|||||||
'"unknown" is not a valid choice.',
|
'"unknown" is not a valid choice.',
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_api_templates_generate_document_export_docx():
|
||||||
|
"""Generate pdf document with the body type html."""
|
||||||
|
user = factories.UserFactory()
|
||||||
|
|
||||||
|
client = APIClient()
|
||||||
|
client.force_login(user)
|
||||||
|
|
||||||
|
template = factories.TemplateFactory(is_public=True)
|
||||||
|
data = {"body": "<p>Test body</p>", "body_type": "html", "format": "docx"}
|
||||||
|
|
||||||
|
response = client.post(
|
||||||
|
f"/api/v1.0/templates/{template.id!s}/generate-document/",
|
||||||
|
data,
|
||||||
|
format="json",
|
||||||
|
)
|
||||||
|
|
||||||
|
assert response.status_code == 200
|
||||||
|
assert (
|
||||||
|
response.headers["content-type"]
|
||||||
|
== "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||||
|
)
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
<page size="A4">
|
<page size="A4">
|
||||||
<div class="header">
|
<div class="header">
|
||||||
<img
|
<img width="200"
|
||||||
src="https://upload.wikimedia.org/wikipedia/fr/7/72/Logo_du_Gouvernement_de_la_R%C3%A9publique_fran%C3%A7aise_%282020%29.svg"
|
src="https://impress-staging.beta.numerique.gouv.fr/assets/logo-gouv.png"
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
<div class="content">
|
<div class="content">
|
||||||
|
|||||||
@@ -1,18 +1,20 @@
|
|||||||
body {
|
body {
|
||||||
background: white;
|
background: white;
|
||||||
font-family: arial
|
font-family: arial;
|
||||||
}
|
|
||||||
.header {
|
|
||||||
display: flex;
|
|
||||||
justify-content: space-between;
|
|
||||||
}
|
}
|
||||||
.header img {
|
.header img {
|
||||||
width: 5cm;
|
width: 5cm;
|
||||||
margin-left: -0.4cm;
|
margin-left: -0.4cm;
|
||||||
}
|
}
|
||||||
.body{
|
.body{
|
||||||
margin-top: 1.5rem
|
margin-top: 1.5rem;
|
||||||
}
|
}
|
||||||
img {
|
img {
|
||||||
max-width: 100%;
|
max-width: 100%;
|
||||||
}
|
}
|
||||||
|
[custom-style="center"] {
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
[custom-style="right"] {
|
||||||
|
text-align: right;
|
||||||
|
}
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ dependencies = [
|
|||||||
"nested-multipart-parser==1.5.0",
|
"nested-multipart-parser==1.5.0",
|
||||||
"psycopg[binary]==3.1.14",
|
"psycopg[binary]==3.1.14",
|
||||||
"PyJWT==2.8.0",
|
"PyJWT==2.8.0",
|
||||||
|
"pypandoc==1.13",
|
||||||
"python-frontmatter==1.0.1",
|
"python-frontmatter==1.0.1",
|
||||||
"requests==2.32.2",
|
"requests==2.32.2",
|
||||||
"sentry-sdk==2.8.0",
|
"sentry-sdk==2.8.0",
|
||||||
|
|||||||
Reference in New Issue
Block a user