From 4280f0779ef000a6983807a355be6907ec7ae05d Mon Sep 17 00:00:00 2001 From: Anthony LC Date: Wed, 7 Aug 2024 14:44:18 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=97=83=EF=B8=8F(backend)=20export=20to=20?= =?UTF-8?q?docx?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can now export our document to a docx file. This is done by converting the html to a docx file using the pypandoc and pandoc libraries. We added the "format" param to the generate-document endpoint; "format" accepts "pdf" or "docx" as its value. --- .github/workflows/impress.yml | 2 +- Dockerfile | 1 + src/backend/core/api/serializers.py | 6 + src/backend/core/api/viewsets.py | 22 ++-- src/backend/core/models.py | 106 +++++++++++++++--- src/backend/core/static/reference.docx | Bin 0 -> 7227 bytes .../test_api_templates_generate_document.py | 23 ++++ src/backend/demo/data/template/code.txt | 4 +- src/backend/demo/data/template/css.txt | 14 ++- src/backend/pyproject.toml | 1 + 10 files changed, 147 insertions(+), 32 deletions(-) create mode 100644 src/backend/core/static/reference.docx diff --git a/.github/workflows/impress.yml b/.github/workflows/impress.yml index acbaa36e..5fdf3a97 100644 --- a/.github/workflows/impress.yml +++ b/.github/workflows/impress.yml @@ -198,7 +198,7 @@ jobs: - name: Install gettext (required to compile messages) run: | sudo apt-get update - sudo apt-get install -y gettext + sudo apt-get install -y gettext pandoc - name: Generate a MO file from strings extracted from the project run: python manage.py compilemessages diff --git a/Dockerfile b/Dockerfile index fce18ac7..9f212837 100644 --- a/Dockerfile +++ b/Dockerfile @@ -75,6 +75,7 @@ RUN apt-get update && \ libgdk-pixbuf2.0-0 \ libpango-1.0-0 \ libpangocairo-1.0-0 \ + pandoc \ shared-mime-info && \ rm -rf /var/lib/apt/lists/* diff --git a/src/backend/core/api/serializers.py b/src/backend/core/api/serializers.py index 540a55f8..e43f7e76 100644 --- a/src/backend/core/api/serializers.py +++ 
b/src/backend/core/api/serializers.py @@ -180,6 +180,12 @@ class DocumentGenerationSerializer(serializers.Serializer): required=False, default="html", ) + format = serializers.ChoiceField( + choices=["pdf", "docx"], + label=_("Format"), + required=False, + default="pdf", + ) class InvitationSerializer(serializers.ModelSerializer): diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 3af432c1..07691042 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -1,13 +1,11 @@ """API endpoints""" -from io import BytesIO - from django.contrib.postgres.aggregates import ArrayAgg from django.db.models import ( OuterRef, Q, Subquery, ) -from django.http import FileResponse, Http404 +from django.http import Http404 from botocore.exceptions import ClientError from rest_framework import ( @@ -460,7 +458,16 @@ class TemplateViewSet( # pylint: disable=unused-argument def generate_document(self, request, pk=None): """ - Generate and return pdf for this template with the content passed. + Generate and return a document for this template around the + body passed as argument. 
+ + 2 types of body are accepted: + - HTML: body_type = "html" + - Markdown: body_type = "markdown" + + 2 types of documents can be generated: + - PDF: format = "pdf" + - Docx: format = "docx" """ serializer = serializers.DocumentGenerationSerializer(data=request.data) @@ -471,13 +478,10 @@ class TemplateViewSet( body = serializer.validated_data["body"] body_type = serializer.validated_data["body_type"] + export_format = serializer.validated_data["format"] template = self.get_object() - pdf_content = template.generate_document(body, body_type) - - response = FileResponse(BytesIO(pdf_content), content_type="application/pdf") - response["Content-Disposition"] = f"attachment; filename={template.title}.pdf" - return response + return template.generate_document(body, body_type, export_format) class TemplateAccessViewSet( diff --git a/src/backend/core/models.py b/src/backend/core/models.py index a1cb9efa..66631359 100644 --- a/src/backend/core/models.py +++ b/src/backend/core/models.py @@ -2,10 +2,13 @@ Declare and configure the models for the impress core application """ import hashlib +import os import smtplib +import tempfile import textwrap import uuid from datetime import timedelta +from io import BytesIO from logging import getLogger from django.conf import settings @@ -16,6 +19,7 @@ from django.core import exceptions, mail, validators from django.core.files.base import ContentFile from django.core.files.storage import default_storage from django.db import models +from django.http import FileResponse from django.template.base import Template as DjangoTemplate from django.template.context import Context from django.template.loader import render_to_string @@ -26,10 +30,10 @@ from django.utils.translation import override import frontmatter import markdown +import pypandoc +import weasyprint from botocore.exceptions import ClientError from timezone_field import TimeZoneField -from weasyprint import CSS, HTML -from weasyprint.text.fonts import FontConfiguration logger = 
getLogger(__name__) @@ -564,10 +568,90 @@ class Template(BaseModel): "retrieve": can_get, } - def generate_document(self, body, body_type): + def generate_pdf(self, body_html, metadata): """ - Generate and return a PDF document for this template around the + Generate and return a pdf document wrapped around the current template + """ + document_html = weasyprint.HTML( + string=DjangoTemplate(self.code).render( + Context({"body": html.format_html(body_html), **metadata}) + ) + ) + css = weasyprint.CSS( + string=self.css, + font_config=weasyprint.text.fonts.FontConfiguration(), + ) + + pdf_content = document_html.write_pdf(stylesheets=[css], zoom=1) + response = FileResponse(BytesIO(pdf_content), content_type="application/pdf") + response["Content-Disposition"] = f"attachment; filename={self.title}.pdf" + + return response + + def generate_word(self, body_html, metadata): + """ + Generate and return a docx document wrapped around the current template + """ + template_string = DjangoTemplate(self.code).render( + Context({"body": html.format_html(body_html), **metadata}) + ) + + html_string = f""" + + + + + + + {template_string} + + + """ + + reference_docx = "core/static/reference.docx" + + # Convert the HTML to a temporary docx file + with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp_file: + output_path = tmp_file.name + + pypandoc.convert_text( + html_string, + "docx", + format="html", + outputfile=output_path, + extra_args=["--reference-doc", reference_docx], + ) + + # Create a BytesIO object to store the output of the temporary docx file + with open(output_path, "rb") as f: + output = BytesIO(f.read()) + + # Remove the temporary docx file + os.remove(output_path) + + output.seek(0) + + response = FileResponse( + output, + content_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document", + ) + response["Content-Disposition"] = f"attachment; filename={self.title}.docx" + return response + + def generate_document(self, body, 
body_type, export_format): + """ + Generate and return a document for this template around the body passed as argument. + + 2 types of body are accepted: + - HTML: body_type = "html" + - Markdown: body_type = "markdown" + + 2 types of documents can be generated: + - PDF: export_format = "pdf" + - Docx: export_format = "docx" """ document = frontmatter.loads(body) metadata = document.metadata @@ -580,16 +664,10 @@ class Template(BaseModel): markdown.markdown(textwrap.dedent(strip_body)) if strip_body else "" ) - document_html = HTML( - string=DjangoTemplate(self.code).render( - Context({"body": html.format_html(body_html), **metadata}) - ) - ) - css = CSS( - string=self.css, - font_config=FontConfiguration(), - ) - return document_html.write_pdf(stylesheets=[css], zoom=1) + if export_format == "pdf": + return self.generate_pdf(body_html, metadata) + + return self.generate_word(body_html, metadata) class TemplateAccess(BaseAccess): diff --git a/src/backend/core/static/reference.docx b/src/backend/core/static/reference.docx new file mode 100644 index 0000000000000000000000000000000000000000..2192455df1a394ac05c4739b93f04abd2a6d6bc8 GIT binary patch literal 7227 zcmaJ`1yo$ivPFWsyL)g5?(Q(SJA=CicM=HhHUWZ5g1ZL~4k5S&3mM#kg+JW)-%D=( zf463xv(BuU>eF4js=8`7KneC4E)*gnBGiKtsy@_jf&zIqbq6|nu(Lhgt7m!@VL373 zH~imPWcm9vX-iwxcfOj+kq-<>96N%~QI;<;SYHDmw*ho|$+>U#_GpS$NgulWN>j-Y z;40ffW5Ue8_vLSRT{m<@t2Eo1+I}dOKHIl=KdL&6ESvNrBy(KeWG|a0(tXzB%wUug zjj-|MEbL~R-4VwFIK+8I->EvRp^>@F`)$(CFL5AgLsnum27^JZrWF3sVx!Vf=}_Rg z(cn#LnG*K~aO(D$^2pz2KWV&IGf|065E)L%8&qVOi?eE^X@#!8@OUO&I!5~KD!a&T zYx%r)q);SbGFv7#Qvv%OVVp~2=bM9h9w+mY<3nCvz{`_EkiY8W zgFV-JMr1RrlQf+sE*#aFcwG!iR_Q*l@uYN^J6%yVR#7qr-E2y@T``loYUZSj8Ey%g z1H_#i#gSGR*BE2wxB#zc4^x^LZ=~B+G%t~7DYHm04n1V<8>dv0s;T5=xnJ3fMaC*X z!tz;%pc>kS5SdZJAp9L=A44^qads?*!LG7CvOmC8eJLf}&!wNT>bqc>T=m=~#b)!) 
zM-QP2cB}g5@Qh0FahN5c1{)x5O5;}0OS09~5BQ9+AAP~mnqtVhZu0b4uWgQzeqMHQ zg0~cH(EMT8cFi*JN83oew-X{h!lm8JvkDtJ1fR|+MrnDBh7q@W%9)~UG$ZV}QGM~z zBKLSCWui|ZB3jRlnamrP( zOkGHKQoQqD(UHcI3s}k`o2=^e^a8hUgbX6nWtd|Rn1>`+8Vo;h1+c%RKj)}Hw?fNH zZb4PE{fO6I;PCDRF~b!6n#2-tL9mn{LT}9igZIqon@}8=2tUR+sH_BwlmhSZKwU6- z@bt~bbU{@uXup@ELy!&qsJo#g&&94gICW2>&5zXX{ZU-Amj%53GB4Pc;7qmV#}AscNo9 zpou5zz7z_4%lR3%faJRgot7&V*deiOtYZ&UFxviAArTcuR9-E`6A-)21N%x4AQb)) z2%O)6u=MiqbaDC%i-}rfNVr69u9hM$E^8qq%{0XF3u#>^t~Eh>xD2br2Kv*49mPje zh^o$tJV;exE@IiYvNJP5v7(_fu(~`H^DFd@Y?zTpcu5>m~J@*=IkNtGA(W8{yK?PL)fcmavNSDZe?C$6<}YXa!tY zscPlveZ?Np^*01Hqq5)74HY(De<1m;zc6hgu40vu>#uFe5NO46z`+#vofdBYxpT4L zDDn+${NBnX59QsI)v{#BGCoqxz5OYLlV*BcU!!^{I!|Ej2IxKOZ9b!MB<)7%yL{OD z_6IBZyBXWjoq_?MFoH|we$OfNM85;z%4F6;u zZ>tKYImp^G{t@-36#18Rez!}p)^dCf2WH^0E@d}^cxp9ugOzEaGjn65{9;fDC#>H0 z97i19hg)~G>my_yTO}NQGMJk zI$g7JBTb%P3X!au(a}gSG#s)VrJ9T3Q#j*nqAZi^e7`xU;<(rSSxkv@z^hs7uz@x1 zGit6BO%dUD#nI2oDfE0VR?f$WcGE|Ti&wQrOe0BUUClka)hD>-8EIPhYA8*!J2r)Q z=6d>cx)Tnk59XB}lUHY|i>``!IxXSV8aBf2K280ge!kkJ_StaqXTtHp&mx~k*_tdQ z+D~0`iiQS=xwKl?mxnUL@>l$TNH+Ed`>40?$kJ1~5nr`|m1R^#VGj|w>D&@Jp)whK zFL4f>P*Bd&m|Z_k5RG!6d_)#Muf6b℘@?oP|hbzENoUK*;U@Fnf%Md=*NTNkQ{4 zFs6uiAUGqq&w2R`ftug%n{_>zJ>QQ<=Vq0X5SO_vUFbA9Goe8}6F%MLwo3_@w+T99 zBp=4Hb?iDL@Rs~pVF?vQ8qZ#)8YmR8hq@^Yyt>Lbx4i`0$wZ>4_&R>;VQq?4n~XrY zGkU`z8l8}yF}*LPW;m_$!z7~T9l!hJEht3--=R!ovsTS@f3B3Mur~CjJQGTrUldf-yH%*s+Mc>{E`b#cn z0{gL>7UlEd=4kJ}6HE;ZCu3VNm48>DF!=5&zAg}efKGXPcjv!eA0$4C*^ASD-sEJ6 zDbz3H%U#>O%Ch#xG1e`_cmoTS;R@F3>dvS8A;@RqM>s+j-GQZd5MOO`F2Y_lzDLs( z0+92wSPq@;P1TKI^bgiL z6B>ky66s4-!-uoZBItEa=q*@35_lJBf9MU{Na218T*BqEyz@#ldfX~u^XeYUZk(pJ z{?-r&OuyU_x!cIWgwvE;8wn1Z`BeckbV(e4^m{y+9{l7(-1@3#FoUJ2p>L+lah6qy zTd?-wU?`9{GU|egdU;M!TjgDK`NeFg8@c&SFVpi(O-|zA3|$LFZ0c zJd}9V&vIX*?dR-h={OF6><;gk7lJj|(ZfTKEtI8H*(kzT`5`_T?m8r)aE zLZfq<%HHD^y9TP{8t${TGG;C~31J7d-%4T?6dwE3(Opc_xf{RY5$TX&yH^sw9n5b} zd29uw#=xN?)&5u|LwAf+KsaSXA}AQ5fC;67!gXXsY^tkx-iFN@oQ&q@c0NOjl@t?9 
z_=CeuuBxbQcO6ku$zz+$ePq=Lkc6!^0&J%BT6SYc=#i+<)CKg*Y~j@GCL-f3)h>yt zYD|VZy!Yw-pzIgz8$llhEzv9=fWoxqGm8=#eGse}ZzM^HuqVvE1yhOBYXB`VKiTq% zhj8s&T9oGKW1MfiqGW1>w1y}f-4bg=FGF=Pt@68MhX^WWD#PHSSypdeq*jIlqsJ4j zPO~0<3?w!nVTPjjDnOCvM2+fmQOg_qImKQwAiqyWqKLBm@q~e>vX3YKH%)?9MiVo)gzNJH%}-rnkDFC|x68MPm9xl{wZ2muiW6^0t? z8hA01MN$#{$d}1dqNNXf&3*=65Qj??fsk`8my73&MCXLYZKnk zKbFHeWp6oKQ1cjqai)9|wL<(Xw8Y3tk-lj3RqZHh$Mm$E|H4s|LtI`+IElKmb=3R7 z8Zsi3L73on5Zu%ay0FxJxik0v-C_BDsNlX0OdkXHN%=vonW^tHS8njrQ-%i|!UAU3 z`_(sa@6_Ark9XQpr-!mtZh{y)Oc8FlLeODAt@q_59128>P5n9)JQw+yd7BXEE zwR$+3MH(4kUAw{bMMn-oB2WzevilA~1aZ8w&?@XSg=omF=nD!l&7JVPToJe!@0(|K z)|bf$TM^htzuVr&REA^M5wwV37o8)+>E+9uAZ6d2tVL*n-6!ne;|06;^2{LJ|Br;mZFZt7CMM zS4JbyCb#aU00`{_Wo|;XXyqgOZN#!Q86pV3AYGF;ezVntly^wCW4!t=i5;qRb)GfNTLY7dixj+MSXCku|2_>GMu$@ z=pICw-9wi9!vChVOLvnnC8`^0V9PxX2c=4Lv4tIlJKxU@e_H3((G@9tLLrPRS2X_L6x>WW&H$N{M)$XgpwC$#;L zY2;M@6Fviz&ZbKK$~D6DNe&=2NQv4AFd3EZOygK zXanCky){8P7A}@gG=~-X>O(fS-v9v~nDW-!v$uLbVUGCBg(}Rq@VqwzdmC7a)v|Q0 z!~;|SLGMqQE@T{Q<-XP@IU7)pdNUl;S6b?uU!#RVFF$`{gElZ|WD?}nve*?n6R?C# zGk6J4yyGWBaY1H3gSKVz^Fk=OYbrwH@E|AGF?H*xq*!=7YR+~>;Eers;r8GO{A+CX z<{mt>>WP`zdy_{;!R#Zo$WDo%=U^hXi`cAsRNan5VLJ(|;1|}Nn1OYR3Ou*AKOG2u zOmgh8T&=&@HncdK22}kB5cN<0KA9GFb_wG-KUcyoZVClT(4-3herB#jC-2pHqgfxo zs4bG5TbOYp-1>!0b&KZ%0qSFk&;uQ_Lc6xP~pb@ z%L|ncH*S8tZTz#9c~(Ef7t%r*!=xTCL=@$I9+TSHD@E~VO1UU62!m6 z2EhQgCkz&k+MYL%FI!Hg@(^dg-2;|MCn1%nUNW`#Y>=|Bdk3P?A!t#PC`qS#@-|i1 zefIsv|Za$kA4OFLf`I)uG-d?u%204%lXT>eIs%Q_fEF`Ne8OiUEqihQp?D>s@x z6J&os?XyTBhV8&*w)J^a^klWL_F%rXx9=QK^?YwkBCe=0u|!1+espJrO~O6{&ZAt? 
zO%~Acg+a-4<+C%+SsFnvf!OZUxoFt_>t|bVK5pa$R6fTNV?s&ua>aR9W%&UzE&0AL z19v*k;0^RSs&Tg`%179*j)I>Gq@nAC%xy#{C`PS+Dv+@MT_9PzID2ZDTR8qjQgkNw zLONj7sAFN7R3W8B=mPuIh$#L7vISo#KCdtGuQdnEfi7zE)!1R=@RFT$SI3V^j_f)V z6EU#xyddP4e4mQMXS%o=*teIDZmuMdWo-j@QqFUV^}l<+}7~wUe}JN zS*+&6f^P7ccI+ug`ynmn=Nmv{uY;B9(L}||^i^hrsOJ`LM zI3GetX6uYhOc>g7dy3_b-$>z;fcqe3-^iS~_I&x;Vm4?OTq>=@6 zT^p**oba@8m6v245mzZRyf(9|?}P(IU3vA13-fNAcNvS#o3gI#^n6l#vN!_J)tn2` z=;l59CyW2521C^tClzO&IX63S0jmx4= zQDN$|<@`!Wp|J&81WE6wvd7^>PLc&tUpp}RC3+6llQS-{B}+hdifWU+oBFGbu%(h= z7wWfYGSSm!i5i_difa6~#7;bQ`Cc;F0$$|YBK_an{LPaf#)$Y8tEP3TPmJ}JyS^n^IjY~%3 z>QcprX3b=RQcemEp9w+KSWTW*OuZ{`zSgX^P>#VdR)Wj=Pe@yhfLKR5t|CiOzEzHW1bD7Ry z8q#_Sj@jL0S}xFk9$-HTP6_}qcpmgQ?MPo+gb~UcXLL-HCl`g0cI#34IpSP24QYuX z&)5t6NLYbh$ zr$0m>*u@mjDXTwqst;9w3$h><8cm@TaYq+-7ThTb0?S$6hXix>ewcX?r%Q8RI3$M? zu*FV;9e<*pkWhM4T2oKD`IWpo*FLb`K-RJYE-ma+X+6V2o-)FS!|68`VQ61>;S=&m z1iqmgrSm0`is1x-CykK+h^pujYWroU7C)vhtP%sbum(I`wR#7HTKg zI%}(q9c8VznI{T0F`t^Lbulx?2wE|sqQmrFR&0i$oQ{Lx?y8(D+VF*!L#NqTs#3a{ z%n$SjorkKUR3oV$j3{hT;<62z)h^)KJ)g0~)enTvkRvc(B)U$+u+tf@7|9a147yV} zV0I70AUyB!k|{de=;iXHN-UACsS+~z2zNJHCea7#s(n{yxfIVfD!-+_jozk%e3EPh zo)fOLIQ0)RW-NLXr8;fEjUfFnh0?Ih|E7RZ0X7fap0Gq4R-%zbSXvtT*xr6+mRB(c zp-}1Cu}L!C1$FT}(ru1jjAD}-X_uN%R9YpLQM=GLJm` zv_VDiB5ev$&ZhX(fGPG1(RVh&;kmvBroI>2p$u=YTJ%vh+NUShQ|9_1X4o*b1sIB{xCEC#=cztFypS@7DQ4AP z{NO*Rphbn7osQ3&w3X#QDNN1q)=4X`s`sUxrv&|CCT)HzmWD3eOB3OuFIK!{JF(Sp zZ_VCc!RbetxiDS^AjRB_=}i$b#c8r^R92P&@AI_9*@wO|kH(?rOA!|$WbOI@>a@v{ z@6gDbjZ#y#o5o|y!Z;ipM)Z3);?$QaJpgwieU13*6-w=+Tw1&WK}^-%KeEicF3~8p z!7QL_<~5cJrEDmBkn4&nvi$B?q+AI{Xji|gt@m!wt$g1}EBE|Y15&Fygzjj9&*d^S zt`dFb(hZ{Iwxb&E;t6Sz3h-`IF#WZTU1z;zDW}!rp(hw$rJEP?pi3Ty(n&6K_Z5ug z_#CssuYK9g5ZYf^Xkg55er$pd{`uTsT_WK1Fr0sN@M$B4c1=Aw2s$DpyE{{IPgqZ~ z=R5Jz-u5;ZaSpn`sNH#RW)RF1SSHyI^Nx_5rV9TA7Bmbl)URWf-_IAGj#>ULza6^# zsrWnT{j|~i5-P~6{?B2=pX$F;wNK3WFByQi)qm9gmnZ*I`*%Y0$;p0+Dr86ekM=Ke z_s;=-=M0~C(_calVR`Test body

", "body_type": "html", "format": "docx"} + + response = client.post( + f"/api/v1.0/templates/{template.id!s}/generate-document/", + data, + format="json", + ) + + assert response.status_code == 200 + assert ( + response.headers["content-type"] + == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + ) diff --git a/src/backend/demo/data/template/code.txt b/src/backend/demo/data/template/code.txt index 5fa0d27d..0ab83f60 100644 --- a/src/backend/demo/data/template/code.txt +++ b/src/backend/demo/data/template/code.txt @@ -1,7 +1,7 @@
-
diff --git a/src/backend/demo/data/template/css.txt b/src/backend/demo/data/template/css.txt index 8856136a..79a440ab 100644 --- a/src/backend/demo/data/template/css.txt +++ b/src/backend/demo/data/template/css.txt @@ -1,18 +1,20 @@ body { background: white; - font-family: arial -} -.header { - display: flex; - justify-content: space-between; + font-family: arial; } .header img { width: 5cm; margin-left: -0.4cm; } .body{ - margin-top: 1.5rem + margin-top: 1.5rem; } img { max-width: 100%; } +[custom-style="center"] { + text-align: center; +} +[custom-style="right"] { + text-align: right; +} diff --git a/src/backend/pyproject.toml b/src/backend/pyproject.toml index 867d91bd..cff0a597 100644 --- a/src/backend/pyproject.toml +++ b/src/backend/pyproject.toml @@ -49,6 +49,7 @@ dependencies = [ "nested-multipart-parser==1.5.0", "psycopg[binary]==3.1.14", "PyJWT==2.8.0", + "pypandoc==1.13", "python-frontmatter==1.0.1", "requests==2.32.2", "sentry-sdk==2.8.0",