import json
import logging
import os
import subprocess
import tempfile

from trytond.exceptions import UserError
from trytond.i18n import gettext
from trytond.transaction import Transaction
from weasyprint import HTML, CSS

logger = logging.getLogger(__name__)


class PdfGenerator:
    """
    Generate a PDF out of a rendered template, with the possibility to
    integrate nicely a header and a footer if provided.

    Notes:
    ------
    - When Weasyprint renders an html into a PDF, it goes through several
      intermediate steps. Here, in this class, we deal mostly with a box
      representation: 1 `Document` has 1 `Page` or more, each `Page` 1 `Box`
      or more. Each box can contain other boxes. Hence the recursive method
      `get_element` for example.
      For more, see:
      https://weasyprint.readthedocs.io/en/stable/hacking.html#dive-into-the-source
      https://weasyprint.readthedocs.io/en/stable/hacking.html#formatting-structure
    - Warning: the logic of this class relies heavily on the internal
      Weasyprint API. This snippet was written at the time of the release 47,
      it might break in the future.
    - This generator draws its inspiration and, also a bit of its
      implementation, from this discussion in the library github issues:
      https://github.com/Kozea/WeasyPrint/issues/92
    """
    OVERLAY_LAYOUT = '@page {size: A4 portrait; margin: 0;}'

    # Overlay names accepted by `_compute_overlay_element`; each one maps to
    # the instance attribute `<name>_html`.
    _OVERLAY_ELEMENTS = ('header', 'footer', 'last_footer')

    def __init__(self, main_html, header_html=None, footer_html=None,
            last_footer_html=None, base_url=None, side_margin=2,
            extra_vertical_margin=30):
        """
        Parameters
        ----------
        main_html: str
            An HTML file (most of the time a template rendered into a string)
            which represents the core of the PDF to generate.
        header_html: str
            An optional header html.
        footer_html: str
            An optional footer html.
        last_footer_html: str
            An optional footer html which is overlaid on the last page only.
        base_url: str
            An absolute url to the page which serves as a reference to
            Weasyprint to fetch assets, required to get our media.
        side_margin: int, interpreted in cm, by default 2cm
            The margin to apply on the core of the rendered PDF
            (i.e. main_html).
        extra_vertical_margin: int, interpreted in pixel, by default 30 pixels
            An extra margin to apply between the main content and header and
            the footer. The goal is to avoid having the content of
            `main_html` touching the header or the footer.
        """
        self.main_html = main_html
        self.header_html = header_html
        self.footer_html = footer_html
        self.last_footer_html = last_footer_html
        self.base_url = base_url
        self.side_margin = side_margin
        self.extra_vertical_margin = extra_vertical_margin

    def _compute_overlay_element(self, element: str):
        """
        Render one overlay (header / footer / last footer) on its own page
        and measure the vertical space it needs.

        Parameters
        ----------
        element: str
            One of 'header', 'footer' or 'last_footer'. The html is read from
            the attribute `<element>_html` and the box looked up is the tag
            named like `element` with '_' replaced by '-'.

        Returns
        -------
        element_body: BlockBox
            A Weasyprint pre-rendered representation of an html element
        element_height: float
            The height of this element, which will be then translated in a
            html height

        Raises
        ------
        ValueError
            If `element` is not one of the supported overlay names.
        """
        if element not in self._OVERLAY_ELEMENTS:
            raise ValueError(
                'Unknown overlay element: {!r}'.format(element))

        html = HTML(
            string=getattr(self, '{}_html'.format(element)).replace('\n', ''),
            base_url=self.base_url,
        )
        element_doc = html.render(
            stylesheets=[CSS(string=self.OVERLAY_LAYOUT)])
        element_page = element_doc.pages[0]
        element_body = PdfGenerator.get_element(
            element_page._page_box.all_children(), 'body')
        element_body = element_body.copy_with_children(
            element_body.all_children())
        element_html = PdfGenerator.get_element(
            element_page._page_box.all_children(), element.replace('_', '-'))

        if element == 'header':
            # Without a matching tag the header reserves no space.
            element_height = element_html.height if element_html else 0
        elif element == 'footer':
            # Space between the top of the footer box and the page bottom;
            # without a matching tag the whole page height is used.
            if element_html:
                element_height = element_page.height - element_html.position_y
            else:
                element_height = element_page.height
        else:
            # 'last_footer': same as the footer, minus its bottom margin.
            if element_html:
                element_height = (element_page.height
                    - element_html.position_y
                    - element_html.margin_bottom)
            else:
                element_height = element_page.height

        return element_body, element_height

    def _apply_overlay_on_main(self, main_doc, header_body=None,
            footer_body=None, last_footer_body=None):
        """
        Insert the header and the footer in the main document.

        The header and the footer are appended to the body of every page;
        the last footer is appended to the body of the last page only.

        Parameters
        ----------
        main_doc: Document
            The top level representation for a PDF page in Weasyprint.
        header_body: BlockBox
            A representation for an html element in Weasyprint.
        footer_body: BlockBox
            A representation for an html element in Weasyprint.
        last_footer_body: BlockBox
            A representation for an html element in Weasyprint.
        """
        total_pages = len(main_doc.pages)
        for number_page, page in enumerate(main_doc.pages, start=1):
            page_body = PdfGenerator.get_element(
                page._page_box.all_children(), 'body')

            if header_body:
                page_body.children += header_body.all_children()
            if last_footer_body and number_page == total_pages:
                page_body.children += last_footer_body.all_children()
            if footer_body:
                page_body.children += footer_body.all_children()

    def render_html(self):
        """
        Lay out the main html and overlay the header and footers on it.

        Returns
        -------
        main_doc: Document
            The rendered Weasyprint document (the result of `HTML.render`),
            with the overlays applied. Call `write_pdf()` on it to get the
            PDF bytes.
        """
        if self.header_html:
            header_body, header_height = self._compute_overlay_element(
                'header')
        else:
            header_body, header_height = None, 0

        if self.footer_html:
            footer_body, footer_height = self._compute_overlay_element(
                'footer')
        else:
            footer_body, footer_height = None, 0

        if self.last_footer_html:
            last_footer_body, last_footer_height = (
                self._compute_overlay_element('last_footer'))
        else:
            last_footer_body, last_footer_height = None, 0

        # The bottom margin reserves room for both footers on every page.
        footer_height += last_footer_height

        # CSS shorthand order: top right bottom left.
        margins = (
            '{header_size}px {side_margin} {footer_size}px '
            '{side_margin}'
        ).format(
            header_size=header_height + self.extra_vertical_margin,
            footer_size=footer_height + self.extra_vertical_margin,
            side_margin='{}cm'.format(self.side_margin),
        )
        content_print_layout = (
            '@page {size: A4 portrait; margin: %s;}' % margins)

        html = HTML(
            string=self.main_html,
            base_url=self.base_url,
        )
        main_doc = html.render(stylesheets=[CSS(string=content_print_layout)])

        if self.header_html or self.footer_html or self.last_footer_html:
            self._apply_overlay_on_main(
                main_doc, header_body, footer_body, last_footer_body)
        return main_doc

    def render_pdf(self):
        """
        Render the PDF, optionally in a subprocess bounded by a timeout.

        When the transaction context holds a truthy `timeout_report`, the
        generator properties are dumped to a temporary JSON file and
        `generator_script.py` (located next to this module) is run with
        `python3`. The script is expected to print on stdout the path of the
        PDF it wrote; that file is read and then removed. Otherwise the PDF
        is rendered in-process.

        Returns
        -------
        document: bytes or None
            The rendered PDF. `None` when the subprocess ended without
            printing the path of an existing file; the failure is logged.

        Raises
        ------
        UserError
            If the subprocess does not finish within `timeout_report`
            seconds.
        """
        timeout_report = Transaction().context.get('timeout_report', None)
        if not timeout_report:
            return self.render_html().write_pdf()

        script_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'generator_script.py')
        json_path = self.to_json_file()
        out = err = None
        try:
            # Spawned inside the try block so that the temporary JSON file
            # is removed even when the interpreter cannot be started.
            process = subprocess.Popen(
                ['python3', script_path, json_path],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                encoding='utf-8', errors='ignore')
            try:
                out, err = process.communicate(timeout=timeout_report)
            except subprocess.TimeoutExpired:
                process.kill()
                # Reap the killed child and drain its pipes.
                process.communicate()
                raise UserError(gettext('html_report.msg_error_timeout',
                        seconds=timeout_report))
        finally:
            os.remove(json_path)

        pdf_path = out.strip() if out else ''
        if not pdf_path or not os.path.exists(pdf_path):
            logger.error(
                'PDF generation subprocess produced no document '
                '(return code %s): %s', process.returncode, err)
            return None

        try:
            with open(pdf_path, 'rb') as pdf_file:
                return pdf_file.read()
        finally:
            # Remove the output once it is closed, even if reading failed.
            os.remove(pdf_path)

    @staticmethod
    def get_element(boxes, element):
        """
        Given a set of boxes representing the elements of a PDF page in a
        DOM-like way, find the box which is named `element`.

        The search is depth-first: each box is checked, then its children.
        Returns the first matching box, or None when there is none.

        Look at the notes of the class for more details on Weasyprint insides.
        """
        for box in boxes:
            if box.element_tag == element:
                return box
            box_children = PdfGenerator.get_element(
                box.all_children(), element)
            if box_children:
                return box_children
        return None

    def to_json_file(self):
        """
        Write the PdfGenerator properties to a temporary JSON file.

        The file is not deleted automatically; the caller is responsible
        for removing it.

        Returns
        -------
        filepath: str
            The path to the JSON file.
        """
        data = {
            "main_html": self.main_html,
            "header_html": self.header_html,
            "footer_html": self.footer_html,
            "last_footer_html": self.last_footer_html,
            "base_url": self.base_url,
            "side_margin": self.side_margin,
            "extra_vertical_margin": self.extra_vertical_margin
        }
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as file:
            filepath = file.name
            json.dump(data, file)
        return filepath