# trytond-html_report/generator.py
#
# 255 lines
# 9.5 KiB
# Python

import json
import logging
import os
import subprocess
import tempfile

from weasyprint import HTML, CSS

from trytond.exceptions import UserError
from trytond.i18n import gettext
from trytond.transaction import Transaction
class PdfGenerator:
    """
    Generate a PDF out of a rendered template, with the possibility to
    integrate nicely a header and a footer if provided.

    Notes:
    ------
    - When Weasyprint renders an html into a PDF, it goes through several
      intermediate steps. Here, in this class, we deal mostly with a box
      representation: 1 `Document` has 1 `Page` or more, each `Page` 1 `Box`
      or more. Each box can contain other boxes. Hence the recursive method
      `get_element` for example.
      For more, see:
      https://weasyprint.readthedocs.io/en/stable/hacking.html#dive-into-the-source
      https://weasyprint.readthedocs.io/en/stable/hacking.html#formatting-structure
    - Warning: the logic of this class relies heavily on the internal
      Weasyprint API. This snippet was written at the time of the release 47,
      it might break in the future.
    - This generator draws its inspiration and, also a bit of its
      implementation, from this discussion in the library github issues:
      https://github.com/Kozea/WeasyPrint/issues/92
    """
    # Stylesheet used to pre-render the overlays (header / footers) on a
    # margin-less page.
    OVERLAY_LAYOUT = '@page {size: A4 portrait; margin: 0;}'

    def __init__(self, main_html, header_html=None, footer_html=None,
            last_footer_html=None, base_url=None, side_margin=2,
            extra_vertical_margin=30):
        """
        Parameters
        ----------
        main_html: str
            An HTML file (most of the time a template rendered into a string)
            which represents the core of the PDF to generate.
        header_html: str
            An optional header html.
        footer_html: str
            An optional footer html.
        last_footer_html: str
            An optional footer html which is only inserted on the last page.
        base_url: str
            An absolute url to the page which serves as a reference to
            Weasyprint to fetch assets, required to get our media.
        side_margin: int, interpreted in cm, by default 2cm
            The margin to apply on the core of the rendered PDF
            (i.e. main_html).
        extra_vertical_margin: int, interpreted in pixel, by default 30 pixels
            An extra margin to apply between the main content and header and
            the footer.
            The goal is to avoid having the content of `main_html` touching the
            header or the footer.
        """
        self.main_html = main_html
        self.header_html = header_html
        self.footer_html = footer_html
        self.last_footer_html = last_footer_html
        self.base_url = base_url
        self.side_margin = side_margin
        self.extra_vertical_margin = extra_vertical_margin

    def _compute_overlay_element(self, element: str):
        """
        Pre-render one overlay and measure the vertical room it needs.

        Parameters
        ----------
        element: str
            Either 'header', 'footer' or 'last_footer'. The html is read from
            the matching `<element>_html` attribute.

        Returns
        -------
        element_body: BlockBox
            A Weasyprint pre-rendered representation of an html element
        element_height: float
            The height of this element, which will be then translated in a html
            height

        Raises
        ------
        ValueError
            If `element` is not one of the supported overlay names.
        """
        html = HTML(
            string=getattr(self, '{}_html'.format(element)).replace('\n', ''),
            base_url=self.base_url,
        )
        element_doc = html.render(
            stylesheets=[CSS(string=self.OVERLAY_LAYOUT)])
        element_page = element_doc.pages[0]
        element_body = PdfGenerator.get_element(
            element_page._page_box.all_children(), 'body')
        element_body = element_body.copy_with_children(
            element_body.all_children())
        # The overlay is looked up by tag name: <header>, <footer> or
        # <last-footer>.
        element_html = PdfGenerator.get_element(
            element_page._page_box.all_children(), element.replace('_', '-'))

        if element == 'header':
            element_height = element_html.height if element_html else 0
        elif element == 'footer':
            # Distance between the top of the footer tag and the bottom of
            # the page; the whole page height when the tag is missing.
            if element_html:
                element_height = (
                    element_page.height - element_html.position_y)
            else:
                element_height = element_page.height
        elif element == 'last_footer':
            # Same as the footer, minus the bottom margin of the tag.
            if element_html:
                element_height = (
                    element_page.height - element_html.position_y
                    - element_html.margin_bottom)
            else:
                element_height = element_page.height
        else:
            raise ValueError(
                'Unsupported overlay element: {!r}'.format(element))
        return element_body, element_height

    def _apply_overlay_on_main(self, main_doc, header_body=None,
            footer_body=None, last_footer_body=None):
        """
        Insert the header and the footer in the main document.

        The header and the footer are appended to the body of every page;
        the last footer is only appended to the body of the last page
        (before the regular footer).

        Parameters
        ----------
        main_doc: Document
            The top level representation for a PDF page in Weasyprint.
        header_body: BlockBox
            A representation for an html element in Weasyprint.
        footer_body: BlockBox
            A representation for an html element in Weasyprint.
        last_footer_body: BlockBox
            A representation for an html element in Weasyprint.
        """
        total_pages = len(main_doc.pages)
        for number_page, page in enumerate(main_doc.pages, start=1):
            page_body = PdfGenerator.get_element(
                page._page_box.all_children(), 'body')
            if header_body:
                page_body.children += header_body.all_children()
            if last_footer_body and number_page == total_pages:
                page_body.children += last_footer_body.all_children()
            if footer_body:
                page_body.children += footer_body.all_children()

    def render_html(self):
        """
        Render the main html and insert the overlays into every page.

        Returns
        -------
        main_doc: Document
            The rendered Weasyprint document (call `write_pdf()` on it to
            obtain the PDF bytes).
        """
        if self.header_html:
            header_body, header_height = self._compute_overlay_element(
                'header')
        else:
            header_body, header_height = None, 0
        if self.footer_html:
            footer_body, footer_height = self._compute_overlay_element(
                'footer')
        else:
            footer_body, footer_height = None, 0
        if self.last_footer_html:
            last_footer_body, last_footer_height = (
                self._compute_overlay_element('last_footer'))
        else:
            last_footer_body, last_footer_height = None, 0
        # The bottom margin must leave room for both kinds of footer.
        footer_height += last_footer_height

        # CSS margin shorthand: top right bottom left. The values are
        # separated by an explicit space (and not by a line continuation
        # inside the literal, whose result depends on the indentation).
        margins = (
            '{header_size}px {side_margin} {footer_size}px {side_margin}'
            ).format(
                header_size=header_height + self.extra_vertical_margin,
                footer_size=footer_height + self.extra_vertical_margin,
                side_margin='{}cm'.format(self.side_margin),
                )
        content_print_layout = (
            '@page {size: A4 portrait; margin: %s;}' % margins)

        html = HTML(
            string=self.main_html,
            base_url=self.base_url,
        )
        main_doc = html.render(stylesheets=[CSS(string=content_print_layout)])
        if self.header_html or self.footer_html or self.last_footer_html:
            self._apply_overlay_on_main(main_doc, header_body, footer_body,
                last_footer_body)
        return main_doc

    def render_pdf(self):
        """
        Render the document and return the PDF content.

        When the transaction context holds a truthy `timeout_report`, the
        rendering is delegated to `generator_script.py` (located next to
        this module) in a `python3` subprocess which is killed after that
        many seconds. The script receives the path of a JSON dump of this
        generator and its standard output is read as the path of the
        generated PDF file. Otherwise the PDF is rendered in-process.

        Returns
        -------
        document: bytes or None
            The PDF content; None when the subprocess did not produce an
            output file.

        Raises
        ------
        UserError
            If the subprocess did not finish within `timeout_report` seconds.
        """
        timeout_report = Transaction().context.get('timeout_report')
        if not timeout_report:
            return self.render_html().write_pdf()

        script_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'generator_script.py')
        json_path = self.to_json_file()
        try:
            # Spawn inside the try block so that the temporary JSON file is
            # removed even if the interpreter cannot be started.
            process = subprocess.Popen(
                ['python3', script_path, json_path],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                encoding='utf-8', errors='ignore')
            try:
                out, err = process.communicate(timeout=timeout_report)
            except subprocess.TimeoutExpired:
                process.kill()
                # Reap the killed child and drain its pipes.
                process.communicate()
                raise UserError(gettext(
                        'html_report.msg_error_timeout',
                        seconds=timeout_report))
        finally:
            os.remove(json_path)

        pdf_path = out.strip() if out else ''
        if not pdf_path or not os.path.exists(pdf_path):
            # Keep returning None as before, but do not lose the reason.
            logging.getLogger(__name__).error(
                'PDF generation subprocess produced no output file '
                '(exit code %s): %s', process.returncode, err)
            return None
        try:
            with open(pdf_path, 'rb') as file:
                return file.read()
        finally:
            # Always clean the file written by the subprocess, even if
            # reading it failed.
            os.remove(pdf_path)

    @staticmethod
    def get_element(boxes, element):
        """
        Given a set of boxes representing the elements of a PDF page in a
        DOM-like way, find the box which is named `element`.
        Look at the notes of the class for more details on Weasyprint insides.

        The search is depth-first; the first box whose `element_tag` equals
        `element` is returned, or None when there is no such box.
        """
        for box in boxes:
            if box.element_tag == element:
                return box
            found = PdfGenerator.get_element(box.all_children(), element)
            if found:
                return found
        return None

    def to_json_file(self):
        """
        Write the PdfGenerator properties to a temporary JSON file.

        The file is not deleted automatically: the caller is responsible
        for removing it.

        Returns
        -------
        filepath: str
            The path to the JSON file.
        """
        data = {
            "main_html": self.main_html,
            "header_html": self.header_html,
            "footer_html": self.footer_html,
            "last_footer_html": self.last_footer_html,
            "base_url": self.base_url,
            "side_margin": self.side_margin,
            "extra_vertical_margin": self.extra_vertical_margin,
            }
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as file:
            json.dump(data, file)
            return file.name