Commit cb67ec9c authored by Pierre Dittgen's avatar Pierre Dittgen
Browse files

Use configurable external service for PDF generation (1st implementation: browserless.io)

parent da525b3c
......@@ -18,3 +18,9 @@ CONFIG=config.example.yaml
# Uncomment to enable [Matomo](https://matomo.org/) analytics
# MATOMO_BASE_URL=""
# MATOMO_SITE_ID=""
# PDF export using browserless API service
# /!\ works only if validata-ui is served on a public IP (not localhost)
#
# BROWSERLESS_API_URL="https://chrome.browserless.io/pdf"
# BROWSERLESS_API_TOKEN="XXX"
......@@ -11,6 +11,7 @@ from commonmark import commonmark
from pydantic import HttpUrl
from . import config
from . import pdf_renderer
log = logging.getLogger(__name__)
......@@ -114,5 +115,9 @@ def inject_config():
return {"config": config}
# Module-level PDF renderer used to generate the PDF validation report.
# The factory returns None when no renderer is configured; in that case
# the PDF report is not available.
pdf_service = pdf_renderer.PDFRenderer.create_renderer_from_config(config)
# Keep this import after app initialisation (to avoid cyclic imports)
from . import views # noqa isort:skip
......@@ -80,3 +80,7 @@ if MATOMO_SITE_ID:
MATOMO_SITE_ID = int(MATOMO_SITE_ID)
SENTRY_DSN = os.environ.get("SENTRY_DSN")
# PDF generation service (Browserless API).
# Both settings are read from the environment; the trailing `or None`
# turns an unset or empty variable into None.
BROWSERLESS_API_URL = os.getenv("BROWSERLESS_API_URL") or None
BROWSERLESS_API_TOKEN = os.getenv("BROWSERLESS_API_TOKEN") or None
"""PDF report rendering utilities."""
from abc import ABC, abstractmethod
import logging
import requests
log = logging.getLogger(__name__)
class PDFRenderer(ABC):
    """Abstract PDF renderer.

    Concrete renderers implement ``render``; the application obtains its
    renderer through ``create_renderer_from_config``.
    """

    @abstractmethod
    def render(self, url: str) -> bytes:
        """Render a PDF document content from given URL.

        Args:
            url: Address of the page to render.

        Returns:
            The raw bytes of the generated PDF document.
        """

    @staticmethod
    def create_renderer_from_config(config):
        """PDF renderer instance factory.

        Args:
            config: Object exposing the ``BROWSERLESS_API_URL`` and
                ``BROWSERLESS_API_TOKEN`` attributes.

        Returns:
            A ``BrowserlessPDFRenderer`` when both settings are truthy,
            otherwise ``None`` (no PDF renderer available).
        """
        # Both the endpoint and the token are required to use the service.
        if config.BROWSERLESS_API_URL and config.BROWSERLESS_API_TOKEN:
            log.info("Creating Browserless.io PDF renderer")
            return BrowserlessPDFRenderer(
                config.BROWSERLESS_API_URL, config.BROWSERLESS_API_TOKEN
            )

        log.info("No PDF renderer available")
        return None
class BrowserlessPDFRenderer(PDFRenderer):
    """Browserless IO implementation."""

    def __init__(self, api_url: str, api_token: str, timeout: float = 60.0):
        """Store the connection settings of the remote PDF service.

        Args:
            api_url: URL of the PDF endpoint requests are posted to.
            api_token: Token sent as the ``token`` query parameter.
            timeout: Maximum number of seconds to wait for the service
                before giving up.
        """
        self.api_url = api_url
        self.api_token = api_token
        self.timeout = timeout

    def render(self, url: str) -> bytes:
        """Ask the remote service to render the page at ``url`` as a PDF.

        Args:
            url: Address of the page to render.

        Returns:
            The body of the service response, i.e. the PDF content.

        Raises:
            requests.HTTPError: If the service answers with an error status.
            requests.Timeout: If the service does not answer within
                ``self.timeout`` seconds.
        """
        headers = {
            "Cache-Control": "no-cache",
        }
        params = {"token": self.api_token}
        data = {
            "url": url,
            "options": {
                "displayHeaderFooter": True,
                "printBackground": False,
                "format": "A4",
            },
        }

        # Request server. An explicit timeout is required: without one,
        # requests waits indefinitely and a stalled service would block
        # the calling web worker.
        r = requests.post(
            self.api_url,
            headers=headers,
            params=params,
            json=data,
            timeout=self.timeout,
        )
        r.raise_for_status()

        return r.content
......@@ -23,7 +23,7 @@ from validata_core.helpers import (
FileContentValidataResource,
)
from . import app, config, schema_catalog_registry, fetch_schema
from . import app, config, schema_catalog_registry, fetch_schema, pdf_service
from .model import Section
from .ui_util import flash_error, flash_warning
from .validata_util import strip_accents
......@@ -396,9 +396,11 @@ def validate(schema_instance: SchemaInstance, validata_resource: ValidataResourc
schema_info = compute_schema_info(schema_instance.schema, schema_instance.url)
# Build PDF report URL
# PDF report is available only when resource are URL as it require to download them again
# PDF report is available if:
# - a pdf_service has been configured
# - tabular resource to validate is defined as an URL
pdf_report_url = None
if isinstance(validata_resource, URLValidataResource):
if pdf_service and isinstance(validata_resource, URLValidataResource):
base_url = url_for("pdf_report")
query_string = urlencode(
{
......@@ -554,9 +556,13 @@ def pdf_report():
flash_error(err_prefix + " : URL non fournie")
return redirect(url_for("home"))
if pdf_service is None:
flash_error(err_prefix + " : service de génération non configuré")
return redirect(url_for("home"))
# Compute validation report URL
schema_instance = SchemaInstance(request.args)
# Compute pdf url report
base_url = url_for("custom_validator", _external=True)
parameter_dict = {
"input": "url",
......@@ -566,42 +572,25 @@ def pdf_report():
}
validation_url = "{}?{}".format(base_url, urlencode(parameter_dict))
# Create temp file to save validation report
# This temp file will be automatically deleted on context exit
with tempfile.NamedTemporaryFile(
prefix="validata_{}_report_".format(datetime.now().timestamp()), suffix=".pdf"
) as tmpfile:
tmp_pdf_report = Path(tmpfile.name)
# Use chromium headless to generate PDF from validation report page
cmd = [
"chromium",
"--headless",
"--no-sandbox",
"--disable-gpu",
"--disable-dev-shm-usage",
f"--print-to-pdf={tmp_pdf_report}",
validation_url,
]
result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
if result.returncode != 0:
flash_error(err_prefix)
log.error(
"Command %r returned an error: %r", cmd, result.stdout.decode("utf-8")
)
return redirect(url_for("home"))
# Ask for PDF report generation
try:
pdf_bytes_content = pdf_service.render(validation_url)
except Exception as ex:
log.exception(err_prefix)
flash_error(err_prefix + " : contactez votre administrateur")
return redirect(url_for("home"))
# Send PDF report
pdf_filename = "Rapport de validation {}.pdf".format(
datetime.now().strftime("%d-%m-%Y %Hh%M")
)
response = make_response(tmp_pdf_report.read_bytes())
response.headers.set("Content-disposition", "attachment", filename=pdf_filename)
response.headers.set("Content-type", "application/pdf")
response.headers.set("Content-length", tmp_pdf_report.stat().st_size)
# Compute pdf filename
pdf_filename = "Rapport de validation {}.pdf".format(
datetime.now().strftime("%d-%m-%Y %Hh%M")
)
return response
# Prepare and send response
response = make_response(pdf_bytes_content)
response.headers.set("Content-Disposition", "attachment", filename=pdf_filename)
response.headers.set("Content-Length", len(pdf_bytes_content))
response.headers.set("Content-Type", "application/pdf")
return response
def extract_schema_metadata(table_schema: frictionless.Schema):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment