"""
    Routes
"""
import copy
import html
import io
import json
import logging
import subprocess
import tempfile
from collections import Counter
from datetime import datetime
from pathlib import Path
from urllib.parse import urlencode, urljoin

import frictionless
import jsonschema
import requests
from commonmark import commonmark
from flask import abort, make_response, redirect, render_template, request, url_for
from opendataschema import GitSchemaReference, by_commit_date

import validata_core
from validata_core.helpers import (
    ValidataResource,
    URLValidataResource,
    FileContentValidataResource,
)

from . import app, config, schema_catalog_registry, fetch_schema
from .model import Section
from .ui_util import flash_error, flash_warning
from .validata_util import strip_accents

31

Christophe Benz's avatar
Christophe Benz committed
32
33
log = logging.getLogger(__name__)

34

35
36
37
38
39
def get_schema_catalog(section_name):
    """Build and return the schema catalog registered for ``section_name``."""
    catalog = schema_catalog_registry.build_schema_catalog(section_name)
    return catalog


40
41
42
class SchemaInstance:
    """Schema information resolved from request parameters.

    A schema is designated either directly by URL (``schema_url``) or by a
    ``<section>.<schema>`` name (``schema_name``), optionally completed by a
    reference (``schema_ref``), which is looked up in the section catalog.
    """

    def __init__(self, parameter_dict):
        """Initialize the schema instance from a request parameter mapping.

        ``parameter_dict`` is read with ``.get()`` / ``[]`` for the keys
        ``schema_url``, ``schema_name`` and ``schema_ref``.

        Every failure path ends in ``flask.abort``: either a 400 error or a
        redirection (to the home page, or to the validation form when a git
        schema is requested without an explicit reference).
        """
        self.section_name = None
        self.section_title = None
        # Raw "<section>.<schema>" value; stays None when the schema is given by URL
        self.schema_and_section_name = None
        self.name = None
        self.url = None
        self.ref = None
        self.reference = None
        self.doc_url = None
        self.branches = None
        self.tags = None
        self.schema = None

        # From schema_url
        if parameter_dict.get("schema_url"):
            self.url = parameter_dict["schema_url"]
            self.section_title = "Autre schéma"

        # From schema_name (and schema_ref)
        elif parameter_dict.get("schema_name"):
            self.schema_and_section_name = parameter_dict["schema_name"]
            self.ref = parameter_dict.get("schema_ref")

            # Check schema name: exactly "<section>.<schema>"
            chunks = self.schema_and_section_name.split(".")
            if len(chunks) != 2:
                abort(400, "Paramètre 'schema_name' invalide")

            self.section_name, self.name = chunks
            self.section_title = self.find_section_title(self.section_name)

            # Look for schema catalog first
            try:
                table_schema_catalog = get_schema_catalog(self.section_name)
            except Exception as ex:
                log.exception(ex)
                abort(400, "Erreur de traitement du catalogue")
            if table_schema_catalog is None:
                abort(400, "Catalogue indisponible")

            schema_reference = table_schema_catalog.reference_by_name.get(self.name)
            if schema_reference is None:
                abort(
                    400,
                    "Schéma '{}' non trouvé dans le catalogue de la section '{}'".format(
                        self.name, self.section_name
                    ),
                )

            if isinstance(schema_reference, GitSchemaReference):
                # Most recent tag first
                self.tags = sorted(
                    schema_reference.iter_tags(), key=by_commit_date, reverse=True
                )
                if self.ref is None:
                    # No reference requested: redirect to the same form with an
                    # explicit one (latest tag, or default branch without tags)
                    schema_ref = (
                        self.tags[0]
                        if self.tags
                        else schema_reference.get_default_branch()
                    )
                    abort(
                        redirect(
                            compute_validation_form_url(
                                {
                                    "schema_name": self.schema_and_section_name,
                                    "schema_ref": schema_ref.name,
                                }
                            )
                        )
                    )
                tag_names = [tag.name for tag in self.tags]
                # Keep only branches whose name is not also a tag name
                self.branches = [
                    branch
                    for branch in schema_reference.iter_branches()
                    if branch.name not in tag_names
                ]
                self.doc_url = schema_reference.get_doc_url(
                    ref=self.ref
                ) or schema_reference.get_project_url(ref=self.ref)

            self.url = schema_reference.get_schema_url(ref=self.ref)

        else:
            flash_error("Erreur dans la récupération des informations de schéma")
            abort(redirect(url_for("home")))

        try:
            self.schema = fetch_schema(self.url)
        except json.JSONDecodeError as e:
            log.exception(e)
            flash_error("Le format du schéma n'est pas reconnu")
            abort(redirect(url_for("home")))
        except Exception as e:
            log.exception(e)
            flash_error("Impossible de récupérer le schéma")
            abort(redirect(url_for("home")))

    def request_parameters(self):
        """Return the request parameters that designate this schema.

        Uses ``schema_name`` / ``schema_ref`` when the schema was resolved by
        name (``schema_ref`` is an empty string when no reference is set),
        otherwise ``schema_url``.
        """
        if self.name:
            return {
                "schema_name": self.schema_and_section_name,
                "schema_ref": "" if self.ref is None else self.ref,
            }
        return {"schema_url": self.url}

    def find_section_title(self, section_name):
        """Return the title of the homepage section named ``section_name``.

        Returns ``None`` when no configuration is loaded or no section matches.
        """
        if config.CONFIG:
            for section in config.CONFIG.homepage.sections:
                if section.name == section_name:
                    return section.title
        return None

153

154
155
156
def build_template_source_data(header, rows, preview_rows_nb=5):
    """Build source data information to preview in validation report page.

    Args:
        header: sequence of column names.
        rows: list of data rows (sliced to build the preview).
        preview_rows_nb: maximum number of rows exposed in the preview.

    Returns:
        A dict with the header, all rows, the rows count and the preview rows.
    """
    source_header_info = [(colname, False) for colname in header]

    rows_count = len(rows)
    # Clamp to 0 so that a negative preview size cannot yield a negative count
    # nor a slice that drops rows from the end
    preview_rows_count = max(0, min(preview_rows_nb, rows_count))
    return {
        "source_header_info": source_header_info,
        "header": header,
        "rows_nb": rows_count,
        "data_rows": rows,
        "preview_rows_count": preview_rows_count,
        "preview_rows": rows[:preview_rows_count],
    }
168
169


170
def build_ui_errors(errors):
    """Add context to errors, converts markdown content to HTML.

    Returns a new list; each item is a copy of the input error completed with
    ``context``, ``content`` and, when absent, ``title`` keys.
    """

    def improve_err(err):
        """Adds context info based on row-nb presence and converts content to HTML"""

        # Context: an error bound to a row belongs to the body.
        # The row is looked up under both "row-number" and "rowPosition",
        # the latter being the key the rest of this module reads.
        has_row = (
            err.get("row-number") is not None or err.get("rowPosition") is not None
        )
        update_keys = {"context": "body" if has_row else "table"}

        # Set title
        if "title" not in err:
            update_keys["title"] = err["name"]

        # Set content: "message" takes precedence over "description"
        content = "*content soon available*"
        if "message" in err:
            content = err["message"]
        elif "description" in err:
            content = err["description"]
        update_keys["content"] = commonmark(content)

        return {**err, **update_keys}

    return [improve_err(err) for err in errors]
198
199


Pierre Dittgen's avatar
wip    
Pierre Dittgen committed
200
def create_validata_ui_report(validata_core_report, schema_dict):
    """Creates an error report easier to handle and display in templates:
    - only one table
    - errors are contextualized
    - error-counts is ok
    - errors are grouped by lines
    - errors are separated into "structure" and "body"
    - error messages are improved

    Args:
        validata_core_report: report object exposing a ``table`` mapping with
            ``header``, ``stats``, ``errors`` and ``structure_warnings`` keys.
            It is deep-copied, so the caller's report is left untouched.
        schema_dict: schema mapping; its ``fields`` list gives column titles
            and descriptions.

    Returns:
        The UI report as a plain dict.
    """
    v_report = copy.deepcopy(validata_core_report)

    # Create a new UI report from information picked in validata report
    ui_report = {}
    ui_report["table"] = {}

    # source headers
    headers = v_report.table["header"]
    ui_report["table"]["header"] = headers

    # source dimension
    ui_report["table"]["col_count"] = len(headers)
    ui_report["table"]["row_count"] = v_report.table["stats"]["rows"]

    # Computes column info from schema: name -> (title, description)
    fields_dict = {
        f["name"]: (f.get("title", f["name"]), f.get("description", ""))
        for f in schema_dict.get("fields", [])
    }
    ui_report["table"]["headers_title"] = [
        fields_dict[h][0] if h in fields_dict else "Colonne inconnue" for h in headers
    ]
    ui_report["table"]["headers_description"] = [
        fields_dict[h][1]
        if h in fields_dict
        else "Cette colonne n'est pas définie dans le schema"
        for h in headers
    ]
    missing_headers = [
        err["message-data"]["column-name"]
        for err in v_report.table["errors"]
        if err["code"] == "missing-header"
    ]
    # Flag columns unknown to the schema or reported as missing
    ui_report["table"]["cols_alert"] = [
        "table-danger" if h not in fields_dict or h in missing_headers else ""
        for h in headers
    ]

    # prepare error structure for UI needs
    errors = build_ui_errors(v_report.table["errors"])

    # Count errors and warnings
    ui_report["error_count"] = len(errors)
    ui_report["warn_count"] = len(v_report.table["structure_warnings"])
    ui_report["warnings"] = v_report.table["structure_warnings"]

    # Then group them in 2 groups : structure and body
    # (errors carrying none of these tags are counted above but not grouped)
    ui_report["table"]["errors"] = {"structure": [], "body": []}
    for err in errors:
        if "#head" in err["tags"] or "#structure" in err["tags"]:
            ui_report["table"]["errors"]["structure"].append(err)
        elif "#body" in err["tags"] or "#content" in err["tags"]:
            ui_report["table"]["errors"]["body"].append(err)

    # Group body errors by row id; a new group starts each time the row id
    # changes, so errors are expected to arrive ordered by row.
    rows = []
    # None (rather than 0) so that the very first error always opens a group,
    # whatever its row position
    current_row_id = None
    for err in ui_report["table"]["errors"]["body"]:
        if "rowPosition" not in err:
            continue
        row_id = err["rowPosition"]
        if row_id != current_row_id:
            current_row_id = row_id
            rows.append({"row_id": current_row_id, "errors": {}})

        # Cell errors are stored by column position, row-level errors under "row"
        column_id = err.get("fieldPosition")
        if column_id is not None:
            rows[-1]["errors"][column_id] = err
        else:
            rows[-1]["errors"]["row"] = err
    ui_report["table"]["errors"]["body_by_rows"] = rows

    # Sort by error names in statistics
    ui_report["table"]["count-by-code"] = {}
    stats = {}
    total_errors_count = 0
    for key in ("structure", "body"):
        # build (error name, count) tuples sorted by name
        key_errors = ui_report["table"]["errors"][key]
        key_errors_count = len(key_errors)
        ct = Counter(ke["name"] for ke in key_errors)

        error_stats = {
            "count": key_errors_count,
            "count-by-code": sorted(ct.items()),
        }
        total_errors_count += key_errors_count

        # Add error rows count
        if key == "body":
            error_rows = {
                err["rowPosition"] for err in key_errors if "rowPosition" in err
            }
            error_stats["rows-count"] = len(error_rows)

        stats[f"{key}-errors"] = error_stats

    stats["count"] = total_errors_count
    ui_report["table"]["error-stats"] = stats

    return ui_report
Pierre Dittgen's avatar
Pierre Dittgen committed
311
312


Pierre Dittgen's avatar
Pierre Dittgen committed
313
314
def compute_badge_message_and_color(badge):
    """Computes message and color from badge information.

    Args:
        badge: mapping with a ``structure`` status, an optional ``body``
            status and, when the body is not ``"OK"``, an ``error-ratio``.

    Returns:
        A ``(message, color)`` tuple.
    """
    structure = badge["structure"]
    body = badge.get("body")

    # Bad structure, stop here
    if structure == "KO":
        return ("structure invalide", "red")

    # No body error
    if body == "OK":
        return (
            ("partiellement valide", "yellowgreen")
            if structure == "WARN"
            else ("valide", "green")
        )

    # else compute quality ratio percent
    p = (1 - badge["error-ratio"]) * 100.0
    msg = "cellules valides : {:.1f}%".format(p)
    return (msg, "red") if body == "KO" else (msg, "orange")
Pierre Dittgen's avatar
Pierre Dittgen committed
334
335
336
337
338
339


def get_badge_url_and_message(badge):
    """Gets badge url and message from badge information.

    The URL targets the ``/static/v1.svg`` endpoint under
    ``config.SHIELDS_IO_BASE_URL``, with label, message and color passed as
    query parameters.

    Returns:
        A ``(badge_url, message)`` tuple.
    """
    msg, color = compute_badge_message_and_color(badge)
    badge_url = "{}?{}".format(
        urljoin(config.SHIELDS_IO_BASE_URL, "/static/v1.svg"),
        urlencode({"label": "Validata", "message": msg, "color": color}),
    )
    return (badge_url, msg)
Pierre Dittgen's avatar
Pierre Dittgen committed
345
346


347
def validate(schema_instance: SchemaInstance, validata_resource: ValidataResource):
    """Validate source and display report.

    Returns the rendered ``validation_report.html`` page, or a redirection to
    the validation form (with a flashed error) when the source itself cannot
    be processed.
    """

    # Parse source data once
    header, rows = validata_resource.extract_tabular_data()

    # Call validata_core with parsed data
    validata_core_report = validata_core.validate(
        [header] + rows, schema_instance.schema
    )

    # Badge is displayed only when both a badge configuration and a
    # shields.io base URL are set
    badge_config = config.BADGE_CONFIG

    # Computes badge from report and badge configuration
    badge_url, badge_msg = None, None
    display_badge = badge_config and config.SHIELDS_IO_BASE_URL
    if display_badge:
        badge_stats = validata_core.compute_badge_metrics(
            validata_core_report, badge_config
        )
        badge_url, badge_msg = get_badge_url_and_message(badge_stats)

    # Source error
    # NOTE(review): the report is read here through report["tables"][0] whereas
    # create_validata_ui_report() reads report.table -- confirm both accesses
    # are supported by the report type.
    source_errors = [
        err
        for err in validata_core_report["tables"][0]["errors"]
        if err["code"] in {"source-error", "unknown-csv-dialect"}
    ]
    if source_errors:
        err = source_errors[0]
        # A "charmap" message is reported to the user as an encoding problem
        msg = (
            "l'encodage du fichier est invalide. Veuillez le corriger"
            if "charmap" in err["message"]
            else err["message"]
        )
        flash_error("Erreur de source : {}".format(msg))
        return redirect(url_for("custom_validator"))

    # handle report date (converted to the local timezone)
    report_datetime = datetime.fromisoformat(validata_core_report["date"]).astimezone()

    # create ui_report
    ui_report = create_validata_ui_report(validata_core_report, schema_instance.schema)

    # Display report to the user
    validator_form_url = compute_validation_form_url(
        schema_instance.request_parameters()
    )
    schema_info = compute_schema_info(schema_instance.schema, schema_instance.url)

    # Build PDF report URL
    # PDF report is available only when the resource is a URL, as generating it
    # requires downloading the resource again
    pdf_report_url = None
    if isinstance(validata_resource, URLValidataResource):
        base_url = url_for("pdf_report")
        query_string = urlencode(
            {
                **schema_instance.request_parameters(),
                "url": validata_resource.url,
            }
        )
        pdf_report_url = f"{base_url}?{query_string}"

    return render_template(
        "validation_report.html",
        config=config.CONFIG,
        badge_msg=badge_msg,
        badge_url=badge_url,
        breadcrumbs=[
            {"title": "Accueil", "url": url_for("home")},
            {"title": schema_instance.section_title},
            {"title": schema_info["title"], "url": validator_form_url},
            {"title": "Rapport de validation"},
        ],
        display_badge=display_badge,
        doc_url=schema_instance.doc_url,
        pdf_report_url=pdf_report_url,
        print_mode=request.args.get("print", "false") == "true",
        report=ui_report,
        schema_current_version=schema_instance.ref,
        schema_info=schema_info,
        section_title=schema_instance.section_title,
        source_data=build_template_source_data(header, rows),
        source=validata_resource,
        validation_date=report_datetime.strftime("le %d/%m/%Y à %Hh%M"),
    )
434
435


436
437
def bytes_data(f):
    """Gets bytes data from Werkzeug FileStorage instance.

    ``f`` only needs a ``save(dst)`` method writing its content to the
    file-like object ``dst``.
    """
    iob = io.BytesIO()
    f.save(iob)
    # getvalue() returns the whole buffer whatever the current position,
    # so no rewind is needed
    return iob.getvalue()


Pierre Dittgen's avatar
Pierre Dittgen committed
444
def retrieve_schema_catalog(section: Section):
    """Retrieve schema catalog and return formatted error if it fails.

    Returns:
        ``(schema_catalog, None)`` on success, or ``(None, error_catalog)``
        where ``error_catalog`` is the section content (without its
        ``catalog`` key) completed with an ``err`` HTML message.
    """

    def format_error_message(err_message, exc):
        """Prepare a bootstrap error message with details if wanted."""

        # Exception arguments are arbitrary text inserted into HTML markup:
        # escape them so they cannot inject tags
        exception_text = html.escape("\n".join(str(arg) for arg in exc.args))

        return f"""{err_message}
        <div class="float-right">
            <button type="button" class="btn btn-info btn-xs" data-toggle="collapse" data-target="#exception_info">détails</button>
        </div>
        <div id="exception_info" class="collapse">
                <pre>{exception_text}</pre>
        </div>
"""

    try:
        schema_catalog = get_schema_catalog(section.name)
        return (schema_catalog, None)

    except Exception as exc:
        log.exception(exc)
        # Pick a user-facing message from the exception type
        err_msg = "une erreur s'est produite"
        if isinstance(exc, requests.ConnectionError):
            err_msg = "problème de connexion"
        elif isinstance(exc, json.decoder.JSONDecodeError):
            err_msg = "format JSON incorrect"
        elif isinstance(exc, jsonschema.exceptions.ValidationError):
            err_msg = "le catalogue ne respecte pas le schéma de référence"

        error_catalog = {
            **{k: v for k, v in section.dict().items() if k != "catalog"},
            "err": format_error_message(err_msg, exc),
        }
        return None, error_catalog


482
483
484
# Routes


Pierre Dittgen's avatar
Pierre Dittgen committed
485
@app.route("/")
def home():
    """Home page: list configured sections with their schemas."""

    def iter_sections():
        """Yield sections of the home page, filled with schema metadata."""

        # Iterate on all sections
        for section in config.CONFIG.homepage.sections:

            # section with only links to external validators
            if section.links:
                yield section
                continue

            # section with catalog
            if section.catalog is None:
                # skip section
                continue

            # retrieving schema catalog
            schema_catalog, catalog_error = retrieve_schema_catalog(section)
            if schema_catalog is None:
                yield catalog_error
                continue

            # Working on catalog
            schema_info_list = []
            for schema_reference in schema_catalog.references:
                # Loads default table schema for each schema reference
                schema_info = {"name": schema_reference.name}
                try:
                    table_schema = fetch_schema(schema_reference.get_schema_url())
                except json.JSONDecodeError:
                    schema_info["err"] = True
                    schema_info[
                        "title"
                    ] = f"le format du schéma « {schema_info['name']} » n'est pas reconnu"
                except Exception:
                    # A failing schema must not break the whole home page:
                    # it is listed with an error title instead
                    schema_info["err"] = True
                    schema_info[
                        "title"
                    ] = f"le schéma « {schema_info['name']} » n'est pas disponible"
                else:
                    schema_info["title"] = (
                        table_schema.get("title") or schema_info["name"]
                    )
                schema_info_list.append(schema_info)
            # Sort by title, ignoring case and accents
            schema_info_list = sorted(
                schema_info_list, key=lambda sc: strip_accents(sc["title"].lower())
            )

            yield {
                **{k: v for k, v in section.dict().items() if k != "catalog"},
                "catalog": schema_info_list,
            }

    return render_template(
        "home.html", config=config.CONFIG, sections=list(iter_sections())
    )
545
546


Pierre Dittgen's avatar
Pierre Dittgen committed
547
@app.route("/pdf")
def pdf_report():
    """PDF report generation.

    Renders the validation report page (in print mode) for the resource given
    by the ``url`` request argument with headless chromium, and returns the
    resulting PDF as an attachment. Redirects to the home page with a flashed
    error when the URL is missing or when chromium fails.
    """
    err_prefix = "Erreur de génération du rapport PDF"

    url_param = request.args.get("url")
    if not url_param:
        flash_error(err_prefix + " : URL non fournie")
        return redirect(url_for("home"))

    schema_instance = SchemaInstance(request.args)

    # Compute the URL of the validation report page to print
    base_url = url_for("custom_validator", _external=True)
    parameter_dict = {
        "input": "url",
        "print": "true",
        "url": url_param,
        **schema_instance.request_parameters(),
    }
    validation_url = "{}?{}".format(base_url, urlencode(parameter_dict))

    # Create temp file to save validation report
    # This temp file will be automatically deleted on context exit
    with tempfile.NamedTemporaryFile(
        prefix="validata_{}_report_".format(datetime.now().timestamp()), suffix=".pdf"
    ) as tmpfile:
        tmp_pdf_report = Path(tmpfile.name)

        # Use chromium headless to generate PDF from validation report page.
        # Arguments are passed as a list (no shell), so the URL is not
        # interpreted by a shell.
        cmd = [
            "chromium",
            "--headless",
            "--no-sandbox",
            "--disable-gpu",
            "--disable-dev-shm-usage",
            f"--print-to-pdf={tmp_pdf_report}",
            validation_url,
        ]

        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        if result.returncode != 0:
            flash_error(err_prefix)
            log.error(
                "Command %r returned an error: %r", cmd, result.stdout.decode("utf-8")
            )
            return redirect(url_for("home"))

        # Send PDF report; content is read before the temp file is deleted
        pdf_filename = "Rapport de validation {}.pdf".format(
            datetime.now().strftime("%d-%m-%Y %Hh%M")
        )
        pdf_bytes = tmp_pdf_report.read_bytes()
        response = make_response(pdf_bytes)
        response.headers.set("Content-disposition", "attachment", filename=pdf_filename)
        response.headers.set("Content-type", "application/pdf")
        response.headers.set("Content-length", len(pdf_bytes))

        return response
605
606


Pierre Dittgen's avatar
Pierre Dittgen committed
607
def extract_schema_metadata(table_schema: frictionless.Schema):
    """Gets author, contributor, version... metadata from schema header.

    Returns a dict holding every item of ``table_schema`` except ``fields``.
    """
    return {k: v for k, v in table_schema.items() if k != "fields"}
Pierre Dittgen's avatar
Pierre Dittgen committed
610
611


Pierre Dittgen's avatar
Pierre Dittgen committed
612
def compute_schema_info(table_schema: frictionless.Schema, schema_url):
    """Build the schema information dict used by the validator form page.

    The result holds a "path" entry initialised with `schema_url`, completed
    with the metadata returned by `extract_schema_metadata(table_schema)`.
    """
    schema_info = {"path": schema_url}

    # A "path" metadata property can be found in Table Schema, and we'd like
    # it to override the `schema_url` given by the user (in case schema was
    # given by URL): that is why the metadata is merged after "path" is set.
    schema_info.update(extract_schema_metadata(table_schema))

    return schema_info
623
624


625
def compute_validation_form_url(request_parameters: dict):
    """Return the validator form URL with `request_parameters` as query string."""
    form_url = url_for("custom_validator")
    query_string = urlencode(request_parameters)
    return f"{form_url}?{query_string}"
Pierre Dittgen's avatar
Pierre Dittgen committed
629
630


Pierre Dittgen's avatar
Pierre Dittgen committed
631
@app.route("/table-schema", methods=["GET", "POST"])
def custom_validator():
    """Validator form.

    GET requests either display the validation form (no "input" parameter)
    or validate the resource found at the "url" parameter.
    POST requests validate an uploaded file (form parameter "input" set to
    "file", file sent in the "file" part).

    Returns a rendered template, a redirection to the validation form (after
    flashing a message) when the resource to validate is missing, the result
    of `validate(...)`, or a `(message, status)` tuple for unsupported
    requests.
    """

    if request.method == "GET":

        # "input" is a hidden form parameter to know if this is the initial
        # page display or if the validation has been asked for
        input_param = request.args.get("input")

        # URL of the resource to be validated
        url_param = request.args.get("url")

        schema_instance = SchemaInstance(request.args)

        # First form display
        if input_param is None:
            schema_info = compute_schema_info(
                schema_instance.schema, schema_instance.url
            )
            return render_template(
                "validation_form.html",
                config=config.CONFIG,
                branches=schema_instance.branches,
                breadcrumbs=[
                    {"url": url_for("home"), "title": "Accueil"},
                    {"title": schema_instance.section_title},
                    {"title": schema_info["title"]},
                ],
                doc_url=schema_instance.doc_url,
                schema_current_version=schema_instance.ref,
                schema_info=schema_info,
                schema_params=schema_instance.request_parameters(),
                section_title=schema_instance.section_title,
                tags=schema_instance.tags,
            )

        # Process URL
        if not url_param:
            flash_error("Vous n'avez pas indiqué d'URL à valider")
            return redirect(
                compute_validation_form_url(schema_instance.request_parameters())
            )
        return validate(schema_instance, URLValidataResource(url_param))

    if request.method == "POST":

        schema_instance = SchemaInstance(request.form)

        input_param = request.form.get("input")
        if input_param is None:
            flash_error("Vous n'avez pas indiqué de fichier à valider")
            return redirect(
                compute_validation_form_url(schema_instance.request_parameters())
            )

        # File validation
        if input_param == "file":
            f = request.files.get("file")
            # When the user submits the form without selecting a file, browsers
            # still send the "file" part, with an empty filename: checking for
            # None only would let an empty upload go through validation.
            if f is None or not f.filename:
                flash_warning("Vous n'avez pas indiqué de fichier à valider")
                return redirect(
                    compute_validation_form_url(schema_instance.request_parameters())
                )
            return validate(
                schema_instance, FileContentValidataResource(f.filename, bytes_data(f))
            )

        return "Combinaison de paramètres non supportée", 400

    # Any method other than GET or POST
    return "Method not allowed", 405