views.py 25.7 KB
Newer Older
Pierre Dittgen's avatar
Pierre Dittgen committed
1
"""Routes."""
2
import copy
Pierre Dittgen's avatar
wip    
Pierre Dittgen committed
3
import io
4
import json
Christophe Benz's avatar
Christophe Benz committed
5
import logging
6
from collections import Counter
7
from datetime import datetime
8
from pathlib import Path
Pierre Dittgen's avatar
Pierre Dittgen committed
9
from typing import Any, Dict, Iterable, List
Christophe Benz's avatar
Christophe Benz committed
10
from urllib.parse import urlencode, urljoin
11

Pierre Dittgen's avatar
Pierre Dittgen committed
12
import frictionless
13
import jsonschema
Pierre Dittgen's avatar
wip    
Pierre Dittgen committed
14
import requests
Christophe Benz's avatar
Christophe Benz committed
15
import validata_core
16
from commonmark import commonmark
Pierre Dittgen's avatar
Pierre Dittgen committed
17
from flask import abort, make_response, redirect, render_template, request, url_for
18
from opendataschema import GitSchemaReference, by_commit_date
Pierre Dittgen's avatar
Pierre Dittgen committed
19
20
21
22
from validata_core.helpers import (
    FileContentValidataResource,
    URLValidataResource,
    ValidataResource,
Pierre Dittgen's avatar
Pierre Dittgen committed
23
    ValidataSourceError,
Pierre Dittgen's avatar
Pierre Dittgen committed
24
25
26
    is_body_error,
    is_structure_error,
)
27

Christophe Benz's avatar
Christophe Benz committed
28
from . import app, config, fetch_schema, pdf_service, schema_catalog_registry
Pierre Dittgen's avatar
Pierre Dittgen committed
29
from .model import Section
30
from .ui_util import flash_error, flash_warning
31
32
from .validata_util import strip_accents

Christophe Benz's avatar
Christophe Benz committed
33
34
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

35

36
def get_schema_catalog(section_name):
    """Return the schema catalog associated with ``section_name``.

    Thin wrapper delegating to ``schema_catalog_registry.build_schema_catalog``.
    """
    return schema_catalog_registry.build_schema_catalog(section_name)


41
class SchemaInstance:
    """Handy class to handle schema information.

    A schema is identified from request parameters either by a direct URL
    (``schema_url``) or by a catalog name (``schema_name`` written as
    ``<section>.<schema>``, optionally completed by ``schema_ref``).
    """

    def __init__(self, parameter_dict):
        """Initialize schema instance and tableschema catalog.

        ``parameter_dict`` is a mapping read with ``.get`` (e.g. request args).
        Every failure path calls ``abort`` (HTTP 400 or a redirect response),
        which raises and therefore stops the construction.
        """
        self.section_name = None
        self.section_title = None
        self.name = None
        self.url = None
        self.ref = None
        self.reference = None
        self.doc_url = None
        self.branches = None
        self.tags = None
        # Declared up front so these attributes exist whatever branch runs.
        self.schema_and_section_name = None
        self.schema = None

        # From schema_url
        if parameter_dict.get("schema_url"):
            self.url = parameter_dict["schema_url"]
            self.section_title = "Autre schéma"

        # From schema_name (and schema_ref)
        elif parameter_dict.get("schema_name"):
            self.schema_and_section_name = parameter_dict["schema_name"]
            self.ref = parameter_dict.get("schema_ref")

            # Check schema name: exactly "<section>.<schema>"
            chunks = self.schema_and_section_name.split(".")
            if len(chunks) != 2:
                abort(400, "Paramètre 'schema_name' invalide")

            self.section_name, self.name = chunks
            self.section_title = self.find_section_title(self.section_name)

            # Look for schema catalog first
            try:
                table_schema_catalog = get_schema_catalog(self.section_name)
            except Exception:
                log.exception(
                    "Erreur de traitement du catalogue de la section %r",
                    self.section_name,
                )
                abort(400, "Erreur de traitement du catalogue")
            if table_schema_catalog is None:
                abort(400, "Catalogue indisponible")

            schema_reference = table_schema_catalog.reference_by_name.get(self.name)
            if schema_reference is None:
                abort(
                    400,
                    f"Schéma {self.name!r} non trouvé dans le catalogue de la "
                    f"section {self.section_name!r}",
                )

            if isinstance(schema_reference, GitSchemaReference):
                # Most recent tag first
                self.tags = sorted(
                    schema_reference.iter_tags(), key=by_commit_date, reverse=True
                )
                if self.ref is None:
                    # No ref requested: redirect to the same form with an
                    # explicit ref (latest tag, else the default branch).
                    schema_ref = (
                        self.tags[0]
                        if self.tags
                        else schema_reference.get_default_branch()
                    )
                    abort(
                        redirect(
                            compute_validation_form_url(
                                {
                                    "schema_name": self.schema_and_section_name,
                                    "schema_ref": schema_ref.name,
                                }
                            )
                        )
                    )
                tag_names = [tag.name for tag in self.tags]
                # Keep only branches whose name is not also a tag name
                self.branches = [
                    branch
                    for branch in schema_reference.iter_branches()
                    if branch.name not in tag_names
                ]
                self.doc_url = schema_reference.get_doc_url(
                    ref=self.ref
                ) or schema_reference.get_project_url(ref=self.ref)

            self.url = schema_reference.get_schema_url(ref=self.ref)

        else:
            flash_error("Erreur dans la récupération des informations de schéma")
            abort(redirect(url_for("home")))

        try:
            self.schema = fetch_schema(self.url)
        except json.JSONDecodeError:
            err_msg = "Le format du schéma n'est pas reconnu"
            log.exception(err_msg)
            flash_error(err_msg)
            abort(redirect(url_for("home")))
        except Exception:
            err_msg = "Impossible de récupérer le schéma"
            log.exception(err_msg)
            flash_error(err_msg)
            abort(redirect(url_for("home")))

    def request_parameters(self):
        """Build request parameter dict to identify schema.

        Returns ``schema_name``/``schema_ref`` when the schema comes from a
        catalog (``self.name`` is set), ``schema_url`` otherwise.
        """
        if self.name:
            return {
                "schema_name": self.schema_and_section_name,
                "schema_ref": "" if self.ref is None else self.ref,
            }
        return {"schema_url": self.url}

    def find_section_title(self, section_name):
        """Return section title or None if not found."""
        if config.CONFIG:
            for section in config.CONFIG.homepage.sections:
                if section.name == section_name:
                    return section.title
        return None

159

160
161
162
def build_template_source_data(header, rows, preview_rows_nb=5):
    """Build source data information to preview in validation report page.

    ``header`` is the list of column names and ``rows`` the list of data rows.
    At most ``preview_rows_nb`` rows are exposed as preview; a negative value
    is treated as 0 so the preview slice never drops rows from the end.
    """
    rows_count = len(rows)
    preview_rows_count = max(0, min(preview_rows_nb, rows_count))
    return {
        "source_header_info": [(colname, False) for colname in header],
        "header": header,
        "rows_nb": rows_count,
        "data_rows": rows,
        "preview_rows_count": preview_rows_count,
        "preview_rows": rows[:preview_rows_count],
    }
174
175


176
def build_ui_errors(errors):
    """Add context to errors, converts markdown content to HTML.

    Returns a new list; each input error dict is copied and completed with
    ``context``, ``content`` and, when missing, ``title``.
    """

    def improve_err(err):
        """Add context info based on row-nb presence and converts content to HTML."""
        # Context: an error carrying a row number belongs to the table body
        update_keys = {
            "context": "body" if err.get("row-number") is not None else "table",
        }

        # Set title (falls back on the error name)
        if "title" not in err:
            update_keys["title"] = err["name"]

        # Set content: message first, then description, else a placeholder
        content = "*content soon available*"
        if "message" in err:
            content = err["message"]
        elif "description" in err:
            content = err["description"]
        update_keys["content"] = commonmark(content)

        return {**err, **update_keys}

    return [improve_err(err) for err in errors]
203
204


Pierre Dittgen's avatar
Pierre Dittgen committed
205
def create_validata_ui_report(rows_count: int, validata_core_report, schema_dict):
    """Create an error report easier to handle and display using templates.

    improvements done:
    - only one table
    - errors are contextualized
    - error-counts is ok
    - errors are grouped by lines
    - errors are separated into "structure" and "body"
    - error messages are improved

    Only the first task of the report is used. ``rows_count`` is the number of
    data rows of the source and ``schema_dict`` the schema as a dict (its
    ``fields`` entries provide column titles and descriptions).
    """
    # Work on a private copy of the report
    v_report = copy.deepcopy(validata_core_report.to_dict())
    v_report_table = v_report["tasks"][0]

    # Create a new UI report from information picked in validata report
    ui_report: Dict[str, Any] = {}
    ui_report["table"] = {}

    # source headers
    headers = v_report_table["resource"]["data"][0]
    ui_report["table"]["header"] = headers

    # source dimension
    ui_report["table"]["col_count"] = len(headers)
    ui_report["table"]["row_count"] = rows_count

    # Computes column info from schema: name -> (title, description)
    fields_dict = {
        f["name"]: (f.get("title", f["name"]), f.get("description", ""))
        for f in schema_dict.get("fields", [])
    }
    ui_report["table"]["headers_title"] = [
        fields_dict[h][0] if h in fields_dict else "Colonne inconnue" for h in headers
    ]
    ui_report["table"]["headers_description"] = [
        fields_dict[h][1]
        if h in fields_dict
        else "Cette colonne n'est pas définie dans le schema"
        for h in headers
    ]
    missing_headers = [
        err["message-data"]["column-name"]
        for err in v_report_table["errors"]
        if err["code"] == "missing-header"
    ]
    # A column is flagged when unknown to the schema or reported as missing
    ui_report["table"]["cols_alert"] = [
        "table-danger" if h not in fields_dict or h in missing_headers else ""
        for h in headers
    ]

    # prepare error structure for UI needs
    errors = build_ui_errors(v_report_table["errors"])

    # Count errors and warnings
    ui_report["error_count"] = len(errors)
    ui_report["warn_count"] = len(v_report_table["structure_warnings"])
    ui_report["warnings"] = v_report_table["structure_warnings"]

    # Then group them in 2 groups : structure and body
    ui_report["table"]["errors"] = {"structure": [], "body": []}
    for err in errors:
        if is_structure_error(err):
            ui_report["table"]["errors"]["structure"].append(err)
        elif is_body_error(err):
            ui_report["table"]["errors"]["body"].append(err)

    # Group body errors by row id.
    # A dict keyed by row position (insertion-ordered) is used instead of a
    # "current row" sentinel: any row position value is accepted and errors
    # of the same row end up in one entry even if they are not consecutive.
    rows_by_id: Dict[Any, Dict] = {}
    for err in ui_report["table"]["errors"]["body"]:
        if "rowPosition" not in err:
            continue
        row_id = err["rowPosition"]
        row = rows_by_id.setdefault(row_id, {"row_id": row_id, "errors": {}})

        # Cell errors are keyed by column position, row-level errors by "row"
        column_id = err.get("fieldPosition")
        if column_id is not None:
            row["errors"][column_id] = err
        else:
            row["errors"]["row"] = err
    ui_report["table"]["errors"]["body_by_rows"] = list(rows_by_id.values())

    # Sort by error names in statistics
    ui_report["table"]["count-by-code"] = {}
    stats: Dict[str, Any] = {}
    total_errors_count = 0
    for key in ("structure", "body"):
        # convert counter into (name, count) tuples sorted by name
        key_errors = ui_report["table"]["errors"][key]
        key_errors_count = len(key_errors)
        ct = Counter(ke["name"] for ke in key_errors)

        error_stats = {
            "count": key_errors_count,
            "count-by-code": sorted(ct.items()),
        }
        total_errors_count += key_errors_count

        # Add error rows count (number of distinct rows with an error)
        if key == "body":
            error_rows = {
                err["rowPosition"] for err in key_errors if "rowPosition" in err
            }
            error_stats["rows-count"] = len(error_rows)

        stats[f"{key}-errors"] = error_stats

    stats["count"] = total_errors_count
    ui_report["table"]["error-stats"] = stats

    return ui_report
Pierre Dittgen's avatar
Pierre Dittgen committed
319
320


Pierre Dittgen's avatar
Pierre Dittgen committed
321
def compute_badge_message_and_color(badge):
    """Compute message and color from badge information.

    ``badge`` is a dict with a mandatory ``structure`` status, an optional
    ``body`` status and, when the body is not "OK", an ``error-ratio``.
    Returns a ``(message, color)`` tuple.
    """
    structure = badge["structure"]
    body = badge.get("body")

    # Bad structure, stop here
    if structure == "KO":
        return ("structure invalide", "red")

    # No body error
    if body == "OK":
        if structure == "WARN":
            return ("partiellement valide", "yellowgreen")
        return ("valide", "green")

    # else compute quality ratio percent
    valid_percent = (1 - badge["error-ratio"]) * 100.0
    msg = f"cellules valides : {valid_percent:.1f}%"
    return (msg, "red" if body == "KO" else "orange")
Pierre Dittgen's avatar
Pierre Dittgen committed
342
343
344


def get_badge_url_and_message(badge):
    """Get badge url from badge information.

    Returns a ``(badge_url, message)`` tuple; the URL targets the
    ``/static/v1.svg`` endpoint under ``config.SHIELDS_IO_BASE_URL``.
    """
    msg, color = compute_badge_message_and_color(badge)
    endpoint = urljoin(config.SHIELDS_IO_BASE_URL, "/static/v1.svg")
    query_string = urlencode({"label": "Validata", "message": msg, "color": color})
    badge_url = f"{endpoint}?{query_string}"
    return (badge_url, msg)
Pierre Dittgen's avatar
Pierre Dittgen committed
352
353


Pierre Dittgen's avatar
Pierre Dittgen committed
354
355
356
357
358
359
360
361
362
363
def iter_task_errors(validata_core_report, *, error_code: str) -> Iterable[dict]:
    """Yield the errors of the first task whose ``code`` equals ``error_code``.

    Nothing is yielded when the report has no task.
    """
    tasks = validata_core_report["tasks"]
    if tasks:
        # Selection is done eagerly, then matching errors are handed out
        matching = [
            task_error
            for task_error in tasks[0]["errors"]
            if task_error["code"] == error_code
        ]
        for task_error in matching:
            yield task_error


364
def validate(schema_instance: SchemaInstance, validata_resource: ValidataResource):
    """Validate source and display report.

    The tabular source is parsed once, validated against the schema of
    ``schema_instance`` and rendered with the ``validation_report.html``
    template. When the source cannot be read or the report contains a
    check/task/source error, an error is flashed and a redirect response is
    returned instead.
    """

    def compute_resource_info(resource: ValidataResource):
        """Describe the validated resource (type, url, filename) for the template."""
        source = resource.get_source()
        return {
            "type": "url" if source.startswith("http") else "file",
            "url": source,
            "filename": Path(source).name,
        }

    # URL of the validation form for this schema: used as redirect target on
    # error and as breadcrumb link in the report.
    validator_form_url = compute_validation_form_url(
        schema_instance.request_parameters()
    )

    # Parse source data once
    try:
        header, rows = validata_resource.extract_tabular_data()
    except ValidataSourceError as err:
        flash_error(f"Erreur de lecture du fichier source : {err.message}")
        return redirect(validator_form_url)
    rows_count = len(rows)

    # Call validata_core with parsed data
    validata_core_report = validata_core.validate(
        [header] + rows, schema_instance.schema
    )

    # Schema error detected by custom checks
    check_errors = list(
        iter_task_errors(validata_core_report, error_code="check-error")
    )
    if check_errors:
        ui_error_msg = (
            'Erreur de schéma dans la section "custom_checks" : '
            f"{check_errors[0]['note']} Validation annulée."
        )
        flash_error(ui_error_msg)
        return redirect(validator_form_url)

    # Validation task failure: details are logged, the user gets a generic message
    task_errors = list(iter_task_errors(validata_core_report, error_code="task-error"))
    if task_errors:
        log.error(task_errors[0]["message"])
        ui_error_msg = (
            "Une erreur est survenue durant la validation,"
            " consultez notre FAQ pour plus d'informations."
        )
        flash_error(ui_error_msg)
        return redirect(validator_form_url)

    # Source error
    source_errors = [
        err
        for err in validata_core_report["tasks"][0]["errors"]
        if err["code"] in {"source-error", "unknown-csv-dialect"}
    ]
    if source_errors:
        source_error = source_errors[0]
        msg = (
            "l'encodage du fichier est invalide. Veuillez le corriger"
            if "charmap" in source_error["message"]
            else source_error["message"]
        )
        flash_error(f"Erreur de source : {msg}")
        # NOTE(review): unlike the other error branches, this redirect carries
        # no schema parameters - confirm this is intended.
        return redirect(url_for("custom_validator"))

    # Computes badge from report and badge configuration.
    # Done only once the report is known to be usable, so no badge metrics
    # are computed for a report that is discarded above.
    badge_config = config.BADGE_CONFIG
    badge_url, badge_msg = None, None
    display_badge = badge_config and config.SHIELDS_IO_BASE_URL
    if display_badge:
        badge_stats = validata_core.compute_badge_metrics(
            validata_core_report, badge_config
        )
        if badge_stats:
            badge_url, badge_msg = get_badge_url_and_message(badge_stats)

    # handle report date
    report_datetime = datetime.fromisoformat(validata_core_report["date"]).astimezone()

    # create ui_report
    ui_report = create_validata_ui_report(
        rows_count, validata_core_report, schema_instance.schema
    )

    # Display report to the user
    schema_info = compute_schema_info(schema_instance.schema, schema_instance.url)

    # Build PDF report URL
    # PDF report is available if:
    # - a pdf_service has been configured
    # - tabular resource to validate is defined as an URL
    pdf_report_url = None
    if pdf_service and isinstance(validata_resource, URLValidataResource):
        base_url = url_for("pdf_report")
        query_string = urlencode(
            {
                **schema_instance.request_parameters(),
                "url": validata_resource.url,
            }
        )
        pdf_report_url = f"{base_url}?{query_string}"

    return render_template(
        "validation_report.html",
        config=config,
        badge_msg=badge_msg,
        badge_url=badge_url,
        breadcrumbs=[
            {"title": "Accueil", "url": url_for("home")},
            {"title": schema_instance.section_title},
            {"title": schema_info["title"], "url": validator_form_url},
            {"title": "Rapport de validation"},
        ],
        display_badge=display_badge,
        doc_url=schema_instance.doc_url,
        pdf_report_url=pdf_report_url,
        print_mode=request.args.get("print", "false") == "true",
        report=ui_report,
        schema_current_version=schema_instance.ref,
        schema_info=schema_info,
        section_title=schema_instance.section_title,
        source_data=build_template_source_data(header, rows),
        resource=compute_resource_info(validata_resource),
        validation_date=report_datetime.strftime("le %d/%m/%Y à %Hh%M"),
    )
496
497


498
def bytes_data(f):
    """Get bytes data from Werkzeug FileStorage instance.

    ``f`` only needs a ``save(dst)`` method writing its content to ``dst``.
    """
    iob = io.BytesIO()
    f.save(iob)
    # getvalue() returns the whole buffer whatever the stream position is,
    # so no rewind is needed.
    return iob.getvalue()


Pierre Dittgen's avatar
Pierre Dittgen committed
506
def retrieve_schema_catalog(section: Section):
    """Retrieve schema catalog and return formatted error if it fails.

    Returns ``(schema_catalog, None)`` on success. On failure returns
    ``(None, error_catalog)`` where ``error_catalog`` holds the section fields
    (except ``catalog``) plus an ``err`` entry containing an HTML message.
    """
    # Local import: only this function needs it
    import html

    def format_error_message(err_message, exc):
        """Prepare a bootstrap error message with details if wanted."""
        # The exception text is arbitrary and ends up inside HTML markup,
        # so it is escaped.
        exception_text = html.escape("\n".join(str(arg) for arg in exc.args))

        return f"""{err_message}
        <div class="float-right">
            <button type="button" class="btn btn-info btn-xs" data-toggle="collapse"
                data-target="#exception_info">détails</button>
        </div>
        <div id="exception_info" class="collapse">
                <pre>{exception_text}</pre>
        </div>
"""

    try:
        schema_catalog = get_schema_catalog(section.name)
        return (schema_catalog, None)

    except Exception as exc:
        # Pick a user-facing message matching the kind of failure
        err_msg = "une erreur s'est produite"
        if isinstance(exc, requests.ConnectionError):
            err_msg = "problème de connexion"
        elif isinstance(exc, json.JSONDecodeError):
            err_msg = "format JSON incorrect"
        elif isinstance(exc, jsonschema.exceptions.ValidationError):
            err_msg = "le catalogue ne respecte pas le schéma de référence"
        log.exception(err_msg)

        error_catalog = {
            **{k: v for k, v in section.dict().items() if k != "catalog"},
            "err": format_error_message(err_msg, exc),
        }
        return None, error_catalog


544
545
546
# Routes


Pierre Dittgen's avatar
Pierre Dittgen committed
547
@app.route("/")
def home():
    """Home page.

    Renders ``home.html`` with the sections declared in
    ``config.CONFIG.homepage.sections``.
    """

    def iter_sections():
        """Yield sections of the home page, filled with schema metadata."""
        # Iterate on all sections
        for section in config.CONFIG.homepage.sections:

            # section with only links to external validators
            if section.links:
                yield section
                continue

            # section with catalog
            if section.catalog is None:
                # skip section
                continue

            # retrieving schema catalog
            schema_catalog, catalog_error = retrieve_schema_catalog(section)
            if schema_catalog is None:
                yield catalog_error
                continue

            # Working on catalog
            schema_info_list = []
            for schema_reference in schema_catalog.references:
                # retain tableschema only
                if schema_reference.get_schema_type() != "tableschema":
                    continue

                # Loads default table schema for each schema reference
                schema_name = schema_reference.name
                schema_info = {"name": schema_name}
                try:
                    table_schema = fetch_schema(schema_reference.get_schema_url())
                except json.JSONDecodeError:
                    schema_info["err"] = True
                    schema_info["title"] = (
                        f"le format du schéma « {schema_name} » n'est pas reconnu"
                    )
                except Exception:
                    schema_info["err"] = True
                    schema_info["title"] = (
                        f"le schéma « {schema_name} » n'est pas disponible"
                    )
                else:
                    schema_info["title"] = table_schema.get("title") or schema_name
                schema_info_list.append(schema_info)

            # Sort schemas by title, ignoring case and accents
            schema_info_list = sorted(
                schema_info_list, key=lambda sc: strip_accents(sc["title"].lower())
            )

            yield {
                **{k: v for k, v in section.dict().items() if k != "catalog"},
                "catalog": schema_info_list,
            }

    return render_template("home.html", config=config, sections=list(iter_sections()))
608
609


Pierre Dittgen's avatar
Pierre Dittgen committed
610
@app.route("/pdf")
def pdf_report():
    """Build a PDF of a validation report and send it as a download.

    Reads the ``url`` query parameter (the resource to validate), builds the
    external URL of the validation page in print mode for that resource and
    the schema described by the request arguments, then asks ``pdf_service``
    to render that URL.

    Returns:
        A response carrying the PDF bytes as an attachment, or a redirect to
        the home page (with a flashed error) when the URL is missing, the PDF
        service is not configured, or rendering raises.
    """
    err_prefix = "Erreur de génération du rapport PDF"

    def fail(detail):
        # Every failure path flashes a prefixed message and goes back home.
        flash_error(err_prefix + detail)
        return redirect(url_for("home"))

    url_param = request.args.get("url")
    if not url_param:
        return fail(" : URL non fournie")

    if pdf_service is None:
        return fail(" : service de génération non configuré")

    # Compute validation report URL
    schema_instance = SchemaInstance(request.args)
    base_url = url_for("custom_validator", _external=True)

    query = {"input": "url", "print": "true", "url": url_param}
    # Schema parameters come last so they win over the fixed keys above.
    query.update(schema_instance.request_parameters())

    validation_url = f"{base_url}?{urlencode(query)}"
    log.info("Validation URL = %s", validation_url)

    # Ask for PDF report generation
    try:
        pdf_bytes_content = pdf_service.render(validation_url)
    except Exception:
        # Log from inside the handler so the traceback is recorded.
        log.exception(err_prefix)
        return fail(" : contactez votre administrateur")

    # Compute pdf filename
    timestamp = datetime.now().strftime("%d-%m-%Y %Hh%M")
    pdf_filename = f"Rapport de validation {timestamp}.pdf"

    # Prepare and send response
    response = make_response(pdf_bytes_content)
    headers = response.headers
    headers.set("Content-Disposition", "attachment", filename=pdf_filename)
    headers.set("Content-Length", len(pdf_bytes_content))
    headers.set("Content-Type", "application/pdf")
    return response
656
657


Pierre Dittgen's avatar
Pierre Dittgen committed
658
def extract_schema_metadata(table_schema: frictionless.Schema):
    """Return the schema's top-level properties except its "fields" entry.

    The result is a new dict; the given schema is not modified.
    """
    metadata = dict(table_schema.items())
    metadata.pop("fields", None)
    return metadata
Pierre Dittgen's avatar
Pierre Dittgen committed
661
662


Pierre Dittgen's avatar
Pierre Dittgen committed
663
def compute_schema_info(table_schema: frictionless.Schema, schema_url):
    """Build the schema description dict used by the validator form page.

    The result holds ``schema_url`` under "path" together with every schema
    metadata property returned by ``extract_schema_metadata``.
    """
    # Start from the URL given by the user (in case schema was given by URL)...
    schema_info = {"path": schema_url}
    # ...then let the schema metadata take over: a "path" metadata property
    # can be found in Table Schema, and it must override `schema_url`.
    schema_info.update(extract_schema_metadata(table_schema))
    return schema_info
674
675


676
def compute_validation_form_url(request_parameters: dict):
    """Return the validation form URL with the given parameters as query string."""
    form_url = url_for("custom_validator")
    query_string = urlencode(request_parameters)
    return f"{form_url}?{query_string}"
Pierre Dittgen's avatar
Pierre Dittgen committed
680
681


Pierre Dittgen's avatar
Pierre Dittgen committed
682
@app.route("/table-schema", methods=["GET", "POST"])
def custom_validator():
    """Display the validator form, or validate the submitted resource.

    GET without an ``input`` parameter renders the validation form.
    GET with ``input`` validates the resource found at the ``url`` parameter.
    POST with ``input=file`` validates the uploaded ``file``.

    Invalid submissions flash a message and redirect to the validation form;
    an unsupported POST ``input`` value yields a 400 response.
    """
    method = request.method

    if method == "GET":
        # "input" is a hidden form parameter telling whether this is the
        # initial page display or whether a validation has been asked for.
        requested_input = request.args.get("input")
        # URL of the resource to be validated
        resource_url = request.args.get("url")

        schema_instance = SchemaInstance(request.args)

        # Process URL
        if requested_input is not None:
            form_url = compute_validation_form_url(
                schema_instance.request_parameters()
            )
            if not resource_url:
                flash_error("Vous n'avez pas indiqué d'URL à valider")
                return redirect(form_url)
            try:
                return validate(schema_instance, URLValidataResource(resource_url))
            except frictionless.FrictionlessException as ex:
                flash_error(ex.error.message)
                return redirect(form_url)

        # First form display
        schema_info = compute_schema_info(schema_instance.schema, schema_instance.url)
        template_context = {
            "config": config,
            "branches": schema_instance.branches,
            "breadcrumbs": [
                {"url": url_for("home"), "title": "Accueil"},
                {"title": schema_instance.section_title},
                {"title": schema_info["title"]},
            ],
            "doc_url": schema_instance.doc_url,
            "schema_current_version": schema_instance.ref,
            "schema_info": schema_info,
            "schema_params": schema_instance.request_parameters(),
            "section_title": schema_instance.section_title,
            "tags": schema_instance.tags,
        }
        return render_template("validation_form.html", **template_context)

    if method == "POST":
        schema_instance = SchemaInstance(request.form)

        requested_input = request.form.get("input")
        if requested_input is None:
            flash_error("Vous n'avez pas indiqué de fichier à valider")
            return redirect(
                compute_validation_form_url(schema_instance.request_parameters())
            )

        # Only file validation is handled for POST requests
        if requested_input != "file":
            return "Combinaison de paramètres non supportée", 400

        uploaded_file = request.files.get("file")
        if uploaded_file is None:
            flash_warning("Vous n'avez pas indiqué de fichier à valider")
            return redirect(
                compute_validation_form_url(schema_instance.request_parameters())
            )

        resource = FileContentValidataResource(
            uploaded_file.filename, bytes_data(uploaded_file)
        )
        return validate(schema_instance, resource)

    return "Method not allowed", 405