"""Routes."""

import copy
import html
import io
import json
import logging
from collections import Counter
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List
from urllib.parse import urlencode, urljoin

import frictionless
import jsonschema
import requests
import validata_core
from commonmark import commonmark
from flask import abort, make_response, redirect, render_template, request, url_for
from opendataschema import GitSchemaReference, by_commit_date
from validata_core.helpers import (
    FileContentValidataResource,
    URLValidataResource,
    ValidataResource,
    ValidataSourceError,
    is_body_error,
    is_structure_error,
)

from . import app, config, fetch_schema, pdf_service, schema_catalog_registry
from .model import Section
from .ui_util import flash_error, flash_warning
from .validata_util import strip_accents

# Module-level logger, named after this module.
log = logging.getLogger(__name__)


def get_schema_catalog(section_name):
    """Return the schema catalog associated with *section_name*.

    The catalog is built by ``schema_catalog_registry``; whatever that call
    returns or raises is passed through unchanged.
    """
    return schema_catalog_registry.build_schema_catalog(section_name)


class SchemaInstance:
    """Handy class to handle schema information.

    A schema is identified either by a direct ``schema_url`` or by a
    ``schema_name`` of the form ``"<section>.<schema>"`` that is resolved
    through the section's schema catalog, optionally pinned to ``schema_ref``.
    """

    def __init__(self, parameter_dict):
        """Resolve the schema described by *parameter_dict* and fetch it.

        Args:
            parameter_dict: mapping of request parameters; reads
                ``schema_url``, ``schema_name`` and ``schema_ref``.

        Every failure goes through Flask's ``abort``: a 400 for an invalid
        name or an unusable catalog, a redirect to the home page (with a
        flashed error) when no schema is designated or it cannot be fetched,
        and a redirect to the validation form when a Git schema is requested
        without a ref.
        """
        self.section_name = None
        self.section_title = None
        # Initialised here so the attributes exist on the ``schema_url`` path.
        self.schema_and_section_name = None
        self.name = None
        self.url = None
        self.ref = None
        self.reference = None
        self.doc_url = None
        self.branches = None
        self.tags = None
        self.schema = None

        if parameter_dict.get("schema_url"):
            # From schema_url
            self.url = parameter_dict["schema_url"]
            self.section_title = "Autre schéma"
        elif parameter_dict.get("schema_name"):
            # From schema_name (and schema_ref)
            self._init_from_catalog(parameter_dict)
        else:
            flash_error("Erreur dans la récupération des informations de schéma")
            abort(redirect(url_for("home")))

        try:
            self.schema = fetch_schema(self.url)
        except json.JSONDecodeError:
            self._abort_to_home("Le format du schéma n'est pas reconnu")
        except Exception:
            self._abort_to_home("Impossible de récupérer le schéma")

    @staticmethod
    def _abort_to_home(err_msg):
        """Log the current exception, flash *err_msg* and redirect to home."""
        log.exception(err_msg)
        flash_error(err_msg)
        abort(redirect(url_for("home")))

    def _init_from_catalog(self, parameter_dict):
        """Fill name, section, ref and url from the section's schema catalog."""
        self.schema_and_section_name = parameter_dict["schema_name"]
        # request_parameters() writes a missing ref as "", so an empty value
        # must be read back as "no ref" rather than as a ref named "".
        self.ref = parameter_dict.get("schema_ref") or None

        # Check schema name
        chunks = self.schema_and_section_name.split(".")
        if len(chunks) != 2:
            abort(400, "Paramètre 'schema_name' invalide")

        self.section_name, self.name = chunks
        self.section_title = self.find_section_title(self.section_name)

        # Look for schema catalog first
        try:
            table_schema_catalog = get_schema_catalog(self.section_name)
        except Exception:
            log.exception(
                "Could not build schema catalog for section %r", self.section_name
            )
            abort(400, "Erreur de traitement du catalogue")
        if table_schema_catalog is None:
            abort(400, "Catalogue indisponible")

        schema_reference = table_schema_catalog.reference_by_name.get(self.name)
        if schema_reference is None:
            abort(
                400,
                f"Schéma {self.name!r} non trouvé dans le catalogue de la "
                f"section {self.section_name!r}",
            )

        if isinstance(schema_reference, GitSchemaReference):
            self.tags = sorted(
                schema_reference.iter_tags(), key=by_commit_date, reverse=True
            )
            if self.ref is None:
                # No ref requested: redirect to the first tag of the sorted
                # list, or to the default branch when there is no tag.
                schema_ref = (
                    self.tags[0]
                    if self.tags
                    else schema_reference.get_default_branch()
                )
                abort(
                    redirect(
                        compute_validation_form_url(
                            {
                                "schema_name": self.schema_and_section_name,
                                "schema_ref": schema_ref.name,
                            }
                        )
                    )
                )
            tag_names = {tag.name for tag in self.tags}
            # Skip branches whose name is also a tag name.
            self.branches = [
                branch
                for branch in schema_reference.iter_branches()
                if branch.name not in tag_names
            ]
            self.doc_url = schema_reference.get_doc_url(
                ref=self.ref
            ) or schema_reference.get_project_url(ref=self.ref)

        self.url = schema_reference.get_schema_url(ref=self.ref)

    def request_parameters(self):
        """Build request parameter dict to identify schema.

        Returns ``schema_name``/``schema_ref`` for a catalog schema (a missing
        ref is written as an empty string), ``schema_url`` otherwise.
        """
        if self.name:
            return {
                "schema_name": self.schema_and_section_name,
                "schema_ref": "" if self.ref is None else self.ref,
            }
        return {"schema_url": self.url}

    def find_section_title(self, section_name):
        """Return section title or None if not found."""
        if config.CONFIG:
            for section in config.CONFIG.homepage.sections:
                if section.name == section_name:
                    return section.title
        return None


def build_template_source_data(header, rows, preview_rows_nb: int = 5) -> Dict[str, Any]:
    """Build source data information to preview in validation report page.

    Args:
        header: column names of the source.
        rows: data rows of the source (must support ``len`` and slicing).
        preview_rows_nb: maximum number of rows to expose as preview; negative
            values are treated as 0.

    Returns:
        A dict holding the header, every row, the row count and the preview
        rows (the first ``preview_rows_count`` rows).
    """
    rows_count = len(rows)
    # Clamp to 0 so a negative request can neither yield a negative count nor
    # turn the slice below into "all rows but the last k".
    preview_rows_count = max(0, min(preview_rows_nb, rows_count))
    return {
        "source_header_info": [(colname, False) for colname in header],
        "header": header,
        "rows_nb": rows_count,
        "data_rows": rows,
        "preview_rows_count": preview_rows_count,
        "preview_rows": rows[:preview_rows_count],
    }


def build_ui_errors(errors):
    """Add context to errors, converts markdown content to HTML.

    Args:
        errors: iterable of error dicts.

    Returns:
        A new list of dicts; each one is a copy of the input error with
        ``context``, ``content`` and, when missing, ``title`` added.
    """

    def improve_err(err):
        """Add context info based on row presence and convert content to HTML."""
        # Context: the error belongs to the body when it designates a row.
        # "row-number" is the key this function always looked at; the row
        # grouping elsewhere in this module reads "rowPosition", so both are
        # accepted here.
        has_row = any(
            err.get(key) is not None for key in ("row-number", "rowPosition")
        )
        update_keys = {"context": "body" if has_row else "table"}

        # Set title
        if "title" not in err:
            update_keys["title"] = err["name"]

        # Set content: message first, description as fallback
        content = "*content soon available*"
        if "message" in err:
            content = err["message"]
        elif "description" in err:
            content = err["description"]
        update_keys["content"] = commonmark(content)

        return {**err, **update_keys}

    return [improve_err(err) for err in errors]


def create_validata_ui_report(rows_count: int, validata_core_report, schema_dict):
    """Create an error report easier to handle and display using templates.

    improvements done:
    - only one table
    - errors are contextualized
    - error-counts is ok
    - errors are grouped by lines
    - errors are separated into "structure" and "body"
    - error messages are improved

    Args:
        rows_count: number of data rows of the validated source.
        validata_core_report: report object exposing ``to_dict()``; only its
            first task is used.
        schema_dict: schema as a dict; its ``fields`` give column titles and
            descriptions.

    Returns:
        The UI report dict (``table``, ``error_count``, ``warn_count``,
        ``warnings``).
    """
    v_report = copy.deepcopy(validata_core_report.to_dict())
    v_report_table = v_report["tasks"][0]

    # Create a new UI report from information picked in validata report
    ui_report: Dict[str, Any] = {"table": {}}
    table = ui_report["table"]

    # source headers and dimension
    headers = v_report_table["resource"]["data"][0]
    table["header"] = headers
    table["col_count"] = len(headers)
    table["row_count"] = rows_count

    # Computes column info from schema: name -> (title, description)
    fields_dict = {
        f["name"]: (f.get("title", f["name"]), f.get("description", ""))
        for f in schema_dict.get("fields", [])
    }
    table["headers_title"] = [
        fields_dict[h][0] if h in fields_dict else "Colonne inconnue" for h in headers
    ]
    table["headers_description"] = [
        fields_dict[h][1]
        if h in fields_dict
        else "Cette colonne n'est pas définie dans le schema"
        for h in headers
    ]
    missing_headers = [
        err["message-data"]["column-name"]
        for err in v_report_table["errors"]
        if err["code"] == "missing-header"
    ]
    # A column is flagged when it is unknown to the schema or reported missing
    table["cols_alert"] = [
        "table-danger" if h not in fields_dict or h in missing_headers else ""
        for h in headers
    ]

    # prepare error structure for UI needs
    errors = build_ui_errors(v_report_table["errors"])

    # Count errors and warnings
    ui_report["error_count"] = len(errors)
    ui_report["warn_count"] = len(v_report_table["structure_warnings"])
    ui_report["warnings"] = v_report_table["structure_warnings"]

    # Then group them in 2 groups : structure and body
    # (an error that is neither is left out of both groups)
    table["errors"] = {"structure": [], "body": []}
    for err in errors:
        if is_structure_error(err):
            table["errors"]["structure"].append(err)
        elif is_body_error(err):
            table["errors"]["body"].append(err)

    table["errors"]["body_by_rows"] = _group_body_errors_by_row(
        table["errors"]["body"]
    )

    table["count-by-code"] = {}
    table["error-stats"] = _compute_error_stats(table["errors"])

    return ui_report


def _group_body_errors_by_row(body_errors):
    """Group body errors by row position, rows in order of first appearance.

    Errors without ``rowPosition`` are skipped. Inside a row, errors are keyed
    by ``fieldPosition``, or by ``"row"`` when no field is designated; a later
    error for the same key replaces the earlier one.
    """
    # Keyed by row position, so there is no sentinel row id and no reliance
    # on errors of the same row being contiguous.
    errors_by_row: Dict[Any, Dict] = {}
    for err in body_errors:
        if "rowPosition" not in err:
            continue
        row_errors = errors_by_row.setdefault(err["rowPosition"], {})
        column_id = err.get("fieldPosition")
        row_errors["row" if column_id is None else column_id] = err
    return [
        {"row_id": row_id, "errors": row_errors}
        for row_id, row_errors in errors_by_row.items()
    ]


def _compute_error_stats(errors_by_kind):
    """Count structure and body errors, per error name and in total."""
    stats: Dict[str, Any] = {}
    total_errors_count = 0
    for key in ("structure", "body"):
        key_errors = errors_by_kind[key]
        error_stats = {
            "count": len(key_errors),
            # (name, count) tuples sorted by error name
            "count-by-code": sorted(Counter(ke["name"] for ke in key_errors).items()),
        }
        total_errors_count += len(key_errors)

        # Add error rows count
        if key == "body":
            error_stats["rows-count"] = len(
                {err["rowPosition"] for err in key_errors if "rowPosition" in err}
            )

        stats[f"{key}-errors"] = error_stats

    stats["count"] = total_errors_count
    return stats


def compute_badge_message_and_color(badge):
    """Compute message and color from badge information.

    Args:
        badge: dict with a ``structure`` status, an optional ``body`` status
            and, when the body status is not ``"OK"``, an ``error-ratio``.

    Returns:
        A ``(message, color)`` tuple.
    """
    structure = badge["structure"]
    body = badge.get("body")

    # Bad structure, stop here
    if structure == "KO":
        return ("structure invalide", "red")

    # No body error
    if body == "OK":
        if structure == "WARN":
            return ("partiellement valide", "yellowgreen")
        return ("valide", "green")

    # else compute quality ratio percent
    valid_percent = (1 - badge["error-ratio"]) * 100.0
    msg = f"cellules valides : {valid_percent:.1f}%"
    return (msg, "red" if body == "KO" else "orange")


def get_badge_url_and_message(badge):
    """Get badge url and badge message from badge information.

    The url points to ``/static/v1.svg`` under ``config.SHIELDS_IO_BASE_URL``,
    with the label, message and color passed as query parameters.

    Returns:
        A ``(badge_url, message)`` tuple.
    """
    msg, color = compute_badge_message_and_color(badge)
    endpoint = urljoin(config.SHIELDS_IO_BASE_URL, "/static/v1.svg")
    query_string = urlencode({"label": "Validata", "message": msg, "color": color})
    return (f"{endpoint}?{query_string}", msg)


def validate(schema_instance: SchemaInstance, validata_resource: ValidataResource):
    """Validate source and display report.

    Args:
        schema_instance: resolved schema; its ``schema`` is used to validate.
        validata_resource: resource the header and rows are extracted from.

    Returns:
        The rendered ``validation_report.html`` page, or a redirect to the
        validation form (with a flashed error) when the source cannot be read
        or validated.
    """

    def compute_resource_info(resource: ValidataResource):
        """Describe the resource (type, url, filename) for the template."""
        source = resource.get_source()
        return {
            "type": "url" if source.startswith("http") else "file",
            "url": source,
            "filename": Path(source).name,
        }

    # Every error path sends the user back to the form of the same schema
    validator_form_url = compute_validation_form_url(
        schema_instance.request_parameters()
    )

    # Parse source data once
    try:
        header, rows = validata_resource.extract_tabular_data()
    except ValidataSourceError as err:
        flash_error(f"Erreur de lecture du fichier source : {err.message}")
        return redirect(validator_form_url)
    rows_count = len(rows)

    # Call validata_core with parsed data
    validata_core_report = validata_core.validate(
        [header] + rows, schema_instance.schema
    )

    # Non table errors
    if validata_core_report["errors"]:
        msg = validata_core_report["errors"][0].message
        log.error(msg)
        if msg.startswith("The validation task has an error"):
            ui_error_msg = (
                "La validation ne peut s'effectuer sur ce fichier, "
                "celui-ci n'est pas compatible avec le schéma."
            )
        else:
            ui_error_msg = f"Une erreur est survenue durant la validation : {msg}"
        flash_error(ui_error_msg)
        return redirect(validator_form_url)

    # Source error: only the first one is reported
    source_error = next(
        (
            err
            for err in validata_core_report["tasks"][0]["errors"]
            if err["code"] in {"source-error", "unknown-csv-dialect"}
        ),
        None,
    )
    if source_error is not None:
        msg = (
            "l'encodage du fichier est invalide. Veuillez le corriger"
            if "charmap" in source_error["message"]
            else source_error["message"]
        )
        flash_error(f"Erreur de source : {msg}")
        # Keep the schema parameters, like the other error paths do
        return redirect(validator_form_url)

    # Computes badge from report and badge configuration, only once the
    # report is known to be displayable
    badge_config = config.BADGE_CONFIG
    badge_url, badge_msg = None, None
    display_badge = badge_config and config.SHIELDS_IO_BASE_URL
    if display_badge:
        badge_stats = validata_core.compute_badge_metrics(
            validata_core_report, badge_config
        )
        if badge_stats:
            badge_url, badge_msg = get_badge_url_and_message(badge_stats)

    # handle report date
    report_datetime = datetime.fromisoformat(validata_core_report["date"]).astimezone()

    # create ui_report
    ui_report = create_validata_ui_report(
        rows_count, validata_core_report, schema_instance.schema
    )

    schema_info = compute_schema_info(schema_instance.schema, schema_instance.url)

    # Build PDF report URL
    # PDF report is available if:
    # - a pdf_service has been configured
    # - tabular resource to validate is defined as an URL
    pdf_report_url = None
    if pdf_service and isinstance(validata_resource, URLValidataResource):
        base_url = url_for("pdf_report")
        query_string = urlencode(
            {
                **schema_instance.request_parameters(),
                "url": validata_resource.url,
            }
        )
        pdf_report_url = f"{base_url}?{query_string}"

    # Display report to the user
    return render_template(
        "validation_report.html",
        config=config,
        badge_msg=badge_msg,
        badge_url=badge_url,
        breadcrumbs=[
            {"title": "Accueil", "url": url_for("home")},
            {"title": schema_instance.section_title},
            {"title": schema_info["title"], "url": validator_form_url},
            {"title": "Rapport de validation"},
        ],
        display_badge=display_badge,
        doc_url=schema_instance.doc_url,
        pdf_report_url=pdf_report_url,
        print_mode=request.args.get("print", "false") == "true",
        report=ui_report,
        schema_current_version=schema_instance.ref,
        schema_info=schema_info,
        section_title=schema_instance.section_title,
        source_data=build_template_source_data(header, rows),
        resource=compute_resource_info(validata_resource),
        validation_date=report_datetime.strftime("le %d/%m/%Y à %Hh%M"),
    )


def bytes_data(f):
    """Get bytes data from Werkzeug FileStorage instance.

    Args:
        f: object exposing ``save(dst)``, which writes its content to the
            given binary stream.

    Returns:
        Everything ``f.save`` wrote, as ``bytes``.
    """
    with io.BytesIO() as buffer:
        f.save(buffer)
        # getvalue() returns the whole buffer whatever the stream position
        return buffer.getvalue()


def retrieve_schema_catalog(section: Section):
    """Retrieve schema catalog and return formatted error if it fails.

    Returns:
        ``(schema_catalog, None)`` on success, otherwise ``(None, error)``
        where ``error`` is the section's fields (minus ``catalog``) plus an
        ``err`` entry holding an HTML error message.
    """

    def format_error_message(err_message, exc):
        """Prepare a bootstrap error message with details if wanted."""
        # The exception text lands inside HTML markup: escape it so that
        # characters such as "<" cannot break or inject markup.
        exception_text = html.escape("\n".join(str(arg) for arg in exc.args))

        return f"""{err_message}
        <div class="float-right">
            <button type="button" class="btn btn-info btn-xs" data-toggle="collapse"
                data-target="#exception_info">détails</button>
        </div>
        <div id="exception_info" class="collapse">
                <pre>{exception_text}</pre>
        </div>
"""

    try:
        return (get_schema_catalog(section.name), None)
    except Exception as exc:
        # Pick the most specific user-facing message for the failure
        if isinstance(exc, requests.ConnectionError):
            err_msg = "problème de connexion"
        elif isinstance(exc, json.decoder.JSONDecodeError):
            err_msg = "format JSON incorrect"
        elif isinstance(exc, jsonschema.exceptions.ValidationError):
            err_msg = "le catalogue ne respecte pas le schéma de référence"
        else:
            err_msg = "une erreur s'est produite"
        log.exception(err_msg)

        error_catalog = {
            **{k: v for k, v in section.dict().items() if k != "catalog"},
            "err": format_error_message(err_msg, exc),
        }
        return None, error_catalog


# Routes


@app.route("/")
def home():
    """Home page."""

    def load_schema_info(schema_reference):
        """Build the home page entry of one schema: name, title, error flag."""
        schema_info = {"name": schema_reference.name}
        # Loads default table schema for the schema reference
        try:
            table_schema = fetch_schema(schema_reference.get_schema_url())
        except json.JSONDecodeError:
            log.exception("Schema %r is not valid JSON", schema_info["name"])
            schema_info["err"] = True
            schema_info["title"] = (
                f"le format du schéma « {schema_info['name']} » "
                "n'est pas reconnu"
            )
        except Exception:
            log.exception("Schema %r could not be fetched", schema_info["name"])
            schema_info["err"] = True
            schema_info["title"] = (
                f"le schéma « {schema_info['name']} » " "n'est pas disponible"
            )
        else:
            schema_info["title"] = table_schema.get("title") or schema_info["name"]
        return schema_info

    def iter_sections():
        """Yield sections of the home page, filled with schema metadata."""
        # Without configuration there is no section to display
        if not config.CONFIG:
            return

        # Iterate on all sections
        for section in config.CONFIG.homepage.sections:

            # section with only links to external validators
            if section.links:
                yield section
                continue

            # section without catalog: skip it
            if section.catalog is None:
                continue

            # retrieving schema catalog
            schema_catalog, catalog_error = retrieve_schema_catalog(section)
            if schema_catalog is None:
                yield catalog_error
                continue

            # Working on catalog, retaining tableschema only
            schema_info_list = sorted(
                (
                    load_schema_info(schema_reference)
                    for schema_reference in schema_catalog.references
                    if schema_reference.get_schema_type() == "tableschema"
                ),
                # case-insensitive, accent-insensitive title order
                key=lambda sc: strip_accents(sc["title"].lower()),
            )

            yield {
                **{k: v for k, v in section.dict().items() if k != "catalog"},
                "catalog": schema_info_list,
            }

    return render_template("home.html", config=config, sections=list(iter_sections()))


@app.route("/pdf")
def pdf_report():
    """Generate a PDF validation report and send it as a file download.

    The resource to validate is read from the ``url`` query parameter; the
    remaining query parameters are handed to ``SchemaInstance``.

    Every failure (missing URL, PDF service not configured, rendering error)
    flashes an error message and redirects to the home page.
    """
    err_prefix = "Erreur de génération du rapport PDF"

    def fail(detail):
        """Flash the prefixed error message and redirect to the home page."""
        flash_error(err_prefix + detail)
        return redirect(url_for("home"))

    url_param = request.args.get("url")
    if not url_param:
        return fail(" : URL non fournie")

    if pdf_service is None:
        return fail(" : service de génération non configuré")

    # Compute the absolute URL of the validation report page (print mode)
    # that is handed to the PDF service.
    schema_instance = SchemaInstance(request.args)
    base_url = url_for("custom_validator", _external=True)
    parameter_dict = {"input": "url", "print": "true", "url": url_param}
    # Schema parameters come last so they win on key collision.
    parameter_dict.update(schema_instance.request_parameters())
    validation_url = "{}?{}".format(base_url, urlencode(parameter_dict))
    log.info("Validation URL = %s", validation_url)

    # Ask for PDF report generation
    try:
        pdf_bytes_content = pdf_service.render(validation_url)
    except Exception:
        # Keep the traceback in the logs, show a generic message to the user.
        log.exception(err_prefix)
        return fail(" : contactez votre administrateur")

    # Compute the PDF filename from the current local date and time
    timestamp = datetime.now().strftime("%d-%m-%Y %Hh%M")
    pdf_filename = "Rapport de validation {}.pdf".format(timestamp)

    # Prepare and send the response
    response = make_response(pdf_bytes_content)
    headers = response.headers
    headers.set("Content-Disposition", "attachment", filename=pdf_filename)
    headers.set("Content-Length", len(pdf_bytes_content))
    headers.set("Content-Type", "application/pdf")
    return response
637
638


Pierre Dittgen's avatar
Pierre Dittgen committed
639
def extract_schema_metadata(table_schema: frictionless.Schema):
    """Return the schema header metadata (author, contributor, version...).

    Every top-level entry of ``table_schema`` is copied into a new dict,
    except the ``"fields"`` entry.
    """
    metadata = {}
    for key, value in table_schema.items():
        if key == "fields":
            continue
        metadata[key] = value
    return metadata
Pierre Dittgen's avatar
Pierre Dittgen committed
642
643


Pierre Dittgen's avatar
Pierre Dittgen committed
644
def compute_schema_info(table_schema: frictionless.Schema, schema_url):
    """Build the schema information dict used by the validator form page.

    The result holds ``schema_url`` under the ``"path"`` key, merged with the
    metadata returned by ``extract_schema_metadata(table_schema)``.
    """
    # Seed with the URL given by the user (in case the schema was given by
    # URL). The schema metadata is merged afterwards, so a "path" metadata
    # property found in the Table Schema overrides `schema_url`.
    schema_info = {"path": schema_url}
    schema_info.update(extract_schema_metadata(table_schema))
    return schema_info
655
656


657
def compute_validation_form_url(request_parameters: dict):
    """Return the validation form URL carrying the given query parameters.

    ``request_parameters`` is URL-encoded and appended as the query string of
    the ``custom_validator`` endpoint URL.
    """
    form_url = url_for("custom_validator")
    query_string = urlencode(request_parameters)
    return "{}?{}".format(form_url, query_string)
Pierre Dittgen's avatar
Pierre Dittgen committed
661
662


Pierre Dittgen's avatar
Pierre Dittgen committed
663
@app.route("/table-schema", methods=["GET", "POST"])
def custom_validator():
    """Display the validator form or run a validation.

    - GET without ``input``: render the validation form for the schema
      described by the query parameters.
    - GET with ``input``: validate the resource located at the ``url`` query
      parameter.
    - POST with ``input=file``: validate the uploaded ``file``.

    When the resource to validate is missing, a message is flashed and the
    user is redirected to the validation form. Any other POST parameter
    combination returns a 400 response, any other HTTP method a 405.
    """
    if request.method == "GET":

        # `input` is a hidden form parameter telling whether this is the
        # initial page display or whether a validation has been asked for
        input_param = request.args.get("input")

        # URL of the resource to be validated
        url_param = request.args.get("url")

        schema_instance = SchemaInstance(request.args)

        # First form display
        if input_param is None:
            schema_info = compute_schema_info(
                schema_instance.schema, schema_instance.url
            )
            return render_template(
                "validation_form.html",
                config=config,
                branches=schema_instance.branches,
                breadcrumbs=[
                    {"url": url_for("home"), "title": "Accueil"},
                    {"title": schema_instance.section_title},
                    {"title": schema_info["title"]},
                ],
                doc_url=schema_instance.doc_url,
                schema_current_version=schema_instance.ref,
                schema_info=schema_info,
                schema_params=schema_instance.request_parameters(),
                section_title=schema_instance.section_title,
                tags=schema_instance.tags,
            )

        # Process URL
        validation_form_url = compute_validation_form_url(
            schema_instance.request_parameters()
        )

        if not url_param:
            flash_error("Vous n'avez pas indiqué d'URL à valider")
            return redirect(validation_form_url)
        try:
            return validate(schema_instance, URLValidataResource(url_param))
        except frictionless.FrictionlessException as ex:
            # Report the error to the user on the form page
            flash_error(ex.error.message)
            return redirect(validation_form_url)

    elif request.method == "POST":

        schema_instance = SchemaInstance(request.form)

        input_param = request.form.get("input")
        if input_param is None:
            flash_error("Vous n'avez pas indiqué de fichier à valider")
            return redirect(
                compute_validation_form_url(schema_instance.request_parameters())
            )

        # File validation
        if input_param == "file":
            f = request.files.get("file")
            # When the form is submitted without selecting a file, browsers
            # still send an empty file part with an empty filename, so a
            # missing filename must be handled like a missing file.
            if f is None or not f.filename:
                flash_warning("Vous n'avez pas indiqué de fichier à valider")
                return redirect(
                    compute_validation_form_url(schema_instance.request_parameters())
                )
            return validate(
                schema_instance, FileContentValidataResource(f.filename, bytes_data(f))
            )

        return "Combinaison de paramètres non supportée", 400

    else:
        return "Method not allowed", 405