"""
    Routes
"""
import copy
Pierre Dittgen's avatar
wip  
Pierre Dittgen committed
5
import io
6
import json
Christophe Benz's avatar
Christophe Benz committed
7
import logging
8
import subprocess
Christophe Benz's avatar
Christophe Benz committed
9
import tempfile
10
from datetime import datetime
11
from operator import itemgetter
12
from pathlib import Path
Christophe Benz's avatar
Christophe Benz committed
13
from urllib.parse import urlencode, urljoin
14

15
import jsonschema
Pierre Dittgen's avatar
wip  
Pierre Dittgen committed
16
import requests
17
import tableschema
18
from backports.datetime_fromisoformat import MonkeyPatch
19
from commonmark import commonmark
20
from flask import abort, make_response, redirect, render_template, request, url_for
Pierre Dittgen's avatar
wip  
Pierre Dittgen committed
21

Pierre Dittgen's avatar
Pierre Dittgen committed
22
import tabulator
23
from opendataschema import GitSchemaReference, by_commit_date
Pierre Dittgen's avatar
Pierre Dittgen committed
24
from validata_core import messages, repair
25

26
from . import app, config, schema_catalog_registry, tableschema_from_url
27
from .ui_util import flash_error, flash_warning
28
from .validata_util import UploadedFileValidataResource, URLValidataResource, ValidataResource, strip_accents
29

30 31
MonkeyPatch.patch_fromisoformat()

Christophe Benz's avatar
Christophe Benz committed
32 33
log = logging.getLogger(__name__)

34

35 36 37 38 39
def get_schema_catalog(section_name):
    """Build and return the schema catalog registered for `section_name`.

    Delegates to `schema_catalog_registry.build_schema_catalog`; whatever that
    call returns (or raises) is passed through unchanged.
    """
    catalog = schema_catalog_registry.build_schema_catalog(section_name)
    return catalog


40 41 42
class SchemaInstance:
    """Handy class gathering the information about the schema selected by the user.

    A schema is designated either by its URL (`schema_url` parameter) or by a
    `<section>.<name>` identifier (`schema_name` parameter) resolved through the
    schema catalog of the section, with an optional `schema_ref` version.
    """

    def __init__(self, parameter_dict):
        """Initialize the schema instance from request parameters.

        `parameter_dict` is a dict-like object (e.g. `request.args`) read through
        `.get()` and `[]`. Recognized keys: `schema_url`, `schema_name` and
        `schema_ref`.

        This constructor does not return normally on failure: it calls Flask's
        `abort()` either with a 400 status (invalid name, catalog problem,
        unknown schema) or with a redirect response (missing parameters, missing
        ref for a git schema, schema that cannot be loaded).
        """
        self.section_name = None
        self.section_title = None
        self.name = None
        # Always defined, whichever branch is taken below, so that reading the
        # attribute never raises AttributeError on a `schema_url` instance.
        self.schema_and_section_name = None
        self.url = None
        self.ref = None
        self.reference = None
        self.doc_url = None
        self.branches = None
        self.tags = None
        self.schema = None

        # From schema_url
        if parameter_dict.get("schema_url"):
            self.url = parameter_dict["schema_url"]
            self.section_title = "Autre schéma"

        # From schema_name (and schema_ref)
        elif parameter_dict.get('schema_name'):
            self.schema_and_section_name = parameter_dict['schema_name']
            self.ref = parameter_dict.get('schema_ref')

            # Check schema name: exactly "<section>.<name>"
            chunks = self.schema_and_section_name.split('.')
            if len(chunks) != 2:
                abort(400, "Paramètre 'schema_name' invalide")

            self.section_name, self.name = chunks
            self.section_title = self.find_section_title(self.section_name)

            # Look for schema catalog first
            try:
                table_schema_catalog = get_schema_catalog(self.section_name)
            except Exception as ex:
                log.exception(ex)
                abort(400, "Erreur de traitement du catalogue")
            if table_schema_catalog is None:
                abort(400, "Catalogue indisponible")

            schema_reference = table_schema_catalog.reference_by_name.get(self.name)
            if schema_reference is None:
                abort(400, "Schéma '{}' non trouvé dans le catalogue de la section '{}'".format(self.name, self.section_name))

            if isinstance(schema_reference, GitSchemaReference):
                # Most recent tag first
                self.tags = sorted(schema_reference.iter_tags(), key=by_commit_date, reverse=True)
                if self.ref is None:
                    # No version requested: redirect to the validation form of the
                    # latest tag, or of the default branch when there is no tag.
                    schema_ref = self.tags[0] if self.tags else schema_reference.get_default_branch()
                    abort(redirect(compute_validation_form_url({
                        'schema_name': self.schema_and_section_name,
                        'schema_ref': schema_ref.name
                    })))
                # Only keep the branches whose name is not also a tag name
                tag_names = [tag.name for tag in self.tags]
                self.branches = [branch for branch in schema_reference.iter_branches()
                                 if branch.name not in tag_names]
                self.doc_url = schema_reference.get_doc_url(ref=self.ref) or \
                    schema_reference.get_project_url(ref=self.ref)

            self.url = schema_reference.get_schema_url(ref=self.ref)

        else:
            flash_error("Erreur dans la récupération des informations de schéma")
            abort(redirect(url_for('home')))

        # Load the table schema itself from the resolved URL
        try:
            self.schema = tableschema_from_url(self.url)
        except json.JSONDecodeError as e:
            log.exception(e)
            flash_error("Format de schéma non reconnu")
            abort(redirect(url_for('home')))
        except Exception as e:
            log.exception(e)
            flash_error("Erreur lors de l'obtention du schéma")
            abort(redirect(url_for('home')))

    def request_parameters(self):
        """Return the request parameters designating this schema.

        Returns a `schema_name` / `schema_ref` dict when the schema comes from a
        catalog (`schema_ref` is an empty string when no ref is set), else a
        `schema_url` dict.
        """
        if self.name:
            return {
                'schema_name': self.schema_and_section_name,
                'schema_ref': '' if self.ref is None else self.ref
            }
        return {
            'schema_url': self.url
        }

    def find_section_title(self, section_name):
        """Return the title of the homepage section named `section_name`.

        Returns None when there is no homepage configuration, when no section
        has that name, or when the matching section has no "title" key.
        """
        if config.HOMEPAGE_CONFIG:
            for section in config.HOMEPAGE_CONFIG['sections']:
                if section["name"] == section_name:
                    return section.get("title")
        return None

134

Pierre Dittgen's avatar
Pierre Dittgen committed
135
def extract_source_data(source: ValidataResource, schema_descriptor, preview_rows_nb=5):
    """Compute the table preview of a source after repair.

    The source is read twice: once untouched, only to get its original header
    row, and once through `repair()` to get the repaired header and rows.

    Returns a dict with keys:
    - `source_header_info`: list of `(header, flag)` tuples for the original
      header; the flag is True for every header when the repair report contains
      a 'wrong-headers-order' error, else it is True for blank headers and
      headers that are not a field name of the schema
    - `header`: repaired header row (None if the repaired source has no row)
    - `rows_nb`: number of repaired data rows
    - `data_rows`: repaired data rows, each value converted to a string
    - `preview_rows_nb` / `preview_rows`: the first rows, at most
      `preview_rows_nb` of them
    """

    def stringify(val):
        """Transform value into string (None becomes an empty string)"""
        return '' if val is None else str(val)

    # Gets original source, only to get headers.
    # Defaults to an empty header so that an empty source (no row at all) does
    # not make the header computation below fail on None.
    tabulator_source, tabulator_options = source.build_tabulator_stream_args()
    source_header = []
    with tabulator.Stream(tabulator_source, **tabulator_options) as stream:
        for row in stream:
            source_header = ['' if v is None else v for v in row]
            break

    # Repair source
    tabulator_source, tabulator_options = source.build_tabulator_stream_args()
    fixed_source, repair_report = repair(tabulator_source, schema_descriptor, **tabulator_options)

    header = None
    rows = []
    # NOTE(review): unlike the first Stream call above, the merged options dict
    # is passed as the second positional argument instead of being unpacked
    # with ** -- kept as is, confirm which one is intended.
    with tabulator.Stream(fixed_source, {**tabulator_options, 'scheme': 'stream', 'format': 'inline'}) as stream:
        for row in stream:
            if header is None:
                # First row is the (repaired) header
                header = ['' if v is None else v for v in row]
            else:
                rows.append([stringify(v) for v in row])
    nb_rows = len(rows)
    preview_rows_nb = min(preview_rows_nb, nb_rows)

    # Computes original_headers display
    if any(err.code == 'wrong-headers-order' for err in repair_report):
        source_header_info = [(h, True) for h in source_header]
    else:
        schema_field_names = [f['name'] for f in schema_descriptor.get('fields') or []]
        source_header_info = [(h, not h or h not in schema_field_names) for h in source_header]

    return {
        'source_header_info': source_header_info,
        'header': header,
        'rows_nb': nb_rows,
        'data_rows': rows,
        'preview_rows_nb': preview_rows_nb,
        'preview_rows': rows[:preview_rows_nb]
    }
183 184


Pierre Dittgen's avatar
Pierre Dittgen committed
185 186
def improve_errors(errors):
    """Add context to errors and convert markdown content to HTML.

    `errors` is an iterable of error dicts. Returns a new list of dicts: each
    one is a copy of the input error with the `context`, `content` and, when
    needed, `title` and `message` keys added or replaced. Input dicts are not
    modified.
    """

    def improve_err(err):
        """Add context info based on row-number presence and convert content to HTML"""

        # Context: 'body' when the error is attached to a row, else 'table'
        update_keys = {
            'context': 'body' if err.get('row-number') is not None else 'table',
        }

        # Set default title if no title
        if 'title' not in err:
            update_keys['title'] = '[{}]'.format(err['code'])

        message = err.get('message')
        if message is None:
            # Missing (or None) message: default message.
            # A None message is never handed to commonmark, which only
            # accepts text.
            update_keys['message'] = '[{}]'.format(err['code'])
        elif 'content' not in err:
            # Convert message from markdown only if no content
            # => for pre-checks errors
            update_keys['message'] = commonmark(message)

        # Message content (markdown to HTML, with a default value)
        md_content = err.get('content')
        if md_content is None:
            md_content = '*content soon available*'
        update_keys['content'] = commonmark(md_content)

        return {**err, **update_keys}

    return [improve_err(err) for err in errors]
218 219


220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
def compute_repair_actions(structure_errors):
    """Turn structure errors into repair action messages.

    `structure_errors` is a list of error dicts carrying a `code` key and,
    depending on the code, a `message-data` dict. The list itself is left
    untouched. Returns the list of messages, in this order: leading, inside
    and trailing blank headers, headers order, extra, duplicate and missing
    headers, then one raw `err: [code] message` line per unhandled error.
    """
    # No error, no info!
    if not structure_errors:
        return []

    # Errors still to be reported; handled ones are removed along the way
    remaining = structure_errors.copy()
    actions = []

    def take(is_handled):
        """Remove from `remaining` and return the errors selected by `is_handled`"""
        taken, kept = [], []
        for candidate in remaining:
            (taken if is_handled(candidate) else kept).append(candidate)
        remaining[:] = kept
        return taken

    def take_blank_headers(position):
        """Take the blank-header errors located at the given position"""
        return take(lambda e: e['code'] == 'blank-header'
                    and e['message-data'].get('position') == position)

    def report_edge_blank_headers(position, one_tpl, many_tpl):
        """Report blank headers found before (leading) or after (trailing) the data"""
        found = take_blank_headers(position)
        if len(found) == 1:
            actions.append(one_tpl)
        elif found:
            actions.append(many_tpl.format(len(found)))

    def report_named_headers(code, one_tpl, many_tpl):
        """Report errors that designate a column by its name"""
        quoted = ["`{}`".format(e['message-data']['column-name'])
                  for e in take(lambda e: e['code'] == code)]
        if len(quoted) == 1:
            actions.append(one_tpl.format(quoted[0]))
        elif quoted:
            actions.append(many_tpl.format(', '.join(quoted)))

    # Leading blank headers
    report_edge_blank_headers(
        'leading',
        '1 colonne sans en-tête avant les données a été supprimée',
        '{} colonnes sans en-tête avant les données ont été supprimées')

    # Blank headers inside the table: consecutive columns sharing the same
    # neighbours are reported together, in column order
    inner = take_blank_headers('in')
    if inner:
        one_tpl = '1 colonne sans en-tête (située entre les colonnes **{}** et **{}**) a été supprimée'
        many_tpl = '{} colonnes sans en-tête (situées entre les colonnes **{}** et **{}**) ont été supprimées'

        def flush(count, left, right):
            if count == 1:
                actions.append(one_tpl.format(left, right))
            else:
                actions.append(many_tpl.format(count, left, right))

        left, right, count = None, None, 0
        for blank in sorted(inner, key=lambda e: e['message-data']['column-number']):
            new_left = blank['message-data']['before-header-name']
            new_right = blank['message-data']['after-header-name']
            if new_left == left and new_right == right:
                count += 1
                continue
            # Neighbours changed: close the current group (if any), open a new one
            if left is not None:
                flush(count, left, right)
            left, right, count = new_left, new_right, 1
        flush(count, left, right)

    # Trailing blank headers
    report_edge_blank_headers(
        'trailing',
        '1 colonne sans en-tête après les données a été supprimée',
        '{} colonnes sans en-tête après les données ont été supprimées')

    # wrong-headers-order: only the first error is used to build the message
    reordered = take(lambda e: e['code'] == 'wrong-headers-order')
    if reordered:
        actual_order = reordered[0]['message-data']['actual-order']
        wanted_order = reordered[0]['message-data']['wanted-order']
        actions.append("L'ordre des colonnes du fichier a été rétabli (de {} à {})".format(
            ', '.join("**{}**".format(name) for name in actual_order),
            ', '.join("**{}**".format(name) for name in wanted_order),
        ))

    # extra-header
    report_named_headers(
        'extra-header',
        "La colonne {} inconnue du schéma a été rejetée après les données",
        "Les colonnes {} inconnues du schéma ont été rejetées après les données")

    # duplicate-header
    report_named_headers(
        'duplicate-header',
        "La colonne {} déjà rencontrée dans le fichier a été rejetée après les données",
        "Les colonnes {} déjà rencontrées dans le fichier ont été rejetées après les données")

    # missing-header
    report_named_headers(
        'missing-header',
        "La colonne {} absente du fichier a été ajoutée avec un contenu vide",
        "Les colonnes {} absentes du fichier ont été ajoutées avec un contenu vide")

    # Unhandled errors (should normally not happen)
    actions.extend('err: [{}] {}'.format(e['code'], e['message']) for e in remaining)

    return actions


Pierre Dittgen's avatar
wip  
Pierre Dittgen committed
346
def create_validata_ui_report(validata_core_report, schema_dict):
    """Create an error report easier to handle and display in templates.

    Works on a deep copy of `validata_core_report` (the input is not modified):
    - only one table (the first one) is kept, under the `table` key
    - errors are contextualized and their messages improved
    - errors are counted in `error_count`
    - errors are separated into "structure" and "body"
    - body errors are grouped by row in `body_by_rows`
    - structure errors are turned into `repair_actions`
    - error statistics are sorted by title

    `schema_dict` is the schema descriptor; its `fields` give the title and
    description of each column. Returns the new report dict.
    """
    report = copy.deepcopy(validata_core_report)

    # One table is enough
    del report['table-count']
    report['table'] = report['tables'][0]
    del report['tables']
    table = report['table']
    del table['error-count']
    del table['time']
    del table['valid']
    del report['valid']
    # use _ instead of - to ease information picking in jinja2 template
    table['row_count'] = table['row-count']

    # Handy col_count info
    headers = table.get('headers', [])
    table['col_count'] = len(headers)

    # Computes column info: field name -> (title, description)
    fields_dict = {f['name']: (f.get('title', f['name']), f.get('description', ''))
                   for f in schema_dict.get('fields', [])}
    table['headers_title'] = [fields_dict[h][0] if h in fields_dict else 'Colonne inconnue' for h in headers]
    table['headers_description'] = [fields_dict[h][1]
                                    if h in fields_dict else 'Cette colonne n\'est pas définie dans le schema'
                                    for h in headers]
    missing_headers = [err['message-data']['column-name']
                       for err in table['errors']
                       if err['code'] == 'missing-header']
    # Columns unknown to the schema or missing from the file are highlighted
    table['cols_alert'] = ['table-danger' if h not in fields_dict or h in missing_headers else ''
                           for h in headers]

    # Provide better (french) messages
    errors = improve_errors(table['errors'])
    del table['errors']

    # Count errors
    report['error_count'] = len(errors)
    del report['error-count']

    # Then group them in 2 groups : structure and body
    table['errors'] = {'structure': [], 'body': []}
    for err in errors:
        if err['tag'] == 'structure':
            table['errors']['structure'].append(err)
        else:
            table['errors']['body'].append(err)

    # Group body errors by row id (consecutive errors of a same row are merged)
    rows = []
    current_row_id = 0
    for err in table['errors']['body']:
        if 'row-number' not in err:
            # A body error without row number cannot be attached to a row:
            # report it in the logs instead of failing on the lookup below.
            log.warning("Body error without 'row-number' ignored in row grouping: %r", err)
            continue
        row_id = err['row-number']
        del err['row-number']
        del err['context']
        # `not rows` guards the very first error, whatever its row number is
        if not rows or row_id != current_row_id:
            current_row_id = row_id
            rows.append({'row_id': current_row_id, 'errors': {}})

        # Errors are indexed by column number, or by 'row' for row-level errors
        column_id = err.get('column-number')
        if column_id is not None:
            del err['column-number']
            rows[-1]['errors'][column_id] = err
        else:
            rows[-1]['errors']['row'] = err
    table['errors']['body_by_rows'] = rows

    report['repair_actions'] = compute_repair_actions(table['errors']['structure'])

    # Sort by error names in statistics
    stats = table['error-stats']
    code_title_map = messages.ERROR_MESSAGE_DEFAULT_TITLE
    for key in ('structure-errors', 'value-errors'):
        # convert dict into tuples with french title instead of error code
        # and sorts by title
        stats[key]['count-by-code'] = sorted(((code_title_map.get(k, k), v)
                                              for k, v in stats[key]['count-by-code'].items()), key=itemgetter(0))

    return report


Pierre Dittgen's avatar
Pierre Dittgen committed
435 436 437 438 439 440 441
def compute_badge_message_and_color(badge):
    """Return the `(message, color)` tuple describing a badge.

    `badge` is a dict with a mandatory `structure` key, an optional `body` key
    and, when the body is not 'OK', an `error-ratio` key (ratio of invalid
    cells).
    """
    structure_status = badge['structure']
    body_status = badge.get('body')

    # Bad structure, stop here
    if structure_status == 'KO':
        return ('structure invalide', 'red')

    # No body error: the result only depends on the structure status
    if body_status == 'OK':
        if structure_status == 'WARN':
            return ('structure invalide', 'orange')
        return ('valide', 'green')

    # Else report the percentage of valid cells
    valid_percent = (1 - badge['error-ratio']) * 100.0
    ratio_msg = 'cellules valides : {:.1f}%'.format(valid_percent)
    ratio_color = 'red' if body_status == 'KO' else 'orange'
    return (ratio_msg, ratio_color)


def get_badge_url_and_message(badge):
    """Return the `(badge_url, message)` tuple of a badge.

    The URL targets the `/static/v1.svg` endpoint of the configured shields.io
    base URL, with the label, message and color given as query parameters.
    """
    badge_msg, badge_color = compute_badge_message_and_color(badge)
    endpoint = urljoin(config.SHIELDS_IO_BASE_URL, '/static/v1.svg')
    query_string = urlencode({"label": "Validata", "message": badge_msg, "color": badge_color})
    return ("{}?{}".format(endpoint, query_string), badge_msg)
Pierre Dittgen's avatar
Pierre Dittgen committed
463 464


465
def validate(schema_instance: SchemaInstance, source: ValidataResource):
    """Validate source and display report.

    The source is sent to the validation API (GET with the source URL for a
    'url' source, POST with the file content otherwise), then the returned
    report is enhanced and rendered with the `validation_report.html` template.

    Returns a Flask response: the rendered report, or a redirect (with a
    flashed error message) when the API call fails or when the source itself
    cannot be read.
    """

    # Useful to receive response as JSON
    headers = {"Accept": "application/json"}

    try:
        if source.type == 'url':
            params = {
                "schema": schema_instance.url,
                "url": source.url,
                "repair": True,
            }
            response = requests.get(config.API_VALIDATE_ENDPOINT, params=params, headers=headers)
        else:
            files = {'file': (source.filename, source.build_reader())}
            data = {"schema": schema_instance.url, "repair": True}
            response = requests.post(config.API_VALIDATE_ENDPOINT, data=data, files=files, headers=headers)
    except requests.RequestException as err:
        # RequestException is the base class of ConnectionError, Timeout...:
        # any failure to reach the API is reported the same way.
        log.exception(err)
        flash_error("Erreur technique lors de la validation")
        return redirect(url_for('home'))

    if not response.ok:
        flash_error("Erreur technique lors de la validation")
        return redirect(compute_validation_form_url(schema_instance.request_parameters()))

    json_response = response.json()
    validata_core_report = json_response['report']
    badge_info = json_response.get('badge')

    # Computes badge from report and badge configuration
    badge_url, badge_msg = None, None
    display_badge = badge_info and config.SHIELDS_IO_BASE_URL
    if display_badge:
        badge_url, badge_msg = get_badge_url_and_message(badge_info)

    # Errors on the source itself: no report can be displayed
    source_errors = [
        err
        for err in validata_core_report['tables'][0]['errors']
        if err['code'] in {'source-error', 'unknown-csv-dialect'}
    ]
    if source_errors:
        err = source_errors[0]
        msg = "l'encodage du fichier est invalide. Veuillez le corriger" if 'charmap' in err[
            'message'] else err['message']
        flash_error('Erreur de source : {}'.format(msg))
        return redirect(url_for('custom_validator'))

    source_data = extract_source_data(source, schema_instance.schema.descriptor)

    # handle report date
    report_datetime = datetime.fromisoformat(validata_core_report['date']).astimezone()

    # Enhance validata_core_report
    validata_report = create_validata_ui_report(validata_core_report, schema_instance.schema.descriptor)

    # Display report to the user
    validator_form_url = compute_validation_form_url(schema_instance.request_parameters())
    schema_info = compute_schema_info(schema_instance.schema, schema_instance.url)
    # A PDF report link is only built for 'url' sources
    pdf_report_url = "{}?{}".format(url_for('pdf_report'),
                                    urlencode({
                                        **schema_instance.request_parameters(),
                                        "url": source.url,
                                    })) if source.type == 'url' else None

    return render_template('validation_report.html',
                           badge_msg=badge_msg,
                           badge_url=badge_url,
                           breadcrumbs=[
                               {'title': 'Accueil', 'url': url_for('home')},
                               {'title': schema_instance.section_title},
                               {'title': schema_info['title'], 'url': validator_form_url},
                               {'title': 'Rapport de validation'},
                           ],
                           display_badge=display_badge,
                           doc_url=schema_instance.doc_url,
                           pdf_report_url=pdf_report_url,
                           print_mode=request.args.get('print', 'false') == 'true',
                           report_str=json.dumps(validata_report, sort_keys=True, indent=2),
                           report=validata_report,
                           schema_current_version=schema_instance.ref,
                           schema_info=schema_info,
                           section_title=schema_instance.section_title,
                           source_data=source_data,
                           source=source,
                           validation_date=report_datetime.strftime('le %d/%m/%Y à %Hh%M'),
                           )
553 554


555 556
def bytes_data(f):
    """Return the bytes that `f` writes when saved (e.g. a Werkzeug FileStorage).

    `f` only needs a `save(destination)` method writing its content to the
    given binary stream.
    """
    with io.BytesIO() as buffer:
        f.save(buffer)
        # getvalue() returns the whole buffer, whatever the stream position is
        return buffer.getvalue()


563 564 565 566 567 568
# Routes


@app.route('/')
def home():
    """Home page: render `home.html` with the configured sections."""

    def iter_sections():
        """Yield sections of the home page, filled with schema metadata.

        Each yielded dict is a copy of the configured section where the
        `catalog` entry, if any, is replaced by the list of its schemas
        (`name`, `title` and optional `err` flag) sorted by title. When the
        catalog cannot be built, the section gets an `err` message instead.
        """
        if not config.HOMEPAGE_CONFIG:
            return
        for section in config.HOMEPAGE_CONFIG['sections']:
            # Every key but 'catalog' is copied as is (this includes 'links')
            home_section = {k: v for k, v in section.items() if k != 'catalog'}
            if "catalog" in section:

                try:
                    schema_catalog = get_schema_catalog(section['name'])
                except Exception as exc:
                    log.exception(exc)
                    err_msg = "une erreur s'est produite"
                    if isinstance(exc, requests.ConnectionError):
                        err_msg = "problème de connexion"
                    elif isinstance(exc, json.decoder.JSONDecodeError):
                        err_msg = "format JSON incorrect"
                    elif isinstance(exc, jsonschema.exceptions.ValidationError):
                        err_msg = "le catalogue ne respecte pas le schéma de référence"
                    home_section['err'] = err_msg
                else:
                    home_section_catalog = []
                    for schema_reference in schema_catalog.references:
                        # Loads default table schema for each schema reference
                        schema_info = {
                            'name': schema_reference.name
                        }
                        try:
                            table_schema = tableschema_from_url(schema_reference.get_schema_url())
                        except Exception as exc:
                            # Best effort: an unavailable schema is flagged in
                            # the page, the cause goes to the logs. Exception
                            # (not a bare except) lets SystemExit and
                            # KeyboardInterrupt propagate.
                            log.exception(exc)
                            schema_info['err'] = True
                            schema_info['title'] = 'Schéma "{}" non disponible'.format(schema_reference.name)
                        else:
                            schema_info['title'] = table_schema.descriptor.get("title") or schema_reference.name
                        home_section_catalog.append(schema_info)
                    # Sort by title, ignoring case and accents
                    home_section['catalog'] = sorted(
                        home_section_catalog, key=lambda sc: strip_accents(sc['title'].lower()))

            yield home_section

    return render_template('home.html', sections=list(iter_sections()))
613 614


Pierre Dittgen's avatar
Pierre Dittgen committed
615 616
@app.route('/pdf')
def pdf_report():
    """Generate a PDF validation report and send it as a file download.

    Reads the ``url`` query parameter (resource to validate) along with the
    schema parameters, builds the URL of the matching validation report page
    in print mode, asks headless Chromium to print that page to a PDF file and
    returns the PDF content as an attachment.

    Returns:
        The PDF attachment response on success. Otherwise a redirect to the
        home page, after flashing an error message, when the ``url`` parameter
        is missing or when Chromium fails to produce the PDF file.
    """
    err_prefix = 'Erreur de génération du rapport PDF'

    url_param = request.args.get('url')
    if not url_param:
        flash_error(err_prefix + ' : URL non fournie')
        return redirect(url_for('home'))

    schema_instance = SchemaInstance(request.args)

    # Compute the URL of the validation report page to print
    base_url = url_for('custom_validator', _external=True)
    parameter_dict = {
        'input': 'url',
        'print': 'true',
        'url': url_param,
        **schema_instance.request_parameters()
    }
    validation_url = "{}?{}".format(base_url, urlencode(parameter_dict))

    # Work inside a private temporary directory rather than reusing the name of
    # an already deleted temporary file: the output path cannot be hijacked by
    # another process, and the directory is removed on every exit path
    # (including exceptions).
    tmp_dir_prefix = 'validata_{}_report_'.format(datetime.now().timestamp())
    with tempfile.TemporaryDirectory(prefix=tmp_dir_prefix) as tmp_dir:
        tmp_pdf_report = Path(tmp_dir) / 'report.pdf'

        # Use chromium headless to generate PDF from validation report page
        cmd = ['chromium', '--headless', '--no-sandbox',
               '--print-to-pdf={}'.format(str(tmp_pdf_report)), validation_url]
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

        # Chromium output is only used for logging: never let a decoding
        # problem hide the real error.
        cmd_output = result.stdout.decode('utf-8', errors='replace')

        pdf_content = None
        if result.returncode != 0:
            log.error("Command %r returned an error: %r", cmd, cmd_output)
        elif not tmp_pdf_report.is_file():
            # A zero return code does not guarantee that the file was written
            log.error("Command %r succeeded but produced no PDF file: %r", cmd, cmd_output)
        else:
            pdf_content = tmp_pdf_report.read_bytes()

    if pdf_content is None:
        flash_error(err_prefix)
        return redirect(url_for('home'))

    # Send PDF report
    pdf_filename = 'Rapport de validation {}.pdf'.format(datetime.now().strftime('%d-%m-%Y %Hh%M'))
    response = make_response(pdf_content)
    response.headers.set('Content-disposition', 'attachment', filename=pdf_filename)
    response.headers.set('Content-type', 'application/pdf')
    response.headers.set('Content-length', len(pdf_content))

    return response


Pierre Dittgen's avatar
Pierre Dittgen committed
664 665 666 667 668
def extract_schema_metadata(table_schema: tableschema.Schema):
    """Return the schema descriptor entries (author, contributor, version...) without 'fields'.

    A new dict is built from ``table_schema.descriptor``, keeping the original
    key order and leaving the descriptor itself untouched.
    """
    metadata = {}
    for key, value in table_schema.descriptor.items():
        if key == 'fields':
            # Field definitions are not part of the schema header metadata
            continue
        metadata[key] = value
    return metadata


Pierre Dittgen's avatar
Pierre Dittgen committed
669
def compute_schema_info(table_schema: tableschema.Schema, schema_url):
    """Build the schema information dict used by the validator form page.

    The result holds the schema URL under the 'path' key, merged with the
    metadata returned by ``extract_schema_metadata``.
    """
    # Start from the URL given by the user (in case schema was given by URL)...
    schema_info = {'path': schema_url}
    # ...then merge the schema metadata: a "path" metadata property can be
    # found in Table Schema, and we'd like it to override `schema_url`.
    schema_info.update(extract_schema_metadata(table_schema))
    return schema_info
680 681


682
def compute_validation_form_url(request_parameters: dict):
    """Return the validation form URL followed by the URL-encoded request parameters."""
    form_url = url_for('custom_validator')
    query_string = urlencode(request_parameters)
    return "?".join((form_url, query_string))
Pierre Dittgen's avatar
Pierre Dittgen committed
686 687


688
@app.route('/table-schema', methods=['GET', 'POST'])
def custom_validator():
    """Validator form: display the form or run a validation.

    - GET without ``input`` parameter: renders the validation form for the
      schema described by the request parameters.
    - GET with ``input`` parameter: validates the resource found at the ``url``
      parameter (redirects to the form with an error if no URL is given).
    - POST with ``input=file``: validates the uploaded ``file`` (redirects to
      the form with a warning if no file was actually sent).

    Any other parameter combination yields a 400 response.
    """

    if request.method == 'GET':

        # input is a hidden form parameter to know
        # if this is the initial page display or if the validation has been asked for
        input_param = request.args.get('input')

        # url of resource to be validated
        url_param = request.args.get("url")

        schema_instance = SchemaInstance(request.args)

        # First form display
        if input_param is None:
            schema_info = compute_schema_info(schema_instance.schema, schema_instance.url)

            # The schema metadata may not define a title: fall back on the
            # schema name, then on its URL, instead of failing with a KeyError.
            schema_title = schema_info.get('title') or schema_instance.name or schema_instance.url

            return render_template('validation_form.html',
                                   branches=schema_instance.branches,
                                   breadcrumbs=[
                                       {'url': url_for('home'), 'title': 'Accueil'},
                                       {'title': schema_instance.section_title},
                                       {'title': schema_title},
                                   ],
                                   doc_url=schema_instance.doc_url,
                                   schema_current_version=schema_instance.ref,
                                   schema_info=schema_info,
                                   schema_params=schema_instance.request_parameters(),
                                   section_title=schema_instance.section_title,
                                   tags=schema_instance.tags,
                                   )

        # Process URL
        if not url_param:
            flash_error("Vous n'avez pas indiqué d'URL à valider")
            return redirect(compute_validation_form_url(schema_instance.request_parameters()))
        return validate(schema_instance, URLValidataResource(url_param))

    elif request.method == 'POST':

        schema_instance = SchemaInstance(request.form)

        input_param = request.form.get('input')
        if input_param is None:
            flash_error("Vous n'avez pas indiqué de fichier à valider")
            return redirect(compute_validation_form_url(schema_instance.request_parameters()))

        # File validation
        if input_param == 'file':
            f = request.files.get('file')
            # When the form is submitted without any selected file, browsers
            # still send a "file" part, with an empty filename.
            if f is None or not f.filename:
                flash_warning("Vous n'avez pas indiqué de fichier à valider")
                return redirect(compute_validation_form_url(schema_instance.request_parameters()))

            return validate(schema_instance, UploadedFileValidataResource(f.filename, bytes_data(f)))

        return 'Combinaison de paramètres non supportée', 400

    else:
        # Defensive fallback for any method other than GET and POST
        return "Method not allowed", 405