views.py 15.5 KB
Newer Older
1 2 3
"""
    Routes
"""
4
import copy
Pierre Dittgen's avatar
Pierre Dittgen committed
5
import itertools
6
import json
7
import logging
8
import subprocess
9
import tempfile
10
from datetime import datetime
11
from io import BytesIO
12
from operator import itemgetter
13 14
from pathlib import Path
from urllib.parse import quote_plus
15

16
from backports.datetime_fromisoformat import MonkeyPatch
Pierre Dittgen's avatar
Pierre Dittgen committed
17
from commonmark import commonmark
18
from flask import make_response, redirect, render_template, request, url_for
19

20
import tabulator
21
from validata_core import csv_helpers, messages
Pierre Dittgen's avatar
Pierre Dittgen committed
22
from validata_core.loaders import custom_loaders
23
from validata_ui import app
Pierre Dittgen's avatar
Pierre Dittgen committed
24
from validata_ui.ui_util import flash_error, flash_warning
25
from validata_ui.validata_util import ValidataSource
26
from validata_ui.validate_helper import ValidatorHelper
27

28 29
# Apply the backports.datetime_fromisoformat patch once, at import time.
# validate() below calls datetime.fromisoformat(); presumably the patch provides
# it on Python versions that lack it -- TODO confirm the minimum supported version.
MonkeyPatch.patch_fromisoformat()

30 31
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

32 33

def extract_source_data(source: ValidataSource, preview_rows_nb=5):
    """Read the whole tabular source and build the data used for the table preview.

    The first row yielded by the stream is used as the header (``None`` cells
    become empty strings). Every following row is kept, with each cell turned
    into a string (``None`` becomes ``''``).

    :param source: source to read; its ``source``, ``format`` and ``scheme``
        attributes are handed to ``tabulator.Stream``
    :param preview_rows_nb: maximum number of data rows exposed as preview
    :return: dict with keys ``header`` (``None`` when the stream yields no row),
        ``rows_nb``, ``data_rows``, ``preview_rows_nb`` and ``preview_rows``
    """
    stream_kwargs = {}
    if source.format == "csv":
        # For CSV, the delimiter is detected first and given explicitly to the stream
        dialect = csv_helpers.detect_dialect(source.source, format=source.format, scheme=source.scheme,
                                             custom_loaders=custom_loaders)
        stream_kwargs['delimiter'] = dialect.delimiter

    header = None
    data_rows = []
    with tabulator.Stream(source.source, format=source.format, scheme=source.scheme,
                          custom_loaders=custom_loaders, **stream_kwargs) as stream:
        for position, cells in enumerate(stream):
            if position == 0:
                header = ['' if cell is None else cell for cell in cells]
                continue
            data_rows.append(['' if cell is None else str(cell) for cell in cells])

    total = len(data_rows)
    shown = min(preview_rows_nb, total)
    return {
        'header': header,
        'rows_nb': total,
        'data_rows': data_rows,
        'preview_rows_nb': shown,
        'preview_rows': data_rows[:shown],
    }
62 63


Pierre Dittgen's avatar
Pierre Dittgen committed
64 65
def improve_errors(errors):
    """Add display context to errors and convert their markdown texts to HTML.

    :param errors: iterable of error dicts. Each dict must have a ``code`` key
        whenever it lacks a ``title`` or a usable ``message``.
    :return: list of new dicts (the input dicts are left untouched), each one
        being the original error plus:

        - ``context``: ``'body'`` if the error has a non-``None`` ``row-number``,
          ``'table'`` otherwise
        - ``title``: ``'[<code>]'`` when the error has no title
        - ``message``: ``'[<code>]'`` when missing or ``None``; converted from
          markdown to HTML when the error has no ``content``
        - ``content``: HTML rendering of the error content, or of a placeholder
          when the error has none
    """

    def improve_err(err):
        """Return a copy of `err` with context, title, message and content set."""
        has_content = 'content' in err
        message = err.get('message')

        update_keys = {
            'context': 'body' if err.get('row-number') is not None else 'table',
        }

        # Set default title if no title
        if 'title' not in err:
            update_keys['title'] = '[{}]'.format(err['code'])

        if message is None:
            # Missing or None message: fall back on the error code.
            # Checked first so that None is never handed to commonmark().
            update_keys['message'] = '[{}]'.format(err['code'])
        elif not has_content:
            # The message is converted from markdown only if there is no
            # content (pre-checks errors)
            update_keys['message'] = commonmark(message)

        # Message content
        md_content = err['content'] if has_content else '*content soon available*'
        update_keys['content'] = commonmark(md_content)

        return {**err, **update_keys}

    return [improve_err(err) for err in errors]
97 98


Pierre Dittgen's avatar
Pierre Dittgen committed
99
def _column_comparison_info(headers, field_names):
    """Compare source headers with schema field names, position by position.

    :param headers: headers found in the source
    :param field_names: field names declared in the schema
    :return: dict with ``table`` (list of ``(header, field_name, status)``
        tuples where ``status`` is ``'ok'`` or ``'ko'``; the shorter sequence is
        padded with ``''``), ``has_missing`` (fewer headers than fields) and
        ``has_case_errors`` (at least one pair differs only by letter case)
    """
    table = []
    has_case_errors = False
    for header, field_name in itertools.zip_longest(headers, field_names, fillvalue=''):
        status = 'ok' if header == field_name else 'ko'
        # Only strings can differ by case; a non-string header (e.g. None)
        # must not make .lower() fail
        if status == 'ko' and isinstance(header, str) and isinstance(field_name, str) \
                and header.lower() == field_name.lower():
            has_case_errors = True
        table.append((header, field_name, status))
    return {
        'table': table,
        'has_missing': len(headers) < len(field_names),
        'has_case_errors': has_case_errors,
    }


def _group_body_errors_by_row(body_errors):
    """Group consecutive body errors sharing the same row number.

    The error dicts are modified in place: ``row-number`` and ``context`` are
    removed, as well as ``column-number`` when it is not ``None``.

    :param body_errors: list of improved body errors
    :return: list of ``{'row_id': ..., 'errors': {...}}`` dicts where
        ``errors`` maps a column number (or ``'row'`` for an error without
        column number) to the error; a later error on the same key replaces
        the earlier one
    """
    rows = []
    for err in body_errors:
        if 'row-number' not in err:
            # Such an error can't be attached to a row: report it in the log
            # instead of failing the whole report
            log.warning('Body error without row-number ignored in row grouping: %r', err)
            continue

        row_id = err['row-number']
        del err['row-number']
        del err['context']

        # Start a new group on the first error and on each row change
        if not rows or rows[-1]['row_id'] != row_id:
            rows.append({'row_id': row_id, 'errors': {}})
        row_errors = rows[-1]['errors']

        column_id = err.get('column-number')
        if column_id is None:
            row_errors['row'] = err
        else:
            del err['column-number']
            row_errors[column_id] = err
    return rows


def create_validata_ui_report(validata_core_report, schema):
    """Create an error report easier to handle and display in templates.

    Works on a deep copy of `validata_core_report`:
        - only one table (the first one) is kept, under the ``table`` key
        - errors are contextualized and their messages improved
        - ``error_count`` holds the number of errors
        - errors are separated into "structure" and "body"
        - body errors are also grouped by rows (``body_by_rows``); a body error
          without ``row-number`` is logged and left out of that grouping
        - a column comparison is added when a header-related error is found
        - error statistics are turned into (title, count) pairs sorted by title

    :param validata_core_report: report dict produced by the validation
    :param schema: schema descriptor (dict); its ``fields`` entry is used to
        find column titles and descriptions
    :return: the new report dict
    """
    report = copy.deepcopy(validata_core_report)

    # One table is enough
    del report['table-count']
    table = report['tables'][0]
    report['table'] = table
    del report['tables']
    del table['error-count']
    del table['time']
    del table['valid']
    del report['valid']

    # use _ instead of - to ease information picking in jinja2 template
    table['row_count'] = table['row-count']

    # Handy col_count info
    headers = table.get('headers', [])
    table['col_count'] = len(headers)

    # Computes column info
    schema_fields = schema.get('fields', [])
    fields_dict = {f['name']: (f.get('title', 'titre non défini'), f.get('description', '')) for f in schema_fields}
    table['headers_title'] = [fields_dict[h][0] if h in fields_dict else 'colonne inconnue' for h in headers]
    table['headers_description'] = [fields_dict[h][1]
                                    if h in fields_dict else 'Cette colonne n\'est pas définie dans le schema'
                                    for h in headers]

    # Provide better (french) messages
    errors = improve_errors(table['errors'])

    # Count errors
    report['error_count'] = len(errors)
    del report['error-count']

    # Then group them in 2 groups : structure and body
    structure_errors = [err for err in errors if err['tag'] == 'structure']
    body_errors = [err for err in errors if err['tag'] != 'structure']
    table['errors'] = {'structure': structure_errors, 'body': body_errors}

    # Body errors are displayed if there is no structure error, or only
    # invalid-column-delimiter ones
    table['do_display_body_errors'] = all(err['code'] == 'invalid-column-delimiter' for err in structure_errors)

    # Checks if a column comparison is needed
    header_errors = ('missing-headers', 'extra-headers', 'wrong-headers-order')
    structure_errors = [{**err, 'in_column_comp': err['code'] in header_errors} for err in structure_errors]
    table['errors']['structure'] = structure_errors
    column_comparison_needed = any(err['in_column_comp'] for err in structure_errors)
    if column_comparison_needed:
        field_names = [f['name'] for f in schema_fields]
        table['column_comparison_info'] = _column_comparison_info(headers, field_names)
    table['column_comparison_needed'] = column_comparison_needed

    # Group body errors by row id
    table['errors']['body_by_rows'] = _group_body_errors_by_row(body_errors)

    # Sort by error names in statistics
    stats = table['error-stats']
    code_title_map = messages.ERROR_MESSAGE_DEFAULT_TITLE
    for key in ('structure-errors', 'value-errors'):
        # convert dict into tuples with french title instead of error code
        # and sorts by title
        stats[key]['count-by-code'] = sorted(
            [(code_title_map.get(code, code), count) for code, count in stats[key]['count-by-code'].items()],
            key=itemgetter(0))

    return report


208
def validate(schema_code, source: ValidataSource):
    """Validate source and display report.

    :param schema_code: code of the schema to validate against
    :param source: source to validate
    :return: the rendered validation report page, or a redirection to the
        validator form (with a flashed error) when the source format is not
        supported or when the report contains a ``source-error``
    """
    try:
        validata_core_report = ValidatorHelper.validate(
            schema_code=schema_code,
            force_strings=True,
            **source.get_tabulator_params()
        )

        # Validator.validate() doesn't throw FormatError if source.format is None
        # Just do it manually
        if source.format is None:
            raise tabulator.exceptions.FormatError()

    except tabulator.exceptions.FormatError:
        flash_error('Erreur : format de fichier non supporté')
        return redirect(url_for('scdl_validator', val_code=schema_code))

    # Only the first source error, if any, is reported to the user
    source_error = next((err for err in validata_core_report['tables'][0]['errors']
                         if err['code'] == 'source-error'), None)
    if source_error is not None:
        if 'charmap' in source_error['message']:
            msg = "l'encodage du fichier est invalide. Veuillez le corriger"
        else:
            msg = source_error['message']
        flash_error('Erreur de source : {}'.format(msg))
        return redirect(url_for('scdl_validator', val_code=schema_code))

    source_data = extract_source_data(source)

    # handle report date
    report_datetime = datetime.fromisoformat(validata_core_report['date']).astimezone()

    # Enhance validata_core_report
    validata_report = create_validata_ui_report(validata_core_report, ValidatorHelper.schema(schema_code).descriptor)

    try:
        report_str = json.dumps(validata_report, sort_keys=True, indent=2)
    except TypeError:
        # sort_keys can't order a dict mixing int and str keys (a row holding
        # both column errors and a 'row' error). A JSON round-trip turns every
        # key into a string, which makes the sort possible.
        report_str = json.dumps(json.loads(json.dumps(validata_report)), sort_keys=True, indent=2)

    # Display report to the user
    val_info = ValidatorHelper.schema_info(schema_code)
    return render_template('validation_report.html', title='Rapport de validation',
                           val_info=val_info, report=validata_report,
                           validation_date=report_datetime.strftime('le %d/%m/%Y à %Hh%M'),
                           source=source, source_type=source.type, source_data=source_data,
                           print_mode=request.args.get('print', 'false') == 'true',
                           report_str=report_str,
                           breadcrumbs=[{'url': url_for('home'), 'title': 'Accueil'},
                                        {'url': url_for('scdl_validator', val_code=schema_code),
                                         'title': val_info['title']}])
254 255


256 257 258 259 260 261 262 263
def bytes_data(f):
    """Return as bytes everything `f` writes through its ``save()`` method.

    `f` is expected to be a Werkzeug FileStorage instance; only its
    ``save(destination)`` method is used here.
    """
    with BytesIO() as buffer:
        f.save(buffer)
        return buffer.getvalue()


264 265 266 267 268 269
# Routes


@app.route('/')
def home():
    """Home page: lists the available validators and some external ones."""
    schema_validators = ValidatorHelper.schema_info_list()

    # (title, description, url) of validators hosted elsewhere
    external_specs = (
        ("INSPIRE",
         "proposé par la Commission Européenne pour tester des ressources géographiques (données, services ou métadonnées)",
         "http://inspire-sandbox.jrc.ec.europa.eu/validator/"),
        ("BAL",
         "proposé par la mission Etalab pour tester des données Adresse produites localement (format BAL 1.1)",
         "https://adresse.data.gouv.fr/bases-locales/validateur"),
        ("CVDTC",
         "proposé par l'AFIMB dans le cadre du projet Chouette pour tester des données de transport collectif (GTFS ou NEPTUNE)",
         "http://www.conversion-validation-donnees-tc.org/"),
    )
    external_validators = [
        {"title": title, "description": description, "url": url}
        for title, description, url in external_specs
    ]

    flash_warning('Ce service est fourni en mode beta - certains problèmes peuvent subsister - nous mettons tout en œuvre pour améliorer son fonctionnement en continu.')
    return render_template('home.html', title='Accueil',
                           validators=schema_validators,
                           external_validators=external_validators)
290 291 292 293 294 295 296 297


@app.route('/validators')
def validators():
    """There is no validators listing page: send the visitor to the home page."""
    home_url = url_for('home')
    return redirect(home_url)


298 299 300
@app.route('/pdf/<val_code>')
def pdf_report(val_code):
    """PDF report generation.

    Prints the validation report page of the resource given by the ``url``
    query parameter to PDF with headless chromium, and sends the PDF back as
    an attachment.

    :param val_code: code of the schema used for the validation
    :return: the PDF response, or a redirection to the validator form (with a
        flashed error) if the schema is unknown, if no URL is given or if the
        PDF could not be generated
    """
    err_prefix = 'Erreur de génération du rapport PDF'

    if not ValidatorHelper.schema_exist(val_code):
        flash_error(err_prefix + ': schéma inconnu')
        return redirect(url_for('scdl_validator', val_code=val_code))

    url_param = request.args.get('url')
    if not url_param:
        flash_error(err_prefix + ': URL non fournie')
        return redirect(url_for('scdl_validator', val_code=val_code))

    validation_url = '{}?input=url&print=true&url={}'.format(url_for('scdl_validator', val_code=val_code, _external=True),
                                                             quote_plus(url_param))

    # The PDF is written in a private temporary directory, removed with its
    # content when leaving the block, whatever happens. This avoids reusing the
    # name of an already deleted temporary file.
    with tempfile.TemporaryDirectory(prefix='validata_{}_report_'.format(val_code)) as tmp_dir:
        tmp_pdf_report = Path(tmp_dir) / 'report.pdf'

        cmd = ['chromium', '--headless', '--disable-gpu',
               '--print-to-pdf={}'.format(str(tmp_pdf_report)), validation_url]
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

        # A zero return code without any PDF file is a failure too
        if result.returncode != 0 or not tmp_pdf_report.exists():
            flash_error(err_prefix)
            log.error("Command %r returned an error: %r", cmd, result.stdout.decode('utf-8', errors='replace'))
            return redirect(url_for('scdl_validator', val_code=val_code))

        pdf_content = tmp_pdf_report.read_bytes()

    pdf_filename = 'Rapport de validation {} {}.pdf'.format(val_code,
                                                            datetime.now().strftime('%d-%m-%Y %Hh%M'))
    response = make_response(pdf_content)
    response.headers.set('Content-disposition', 'attachment', filename=pdf_filename)
    response.headers.set('Content-type', 'application/pdf')
    response.headers.set('Content-length', len(pdf_content))

    return response


340 341 342 343 344 345 346 347 348 349
@app.route('/validators/<val_code>', methods=['GET', 'POST'])
def scdl_validator(val_code):
    """Validator page.

    - GET without ``input=url`` (or ``input=example``): displays the validation form
    - GET with ``input=url`` (or ``input=example``): validates the resource
      given by the ``url`` query parameter
    - POST with ``input=file``: validates the uploaded file

    :param val_code: code of the schema to validate against
    :return: the form page, the validation report page, or a redirection with
        a flashed message when the request can't be processed
    """
    if not ValidatorHelper.schema_exist(val_code):
        flash_error('Validateur [{}] inconnu'.format(val_code))
        return redirect(url_for('home'))

    if request.method == 'GET':
        input_param = request.args.get('input')

        # First form display
        if input_param not in ('url', 'example'):
            val_info = ValidatorHelper.schema_info(val_code)
            return render_template('validation_form.html', title=val_info['title'],
                                   val_info=val_info,
                                   breadcrumbs=[{'url': url_for('home'), 'title': 'Accueil'}, ])

        # Process URL
        url_param = request.args.get('url')
        if not url_param:
            flash_error("Vous n'avez pas indiqué d'url à valider")
            return redirect(url_for('scdl_validator', val_code=val_code))
        try:
            return validate(val_code, ValidataSource('url', url_param, url_param))
        except tabulator.exceptions.FormatError as e:
            flash_error('Erreur : Format de ressource non supporté')
            log.info(e)
            return redirect(url_for('scdl_validator', val_code=val_code))
        except tabulator.exceptions.HTTPError as e:
            flash_error('Erreur : impossible d\'accéder au fichier source en ligne')
            log.info(e)
            return redirect(url_for('scdl_validator', val_code=val_code))

    # POST
    input_param = request.form.get('input')
    if input_param is None:
        flash_error('Aucun fichier à valider')
        return redirect(url_for('scdl_validator', val_code=val_code))

    # File validation
    if input_param == 'file':
        f = request.files.get('file')
        # When the form is sent without choosing a file, browsers post an
        # empty part with no filename: the part exists but there is nothing
        # to validate
        if f is None or not f.filename:
            flash_warning("Vous n'avez pas indiqué de fichier à valider")
            return redirect(url_for('scdl_validator', val_code=val_code))

        return validate(val_code, ValidataSource('file', f.filename, bytes_data(f)))

    return 'Bizarre, vous avez dit bizarre ?'