views.py 15.2 KB
Newer Older
1
2
3
"""
    Routes
"""
4
import copy
Pierre Dittgen's avatar
Pierre Dittgen committed
5
import itertools
6
import json
Christophe Benz's avatar
Christophe Benz committed
7
import logging
8
import subprocess
Christophe Benz's avatar
Christophe Benz committed
9
import tempfile
10
from datetime import datetime
11
from io import BytesIO
12
from operator import itemgetter
13
14
from pathlib import Path
from urllib.parse import quote_plus
15

16
from backports.datetime_fromisoformat import MonkeyPatch
Pierre Dittgen's avatar
Pierre Dittgen committed
17
from commonmark import commonmark
18
from flask import make_response, redirect, render_template, request, url_for
19

20
import tabulator
21
from validata_core import csv_helpers, messages
Pierre Dittgen's avatar
Pierre Dittgen committed
22
from validata_core.loaders import custom_loaders
23
from validata_ui import app
Pierre Dittgen's avatar
Pierre Dittgen committed
24
from validata_ui.ui_util import flash_error, flash_warning
25
from validata_ui.validata_util import ValidataSource
26
from validata_ui.validate_helper import ValidatorHelper
27

28
29
# Apply the `backports.datetime_fromisoformat` patch so that
# `datetime.fromisoformat` (used when handling the report date) can be called.
MonkeyPatch.patch_fromisoformat()

Christophe Benz's avatar
Christophe Benz committed
30
31
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

32
33

def extract_source_data(source: ValidataSource, preview_rows_nb=5):
    """Read the source table and build the data used to preview it.

    The first row of the stream is taken as the header (None cells become
    empty strings); every following row is a data row whose cells are
    converted to strings (None cells become empty strings).

    Returns a dict with keys 'header' (None if the stream is empty),
    'rows_nb', 'data_rows', 'preview_rows_nb' (capped to the number of data
    rows) and 'preview_rows'.
    """
    # Keyword arguments shared by dialect detection and stream opening
    stream_kwargs = {
        'format': source.format,
        'scheme': source.scheme,
        'custom_loaders': custom_loaders,
    }

    # For CSV sources, detect the delimiter beforehand and pass it to the stream
    if source.format == "csv":
        dialect = csv_helpers.detect_dialect(source.source, **stream_kwargs)
        stream_kwargs['delimiter'] = dialect.delimiter

    header = None
    data_rows = []
    with tabulator.Stream(source.source, **stream_kwargs) as stream:
        for position, row in enumerate(stream):
            if position == 0:
                header = ['' if cell is None else cell for cell in row]
                continue
            data_rows.append(['' if cell is None else str(cell) for cell in row])

    total_rows = len(data_rows)
    shown_rows = min(preview_rows_nb, total_rows)
    return {
        'header': header,
        'rows_nb': total_rows,
        'data_rows': data_rows,
        'preview_rows_nb': shown_rows,
        'preview_rows': data_rows[:shown_rows],
    }
62
63


Pierre Dittgen's avatar
Pierre Dittgen committed
64
65
def improve_errors(errors):
    """Add display context to errors and convert their markdown texts to HTML.

    Each error is a dict. A new dict is built for each one (the input dicts
    are not modified) with these keys added or overridden:

    - 'context': 'body' if the error has a non-None 'row-number', else 'table'
    - 'title': '[<code>]' if the error has no 'title'
    - 'message': '[<code>]' if the message is missing or None; otherwise the
      message converted to HTML, but only when the error has no content
      (pre-check errors); otherwise left untouched
    - 'content': the content converted to HTML, or a placeholder text
      converted to HTML when the content is missing or None

    Returns the list of improved errors, in input order.
    """

    def improve_err(err):
        """Return a copy of `err` completed with context, title, message and content."""

        has_row_number = err.get('row-number') is not None
        update_keys = {'context': 'body' if has_row_number else 'table'}

        # Set default title if no title
        if 'title' not in err:
            update_keys['title'] = '[{}]'.format(err['code'])

        message = err.get('message')
        content = err.get('content')

        if message is None:
            # Missing or None message: fall back to the error code.
            # This is checked first so that None never reaches commonmark().
            update_keys['message'] = '[{}]'.format(err['code'])
        elif content is None:
            # Convert message to HTML only if no content
            # => for pre-checks errors
            update_keys['message'] = commonmark(message)

        # Message content (placeholder when there is none)
        md_content = '*content soon available*' if content is None else content
        update_keys['content'] = commonmark(md_content)

        return {**err, **update_keys}

    return [improve_err(err) for err in errors]
97
98


Pierre Dittgen's avatar
Pierre Dittgen committed
99
def create_validata_ui_report(validata_core_report, schema):
    """Create an error report easier to handle and display in templates.

    Works on a deep copy of `validata_core_report`, which is left untouched:

    - only one table is kept (the first one), under the 'table' key
    - errors are contextualized and their messages improved
    - 'error_count' holds the number of errors of that table
    - errors are separated into 'structure' and 'body'
    - body errors are additionally grouped by row in 'body_by_rows'
    - column comparison information is added when header errors are found
    - error statistics are sorted by title

    `schema` is the schema descriptor dict; its 'fields' entries provide the
    column names, titles and descriptions.

    Returns the new report dict.
    """
    report = copy.deepcopy(validata_core_report)

    # One table is enough
    del report['table-count']
    table = report['tables'][0]
    report['table'] = table
    del report['tables']
    del table['error-count']
    del table['time']
    del table['valid']
    del report['valid']

    # use _ instead of - to ease information picking in jinja2 template
    table['row_count'] = table['row-count']

    # Handy col_count info
    headers = table.get('headers', [])
    table['col_count'] = len(headers)

    # Computes column info
    schema_fields = schema.get('fields', [])
    fields_dict = {f['name']: (f.get('title', 'titre non défini'), f.get('description', '')) for f in schema_fields}
    table['headers_title'] = [fields_dict[h][0] if h in fields_dict else 'colonne inconnue' for h in headers]
    table['headers_description'] = [fields_dict[h][1]
                                    if h in fields_dict else 'Cette colonne n\'est pas définie dans le schema'
                                    for h in headers]

    # Provide better (french) messages
    errors = improve_errors(table['errors'])
    del table['errors']

    # Count errors
    report['error_count'] = len(errors)
    del report['error-count']

    # Then group them in 2 groups : structure and body
    body_errors = []
    structure_errors = []
    for err in errors:
        if err['tag'] == 'structure':
            structure_errors.append(err)
        else:
            body_errors.append(err)

    # Body errors are displayed only if every structure error (if any) is an
    # invalid-column-delimiter one; all() is True for an empty list
    table['do_display_body_errors'] = all(err['code'] == 'invalid-column-delimiter' for err in structure_errors)

    # Checks if a column comparison is needed
    header_errors = ('missing-headers', 'extra-headers', 'wrong-headers-order')
    structure_errors = [{**err, 'in_column_comp': err['code'] in header_errors} for err in structure_errors]
    table['errors'] = {'structure': structure_errors, 'body': body_errors}
    column_comparison_needed = any(err['in_column_comp'] for err in structure_errors)
    if column_comparison_needed:
        column_comparison_table = []
        field_names = [f['name'] for f in schema_fields]
        has_case_errors = False
        # Compare found headers and expected field names position by position
        for header, field_name in itertools.zip_longest(headers, field_names, fillvalue=''):
            status = 'ok' if header == field_name else 'ko'
            if not has_case_errors and status == 'ko' and header.lower() == field_name.lower():
                has_case_errors = True
            column_comparison_table.append((header, field_name, status))
        table['column_comparison_info'] = {
            'table': column_comparison_table,
            'has_missing': len(headers) < len(field_names),
            'has_case_errors': has_case_errors,
        }
    table['column_comparison_needed'] = column_comparison_needed

    # Group body errors by row id: consecutive errors sharing the same row
    # number go to the same group
    rows = []
    for err in body_errors:
        if 'row-number' not in err:
            # Such an error can not be attached to a row: report it and go on
            # instead of failing with a KeyError
            log.warning('Body error without row number, not grouped by row: %r', err)
            continue

        # Note: the error dicts of the 'body' list are modified in place here
        row_id = err.pop('row-number')
        err.pop('context', None)
        if not rows or rows[-1]['row_id'] != row_id:
            rows.append({'row_id': row_id, 'errors': {}})

        # Errors are keyed by column number, or by 'row' for row-level errors
        column_id = err.get('column-number')
        if column_id is not None:
            del err['column-number']
            rows[-1]['errors'][column_id] = err
        else:
            rows[-1]['errors']['row'] = err
    table['errors']['body_by_rows'] = rows

    # Sort by error names in statistics
    stats = table['error-stats']
    code_title_map = messages.ERROR_MESSAGE_DEFAULT_TITLE
    for key in ('structure-errors', 'value-errors'):
        # convert dict into tuples with french title instead of error code
        # and sorts by title
        count_by_code = stats[key]['count-by-code']
        stats[key]['count-by-code'] = sorted(((code_title_map.get(k, k), v) for k, v in count_by_code.items()),
                                             key=itemgetter(0))

    return report


208
def validate(schema_code, source: ValidataSource):
    """Validate source against a schema and display the report.

    Returns the rendered validation report page, or a redirection to the
    validator form (with a flashed error message) when the file format is not
    supported or when the source can not be read.
    """

    try:
        validata_core_report = ValidatorHelper.validate(
            schema_code=schema_code,
            force_strings=True,
            **source.get_tabulator_params(),
        )
    except tabulator.exceptions.FormatError:
        flash_error('Erreur : format de fichier non supporté')
        return redirect(url_for('scdl_validator', val_code=schema_code))

    # A source error means that the file could not be read: only the first
    # one is reported to the user
    table_errors = validata_core_report['tables'][0]['errors']
    source_error = next((err for err in table_errors if err['code'] == 'source-error'), None)
    if source_error is not None:
        msg = source_error['message']
        # A message mentioning 'charmap' is reported as an encoding problem
        if 'charmap' in msg:
            msg = "l'encodage du fichier est invalide. Veuillez le corriger"
        flash_error('Erreur de source : {}'.format(msg))
        return redirect(url_for('scdl_validator', val_code=schema_code))

    source_data = extract_source_data(source)

    # handle report date
    report_datetime = datetime.fromisoformat(validata_core_report['date']).astimezone()

    # Enhance validata_core_report
    validata_report = create_validata_ui_report(validata_core_report, ValidatorHelper.schema(schema_code).descriptor)

    # Display report to the user
    val_info = ValidatorHelper.schema_info(schema_code)
    # NOTE(review): json.dumps(..., sort_keys=True) raises TypeError on a dict
    # mixing int and str keys; 'body_by_rows' error dicts look like they can
    # mix column numbers and the 'row' key -- confirm whether this can happen.
    return render_template('validation_report.html', title='Rapport de validation',
                           val_info=val_info, report=validata_report,
                           validation_date=report_datetime.strftime('le %d/%m/%Y à %Hh%M'),
                           source=source, source_type=source.type, source_data=source_data,
                           print_mode=request.args.get('print', 'false') == 'true',
                           report_str=json.dumps(validata_report, sort_keys=True, indent=2),
                           breadcrumbs=[{'url': url_for('home'), 'title': 'Accueil'},
                                        {'url': url_for('scdl_validator', val_code=schema_code),
                                         'title': val_info['title']}])
249
250


251
252
253
254
255
256
257
258
def bytes_data(f):
    """Return the whole content of an uploaded file as bytes.

    `f` is expected to be a Werkzeug FileStorage instance; only its
    `save(dst)` method is used, to write the content into an in-memory buffer.
    """
    with BytesIO() as buffer:
        f.save(buffer)
        # getvalue() returns the full buffer whatever the stream position is
        return buffer.getvalue()


259
260
261
262
263
264
# Routes


@app.route('/')
def home():
    """Home page: lists the available validators and some external ones."""
    schema_infos = ValidatorHelper.schema_info_list()

    # (title, description, url) of validators hosted by third parties
    external_specs = (
        ("INSPIRE",
         "proposé par la Commission Européenne pour tester des ressources géographiques (données, services ou métadonnées)",
         "http://inspire-sandbox.jrc.ec.europa.eu/validator/"),
        ("BAL",
         "proposé par la mission Etalab pour tester des données Adresse produites localement (format BAL 1.1)",
         "https://adresse.data.gouv.fr/bases-locales/validateur"),
        ("CVDTC",
         "proposé par l'AFIMB dans le cadre du projet Chouette pour tester des données de transport collectif (GTFS ou NEPTUNE)",
         "http://www.conversion-validation-donnees-tc.org/"),
    )
    external_list = [
        {"title": title, "description": description, "url": url}
        for title, description, url in external_specs
    ]

    # Beta notice flashed on every display of the home page
    beta_notice = 'Ce service est fourni en mode beta - certains problèmes peuvent subsister - nous mettons tout en œuvre pour améliorer son fonctionnement en continu.'
    flash_warning(beta_notice)

    return render_template(
        'home.html',
        title='Accueil',
        validators=schema_infos,
        external_validators=external_list,
    )
285
286
287
288
289
290
291
292


@app.route('/validators')
def validators():
    """There is no validators list page: send the user back to the home page."""
    home_url = url_for('home')
    return redirect(home_url)


293
294
295
@app.route('/pdf/<val_code>')
def pdf_report(val_code):
    """PDF report generation.

    Prints the validation report page of the URL given in the `url` query
    parameter to a PDF file using headless chromium, and returns this PDF as
    an attachment.

    Redirects to the validator page, with a flashed error message, if the
    schema is unknown, if no URL is given or if the PDF generation fails.
    """
    err_prefix = 'Erreur de génération du rapport PDF'

    if not ValidatorHelper.schema_exist(val_code):
        flash_error(err_prefix + ': schéma inconnu')
        return redirect(url_for('scdl_validator', val_code=val_code))

    url_param = request.args.get('url')
    if not url_param:
        flash_error(err_prefix + ': URL non fournie')
        return redirect(url_for('scdl_validator', val_code=val_code))

    # URL of the validation report page, in print mode, that chromium will render
    validation_url = '{}?input=url&print=true&url={}'.format(url_for('scdl_validator', val_code=val_code, _external=True),
                                                             quote_plus(url_param))

    # The PDF is written inside a private temporary directory: the file name
    # can not be taken by someone else in the meantime and everything is
    # removed when leaving the `with` block, even if an exception occurs
    with tempfile.TemporaryDirectory(prefix='validata_{}_report_'.format(val_code)) as tmp_dir:
        tmp_pdf_report = Path(tmp_dir) / 'report.pdf'

        cmd = ['chromium', '--headless', '--disable-gpu',
               '--print-to-pdf={}'.format(str(tmp_pdf_report)), validation_url]
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

        if result.returncode != 0:
            failure = "Command %r returned an error: %r"
        elif not tmp_pdf_report.is_file():
            failure = "Command %r did not produce a PDF file: %r"
        else:
            failure = None

        if failure is not None:
            flash_error(err_prefix)
            # errors='replace': logging the failure must not fail on undecodable output
            log.error(failure, cmd, result.stdout.decode('utf-8', errors='replace'))
            return redirect(url_for('scdl_validator', val_code=val_code))

        pdf_content = tmp_pdf_report.read_bytes()

    pdf_filename = 'Rapport de validation {} {}.pdf'.format(val_code,
                                                            datetime.now().strftime('%d-%m-%Y %Hh%M'))
    response = make_response(pdf_content)
    response.headers.set('Content-disposition', 'attachment', filename=pdf_filename)
    response.headers.set('Content-type', 'application/pdf')
    response.headers.set('Content-length', len(pdf_content))

    return response


335
336
337
338
339
340
341
342
343
344
@app.route('/validators/<val_code>', methods=['GET', 'POST'])
def scdl_validator(val_code):
    """Validator page.

    GET without a known `input` parameter displays the validation form; GET
    with `input` set to 'url' or 'example' validates the file found at the
    `url` parameter; POST with `input` set to 'file' validates the uploaded
    file. Problems are flashed and the user is redirected.
    """
    # Unknown schema: back to the home page
    if not ValidatorHelper.schema_exist(val_code):
        flash_error('Validateur [{}] inconnu'.format(val_code))
        return redirect(url_for('home'))

    if request.method == 'GET':
        val_info = ValidatorHelper.schema_info(val_code)

        # First form display (a missing `input` parameter ends up here too)
        if request.args.get('input') not in ('url', 'example'):
            return render_template('validation_form.html', title=val_info['title'],
                                   val_info=val_info,
                                   breadcrumbs=[{'url': url_for('home'), 'title': 'Accueil'}, ])

        # Process URL
        url_param = request.args.get('url')
        if not url_param:
            flash_error("Vous n'avez pas indiqué d'url à valider")
            return redirect(url_for('scdl_validator', val_code=val_code))

        try:
            return validate(val_code, ValidataSource('url', url_param, url_param))
        except tabulator.exceptions.HTTPError as e:
            flash_error('Erreur : impossible d\'accéder au fichier source en ligne')
            log.info(e)
            return redirect(url_for('scdl_validator', val_code=val_code))

    # POST
    input_param = request.form.get('input')
    if input_param is None:
        flash_error('Aucun fichier à valider')
        return redirect(url_for('scdl_validator', val_code=val_code))

    # Anything else than a file upload is not expected
    if input_param != 'file':
        return 'Bizarre, vous avez dit bizarre ?'

    # File validation
    uploaded = request.files.get('file')
    if uploaded is None:
        flash_warning("Vous n'avez pas indiqué de fichier à valider")
        return redirect(url_for('scdl_validator', val_code=val_code))

    return validate(val_code, ValidataSource('file', uploaded.filename, bytes_data(uploaded)))