views.py 13.3 KB
Newer Older
1 2 3 4
#!/usr/bin/env python3
"""
    Routes
"""
5
import copy
Pierre Dittgen's avatar
Pierre Dittgen committed
6
import itertools
7
import json
8 9
import subprocess
import time
10
from datetime import datetime
11
from io import BytesIO
12
from operator import itemgetter
13 14
from pathlib import Path
from urllib.parse import quote_plus
15

16
from backports.datetime_fromisoformat import MonkeyPatch
Pierre Dittgen's avatar
Pierre Dittgen committed
17
from commonmark import commonmark
18
from flask import make_response, redirect, render_template, request, url_for
19

20
import tabulator
21
from validata_core import csv_helpers, messages
Pierre Dittgen's avatar
Pierre Dittgen committed
22
from validata_core.loaders import custom_loaders
23
from validata_ui import app
Pierre Dittgen's avatar
Pierre Dittgen committed
24
from validata_ui.ui_util import flash_error, flash_warning
25
from validata_ui.validata_util import ValidataSource
26
from validata_ui.validate_helper import ValidatorHelper
27

28 29
MonkeyPatch.patch_fromisoformat()

30 31

def extract_source_data(source: ValidataSource, preview_rows_nb=5):
    """Read the whole tabular source and build the data used for the preview.

    The first row yielded by the stream is taken as the header (``None``
    cells become empty strings); every following row is a data row whose
    cells are converted to strings (``None`` becomes an empty string).

    Returns a dict with the keys ``header``, ``rows_nb``, ``data_rows``,
    ``preview_rows_nb`` and ``preview_rows``. ``preview_rows_nb`` is capped
    to the number of data rows actually read. ``header`` stays ``None``
    when the stream yields no row at all.
    """
    stream_kwargs = {
        'format': source.format,
        'scheme': source.scheme,
        'custom_loaders': custom_loaders,
    }
    if source.format == "csv":
        # Sniff the delimiter first so that the stream parses CSV correctly
        dialect = csv_helpers.detect_dialect(source.source, **stream_kwargs)
        stream_kwargs['delimiter'] = dialect.delimiter

    header = None
    data_rows = []
    with tabulator.Stream(source.source, **stream_kwargs) as stream:
        for cells in stream:
            if header is None:
                header = ['' if cell is None else cell for cell in cells]
                continue
            data_rows.append(['' if cell is None else str(cell) for cell in cells])

    total_rows = len(data_rows)
    shown_rows = min(preview_rows_nb, total_rows)
    return {
        'header': header,
        'rows_nb': total_rows,
        'data_rows': data_rows,
        'preview_rows_nb': shown_rows,
        'preview_rows': data_rows[:shown_rows],
    }
60 61


Pierre Dittgen's avatar
Pierre Dittgen committed
62 63
def improve_errors(errors):
    """Add context to errors and convert their markdown content to HTML.

    Each error is a dict; a new dict is returned for each of them (the
    input dicts are not modified) with these keys added or overridden:

    - ``context``: ``'body'`` when the error carries a non-``None``
      ``row-number``, ``'table'`` otherwise
    - ``title``: defaults to ``'[<code>]'`` when missing
    - ``message``: rendered from markdown when the error has no
      ``content`` (pre-check errors), or defaulted to ``'[<code>]'`` when
      missing or ``None``
    - ``content``: rendered from markdown, with a placeholder text when
      the error has no ``content``

    Raises ``KeyError`` if a default title or message is needed and the
    error has no ``code``.
    """

    def improve_err(err):
        """Return a copy of ``err`` enriched with context and HTML content."""
        has_row = err.get('row-number') is not None
        update_keys = {'context': 'body' if has_row else 'table'}

        # Set default title if no title
        if 'title' not in err:
            update_keys['title'] = '[{}]'.format(err['code'])

        message = err.get('message')
        if message is None:
            # Missing or None message: fall back to the error code.
            # This is checked first so that a None message is never passed
            # to the markdown renderer, whatever the 'content' presence.
            update_keys['message'] = '[{}]'.format(err['code'])
        elif 'content' not in err:
            # Message is rendered as markdown only if there is no content
            # => for pre-checks errors
            update_keys['message'] = commonmark(message)

        # Message content
        md_content = err['content'] if 'content' in err else '*content soon available*'
        update_keys['content'] = commonmark(md_content)

        return {**err, **update_keys}

    return [improve_err(err) for err in errors]
95 96


Pierre Dittgen's avatar
Pierre Dittgen committed
97
def create_validata_ui_report(validata_core_report, schema):
    """Create an error report easier to handle and display in templates.

    Works on a deep copy of ``validata_core_report`` (the input is left
    untouched) and returns the transformed copy:

    - only one table (``report['table']``, taken from ``tables[0]``)
    - errors are contextualized and their messages improved
    - ``error_count`` is recomputed from the improved errors
    - errors are separated into "structure" and "body"
    - body errors are grouped by row in ``errors['body_by_rows']``
    - a column comparison table is added when a header error is present
    - error statistics are turned into lists of ``(title, count)`` tuples
      sorted by title

    ``schema`` is the schema descriptor (a dict); its ``fields`` entry is
    used to compute column titles and descriptions.

    Raises ``KeyError`` if a body error has no ``row-number`` key.
    """
    report = copy.deepcopy(validata_core_report)

    # One table is enough
    del report['table-count']
    report['table'] = report['tables'][0]
    del report['tables']
    del report['table']['error-count']
    del report['table']['time']
    del report['table']['valid']
    del report['valid']
    table = report['table']

    # use _ instead of - to ease information picking in jinja2 template
    table['row_count'] = table['row-count']

    # Handy col_count info
    headers = table.get('headers', [])
    table['col_count'] = len(headers)

    # Computes column info
    schema_fields = schema.get('fields', [])
    fields_dict = {f['name']: (f.get('title', 'titre non défini'), f.get('description', ''))
                   for f in schema_fields}
    table['headers_title'] = [fields_dict[h][0] if h in fields_dict else 'colonne inconnue'
                              for h in headers]
    table['headers_description'] = [
        fields_dict[h][1] if h in fields_dict else "Cette colonne n'est pas définie dans le schema"
        for h in headers
    ]

    # Provide better (french) messages
    errors = improve_errors(table['errors'])
    del table['errors']

    # Count errors
    report['error_count'] = len(errors)
    del report['error-count']

    # Then group them in 2 groups : structure and body
    structure_errors = [err for err in errors if err['tag'] == 'structure']
    body_errors = [err for err in errors if err['tag'] != 'structure']
    table['errors'] = {'structure': structure_errors, 'body': body_errors}

    # Body errors are displayed when there is no structure error other than
    # invalid-column-delimiter (all() is True for an empty list)
    table['do_display_body_errors'] = all(
        err['code'] == 'invalid-column-delimiter' for err in structure_errors)

    # Checks if a column comparison is needed
    header_errors = ('missing-headers', 'extra-headers', 'wrong-headers-order')
    structure_errors = [{**err, 'in_column_comp': err['code'] in header_errors}
                        for err in structure_errors]
    table['errors']['structure'] = structure_errors
    column_comparison_needed = any(err['in_column_comp'] for err in structure_errors)
    if column_comparison_needed:
        field_names = [f['name'] for f in schema_fields]
        column_comparison_table = []
        has_case_errors = False
        # Compare file headers and schema field names position by position;
        # the shorter list is padded with empty strings
        for header, field_name in itertools.zip_longest(headers, field_names, fillvalue=''):
            status = 'ok' if header == field_name else 'ko'
            if not has_case_errors and status == 'ko' and header.lower() == field_name.lower():
                has_case_errors = True
            column_comparison_table.append((header, field_name, status))
        table['column_comparison_info'] = {
            'table': column_comparison_table,
            'has_missing': len(headers) < len(field_names),
            'has_case_errors': has_case_errors,
        }
    table['column_comparison_needed'] = column_comparison_needed

    # Group body errors by row id (consecutive errors sharing the same
    # row-number go into the same group)
    rows = []
    for err in body_errors:
        if 'row-number' not in err:
            raise KeyError("body error without 'row-number': {!r}".format(err))
        row_id = err['row-number']
        del err['row-number']
        del err['context']
        # Start a new group on the first error and on each row change; testing
        # the emptiness of `rows` avoids relying on a sentinel row number
        if not rows or row_id != rows[-1]['row_id']:
            rows.append({'row_id': row_id, 'errors': {}})

        column_id = err.get('column-number')
        if column_id is not None:
            del err['column-number']
            rows[-1]['errors'][column_id] = err
        else:
            rows[-1]['errors']['row'] = err
    table['errors']['body_by_rows'] = rows

    # Sort by error names in statistics
    stats = table['error-stats']
    code_title_map = messages.ERROR_MESSAGE_DEFAULT_TITLE
    for key in ('structure-errors', 'value-errors'):
        # convert dict into tuples with french title instead of error code
        # and sorts by title
        stats[key]['count-by-code'] = sorted(
            ((code_title_map.get(code, code), count)
             for code, count in stats[key]['count-by-code'].items()),
            key=itemgetter(0))

    return report


206
def validate(schema_code, source: ValidataSource):
    """Validate source and display report.

    Runs the validation of ``source`` against the schema identified by
    ``schema_code``, then renders the ``validation_report.html`` template.
    When the source format is not supported, an error is flashed and the
    user is redirected to the validator form.
    """
    try:
        validata_core_report = ValidatorHelper.validate(
            schema_code=schema_code,
            force_strings=True,
            **source.get_tabulator_params(),
        )
    except tabulator.exceptions.FormatError:
        flash_error('Erreur : format de fichier non supporté')
        return redirect(url_for('scdl_validator', val_code=schema_code))

    source_data = extract_source_data(source)

    # handle report date
    report_datetime = datetime.fromisoformat(validata_core_report['date']).astimezone()

    # Enhance validata_core_report
    schema_descriptor = ValidatorHelper.schema(schema_code).descriptor
    validata_report = create_validata_ui_report(validata_core_report, schema_descriptor)

    # Display report to the user; schema info is looked up once and shared
    # between the template variable and the breadcrumbs
    val_info = ValidatorHelper.schema_info(schema_code)
    breadcrumbs = [
        {'url': url_for('home'), 'title': 'Accueil'},
        {'url': url_for('scdl_validator', val_code=schema_code), 'title': val_info['title']},
    ]
    return render_template('validation_report.html', title='Rapport de validation',
                           val_info=val_info, report=validata_report,
                           validation_date=report_datetime.strftime('le %d/%m/%Y à %Hh%M'),
                           source=source, source_type=source.type, source_data=source_data,
                           print_mode=request.args.get('print', 'false') == 'true',
                           report_str=json.dumps(validata_report, sort_keys=True, indent=2),
                           breadcrumbs=breadcrumbs)
238 239


240 241 242 243 244 245 246 247
def bytes_data(f):
    """Return the full content of an uploaded file as bytes.

    ``f`` is expected to expose a ``save(dst)`` method writing its content
    into a binary stream (as Werkzeug FileStorage instances do).
    """
    with BytesIO() as buffer:
        f.save(buffer)
        # getvalue() returns the entire buffer whatever the stream position
        return buffer.getvalue()


248 249 250 251 252 253
# Routes


@app.route('/')
def home():
    """Home page: lists the available validators and shows the beta notice."""
    schema_infos = ValidatorHelper.schema_info_list()
    beta_notice = (
        'Ce service est fourni en mode beta - certains problèmes peuvent subsister - '
        'nous mettons tout en œuvre pour améliorer son fonctionnement en continu'
    )
    flash_warning(beta_notice)
    return render_template('home.html', title='Accueil', validators=schema_infos)


@app.route('/validators')
def validators():
    """There is no standalone validators page: send the user to the home page."""
    home_url = url_for('home')
    return redirect(home_url)


265 266 267
@app.route('/pdf/<val_code>')
def pdf_report(val_code):
    """PDF report generation.

    Prints the validation report page of the URL given in the ``url`` query
    parameter to a temporary PDF file using headless chromium, and sends
    that PDF as an attachment named ``report.pdf``.

    On any failure (unknown schema, missing URL, chromium error) an error
    is flashed and the user is redirected to the validator form.
    """
    err_prefix = 'Erreur de génération du rapport PDF'

    if not ValidatorHelper.schema_exist(val_code):
        flash_error(err_prefix + ': schéma inconnu')
        return redirect(url_for('scdl_validator', val_code=val_code))

    url_param = request.args.get('url')
    if not url_param:
        flash_error(err_prefix + ': URL non fournie')
        return redirect(url_for('scdl_validator', val_code=val_code))

    validation_url = '{}?input=url&print=true&url={}'.format(
        url_for('scdl_validator', val_code=val_code, _external=True),
        quote_plus(url_param))

    # temporary pdf_report.pdf filename
    tmp_pdf_report = Path('/tmp') / 'validata_{}_{}_pdf_report.pdf'.format(val_code, str(time.time()))

    # The command is passed as a list (no shell), so the URL is never
    # interpreted by a shell
    cmd = ['chromium', '--headless', '--disable-gpu',
           '--print-to-pdf={}'.format(str(tmp_pdf_report)), validation_url]
    try:
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        if result.returncode != 0:
            flash_error(err_prefix)
            # Use the Flask application logger: no module-level logger is
            # defined in this file
            app.logger.error("Command %r returned an error: %r", cmd,
                             result.stdout.decode('utf-8', errors='replace'))
            return redirect(url_for('scdl_validator', val_code=val_code))

        # read_bytes() opens, reads and closes the file
        pdf_data = tmp_pdf_report.read_bytes()
    finally:
        # Always remove the temporary file, on success as well as on failure
        if tmp_pdf_report.exists():
            tmp_pdf_report.unlink()

    response = make_response(pdf_data)
    response.headers.set('Content-disposition', 'attachment', filename='report.pdf')
    response.headers.set('Content-type', 'application/pdf')
    response.headers.set('Content-length', len(pdf_data))

    return response


305 306 307 308 309 310 311 312 313 314
@app.route('/validators/<val_code>', methods=['GET', 'POST'])
def scdl_validator(val_code):
    """Validator page.

    - GET without a recognised ``input`` parameter: displays the
      validation form
    - GET with ``input`` set to ``url`` or ``example``: validates the
      resource designated by the ``url`` parameter
    - any other method (POST): validates the uploaded file when ``input``
      is ``file``
    """

    def back_to_form():
        """Redirect to the form of the current validator."""
        return redirect(url_for('scdl_validator', val_code=val_code))

    if not ValidatorHelper.schema_exist(val_code):
        flash_error('Validateur [{}] inconnu'.format(val_code))
        return redirect(url_for('home'))

    if request.method == 'GET':
        val_info = ValidatorHelper.schema_info(val_code)

        # First form display
        if request.args.get('input') not in ('url', 'example'):
            return render_template('validation_form.html', title=val_info['title'],
                                   val_info=val_info,
                                   breadcrumbs=[{'url': url_for('home'), 'title': 'Accueil'}, ])

        # Process URL
        url_param = request.args.get('url')
        if not url_param:
            flash_error("Vous n'avez pas indiqué d'url à valider")
            return back_to_form()
        return validate(val_code, ValidataSource('url', url_param, url_param))

    # POST
    input_param = request.form.get('input')
    if input_param is None:
        flash_error('Aucun fichier à valider')
        return back_to_form()

    if input_param != 'file':
        return 'Bizarre, vous avez dit bizarre ?'

    # File validation
    uploaded = request.files.get('file')
    if uploaded is None:
        flash_warning("Vous n'avez pas indiqué de fichier à valider")
        return back_to_form()

    return validate(val_code, ValidataSource('file', uploaded.filename, bytes_data(uploaded)))