Commit 84c0a3a8 authored by Pierre Dittgen's avatar Pierre Dittgen

wip

parent af1a8f31
...@@ -6,3 +6,6 @@ SECRET_KEY="dev" ...@@ -6,3 +6,6 @@ SECRET_KEY="dev"
# Comment the two following lines to disable "badge" generation. # Comment the two following lines to disable "badge" generation.
BADGE_CONFIG_URL="https://git.opendatafrance.net/validata/validata-badge/raw/master/badge_conf.toml" BADGE_CONFIG_URL="https://git.opendatafrance.net/validata/validata-badge/raw/master/badge_conf.toml"
SHIELDS_IO_BASE_URL="https://img.shields.io/" SHIELDS_IO_BASE_URL="https://img.shields.io/"
# Validata API endpoint
API_VALIDATE_ENDPOINT=http://127.0.0.1:5600/validate
\ No newline at end of file
## 0.1.0 -> next
Non-breaking changes:
- New feature: validate a CSV against a schema URL
- UI now depends on validata-api and no longer on validata-core
## 0.0.1 -> 0.1.0 ## 0.0.1 -> 0.1.0
Non-breaking changes: Non-breaking changes:
......
...@@ -5,15 +5,17 @@ from urllib.parse import quote_plus ...@@ -5,15 +5,17 @@ from urllib.parse import quote_plus
import flask import flask
import jinja2 import jinja2
import validata_core #import validata_core
# Let this import after app initialisation # Let this import after app initialisation
from . import config from . import config
from .validate_helper import ValidatorHelper #from .validate_helper import ValidatorHelper
# Schemas settings # Schemas settings
schemas_config = validata_core.get_schemas_config() # schemas_config = validata_core.get_schemas_config()
ValidatorHelper.init(schemas_config) # ValidatorHelper.init(schemas_config)
# TODO: load config.toml
# Flask things # Flask things
app = flask.Flask(__name__) app = flask.Flask(__name__)
......
...@@ -12,6 +12,9 @@ load_dotenv() ...@@ -12,6 +12,9 @@ load_dotenv()
SECRET_KEY = os.environ.get("SECRET_KEY") or None SECRET_KEY = os.environ.get("SECRET_KEY") or None
API_VALIDATE_ENDPOINT = os.environ.get("API_VALIDATE_ENDPOINT") or None
if API_VALIDATE_ENDPOINT is None:
log.error("API_VALIDATE_ENDPOINT environment variable is not set, validation is not possible")
BADGE_CONFIG_URL = os.environ.get("BADGE_CONFIG_URL") or None BADGE_CONFIG_URL = os.environ.get("BADGE_CONFIG_URL") or None
BADGE_CONFIG = None BADGE_CONFIG = None
......
{% extends "base_template.html" %}
{% block title %}{{ title }}{% endblock %}
{% block head %}
{{ super() }}
{% endblock %}
{% block content %}
<h1 class="my-4">Validateur personnalisé</h1>
{#
{% set cols_my_classes = 'my-md-0 my-4' %}
<div class="row">
<div class="col-md-4 {{ cols_my_classes }}">
<div class="card bg-faded">
<div class="card-body">
<h5 class="card-title">
Schéma {{ val_info.code }}
{% if val_info.version %}
<span class="badge badge-primary">{{ val_info.version }}</span>
{% else %}
<span
class="badge badge-primary"
title="Schéma en cours de développement"
>dev</span
>
{% endif %}
</h5>
<h6 class="card-subtitle mb-2 text-muted">
{{ val_info.description }}
</h6>
{% if val_info.author or val_info.contributor %}
<p class="text">
{% if val_info.author %} Auteur : {{ val_info.author }}
{% endif %} {% if val_info.contributor %}
<br />Contributeur(s) : {{ val_info.contributor }}
{% endif %}
</p>
{% endif %}
</div>
</div>
</div>
</div>
#}
{# The schema card is disabled for now, so the column classes are defined here:
   a "set" placed inside a Jinja comment is never executed. #}
{% set cols_my_classes = 'my-md-0 my-4' %}
<div class="row">
  <div class="col-md-8 {{ cols_my_classes }}">
    <!-- Tab validator -->
{#
<p class="text">Validez ici le fichier de votre choix</p>
<ul class="nav nav-tabs" id="myTab" role="tablist">
<li class="nav-item">
<a
class="nav-link active"
id="file-tab"
data-toggle="tab"
href="#file"
role="tab"
aria-controls="file"
aria-selected="true"
>Fichier</a
>
</li>
<li class="nav-item">
<a
class="nav-link"
id="url-tab"
data-toggle="tab"
href="#url"
role="tab"
aria-controls="url"
aria-selected="false"
>URL</a
>
</li>
</ul>
{% set padding_class = 'p-3' %}
<div class="tab-content" id="myTabContent">
<div
class="tab-pane fade show active {{ padding_class }}"
id="file"
role="tabpanel"
aria-labelledby="file-tab"
>#}
    <h2>Valider un fichier</h2>
    <div>
      {# File upload: posts the schema URL together with the uploaded file. #}
      <form method="POST" enctype="multipart/form-data">
        <input type="hidden" name="input" value="file" />
        <div class="form-group">
          {# Dedicated id so this label targets this input and does not
             collide with the "schema" id used by the URL form. #}
          <label for="file-schema">Indiquez l'URL du schema de validation</label>
          <input
            name="schema"
            type="url"
            class="form-control"
            id="file-schema"
            aria-describedby="urlHelp"
            placeholder="https://..."
          />
        </div>
        <div class="form-group">
          <label for="file"
            >Choisissez un fichier tabulaire à valider (.xlsx, .xls, .ods,
            .csv, .tsv, etc.)</label
          >
          <input
            type="file"
            class="form-control-file"
            name="file"
            id="file"
            accept=".csv, .xls, .xlsx, .ods"
          />
        </div>
        <button type="submit" class="btn btn-primary">Valider</button>
      </form>
    </div>
    <h2>Valider une URL</h2>
    {# Plain wrapper while the tab navigation is disabled: the former tab-pane
       attributes pointed at a commented-out tab and reused the id "url",
       which belongs to the input below. #}
    <div>
      {# Remote table: sends the schema URL and the table URL as query parameters. #}
      <form method="GET">
        <input type="hidden" name="input" value="url" />
        <div class="form-group">
          <label for="schema">Indiquez l'URL du schema de validation</label>
          <input
            name="schema"
            type="url"
            class="form-control"
            id="schema"
            aria-describedby="urlHelp"
            placeholder="https://..."
          />
        </div>
        <div class="form-group">
          <label for="url">Indiquez l'URL de la table à valider</label>
          <input
            name="url"
            type="url"
            class="form-control"
            id="url"
            aria-describedby="urlHelp"
            placeholder="https://..."
          />
        </div>
        <button type="submit" class="btn btn-primary">Valider</button>
      </form>
    </div>
  </div>
</div>
{% endblock %}
{% block footer %}
{% endblock %}
{% extends "base_template.html" %} {% extends "base_template.html" %} {% block title %}{{ title }}{% endblock %} {%
{% block title %}{{ title }}{% endblock %} block head %}
{% block head %}
{{ super() }} {{ super() }}
{% endblock %} {% endblock %} {% block content %}
{% block content %}
<h1 class="my-4">Validez vos jeux de données</h1> <h1 class="my-4">Validez vos jeux de données</h1>
<h2>Validateurs <span abbr="Socle commun des données locales">SCDL</span></h2> <h2>Validateurs <span abbr="Socle commun des données locales">SCDL</span></h2>
<div class="row my-4"> <div class="row my-4">
{% for val in validators %} {% for val in validators %}
<div class="col-sm-4 col-md-3 mb-4"> <div class="col-sm-4 col-md-3 mb-4">
<div class="card text-center h-100"> <div class="card text-center h-100">
<div class="card-body d-flex flex-column"> <div class="card-body d-flex flex-column">
<h4 class="card-title">{{ val.title }}</h4> <h4 class="card-title">{{ val.title }}</h4>
<p class="card-text">{{ val.description }}</p> <p class="card-text">{{ val.description }}</p>
<a href="{{ url_for('scdl_validator', val_code=val.code) }}" class="btn btn-primary mt-auto">Choisir</a> <a
</div> href="{{ url_for('scdl_validator', val_code=val.code) }}"
</div> class="btn btn-primary mt-auto"
>Choisir</a
>
</div>
</div> </div>
{% endfor %} </div>
{% endfor %}
</div> </div>
<h2>Validateurs externes</h2> <h2>Validateurs externes</h2>
<div class="row my-4"> <div class="row my-4">
{% for val in external_validators %} {% for val in external_validators %}
<div class="col-sm-4 col-md-3 mb-4"> <div class="col-sm-4 col-md-3 mb-4">
<div class="card text-center h-100"> <div class="card text-center h-100">
<div class="card-body d-flex flex-column"> <div class="card-body d-flex flex-column">
<h4 class="card-title">{{ val.title }}</h4> <h4 class="card-title">{{ val.title }}</h4>
<p class="card-text">{{ val.description }}</p> <p class="card-text">{{ val.description }}</p>
<a href="{{ val.url }}" class="btn btn-primary mt-auto" target="_blank"> <a href="{{ val.url }}" class="btn btn-primary mt-auto" target="_blank">
Voir Voir
<i class="fas fa-external-link-alt ml-1"></i> <i class="fas fa-external-link-alt ml-1"></i>
</a> </a>
</div> </div>
</div>
</div> </div>
{% endfor %} </div>
{% endfor %}
</div> </div>
{% endblock %} <h2>Validateur personnalisé</h2>
\ No newline at end of file <p>
<a href="{{ url_for('custom_validator') }}"
>Choisissez le schéma qui vous convient</a
>
</p>
{% endblock %}
...@@ -3,10 +3,8 @@ ...@@ -3,10 +3,8 @@
import logging import logging
from io import BytesIO from io import BytesIO
import validata_core
from validata_core.source_helpers import build_tabulator_params from validata_core.source_helpers import build_tabulator_params
from .validate_helper import ValidatorHelper
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
...@@ -14,9 +12,9 @@ log = logging.getLogger(__name__) ...@@ -14,9 +12,9 @@ log = logging.getLogger(__name__)
class ValidataSource(): class ValidataSource():
""" Handy class to handle different sort of data source """ """ Handy class to handle different sort of data source """
def __init__(self, type, name, source): def __init__(self, type_, name, source):
""" Initialization """ """ Initialization """
self.type = type self.type = type_
self.name = name self.name = name
self.source = source self.source = source
...@@ -25,10 +23,17 @@ class ValidataSource(): ...@@ -25,10 +23,17 @@ class ValidataSource():
self.format = info.get('format') self.format = info.get('format')
self.scheme = info.get('scheme') self.scheme = info.get('scheme')
def get_tabulator_params(self): def is_url(self):
""" Creates source ready to be ingested by tabulator """ return self.type == 'url'
return {'source': self.source, 'format': self.format, 'scheme': self.scheme} def get_url(self):
return self.source
def get_filename(self):
return self.name
def get_filecontent(self):
return self.source
def bytes_data(f): def bytes_data(f):
......
...@@ -61,9 +61,6 @@ class ValidatorHelper: ...@@ -61,9 +61,6 @@ class ValidatorHelper:
return [cls.schema_info(code) for code in sorted(cls.schema_dict.keys())] return [cls.schema_info(code) for code in sorted(cls.schema_dict.keys())]
@classmethod @classmethod
def validate(cls, schema_code, **options): def validate(cls, schema_url, **options):
""" Try to retrieve cached schema from `schema_code`, otherwise pass `schema_code` it as-is """ """Validate"""
schema = cls.schema(schema_code) return cls.validator.validate(schema=schema_url, **options)
if schema is None:
schema = schema_code
return cls.validator.validate(schema=schema, **options)
...@@ -2,26 +2,26 @@ ...@@ -2,26 +2,26 @@
Routes Routes
""" """
import copy import copy
import io
import itertools import itertools
import json import json
import logging import logging
import subprocess import subprocess
import tempfile import tempfile
from datetime import datetime from datetime import datetime
from io import BytesIO
from operator import itemgetter from operator import itemgetter
from pathlib import Path from pathlib import Path
from urllib.parse import quote_plus from urllib.parse import quote_plus
import requests
from backports.datetime_fromisoformat import MonkeyPatch from backports.datetime_fromisoformat import MonkeyPatch
from flask import make_response, redirect, render_template, request, url_for
import tabulator
import validata_core
from commonmark import commonmark from commonmark import commonmark
from flask import make_response, redirect, render_template, request, url_for
from validata_core import compute_badge, csv_helpers, messages from validata_core import compute_badge, csv_helpers, messages
from validata_core.loaders import custom_loaders from validata_core.loaders import custom_loaders
import tabulator
from . import app, config from . import app, config
from .ui_util import flash_error, flash_warning from .ui_util import flash_error, flash_warning
from .validata_util import ValidataSource from .validata_util import ValidataSource
...@@ -43,6 +43,12 @@ def extract_source_data(source: ValidataSource, preview_rows_nb=5): ...@@ -43,6 +43,12 @@ def extract_source_data(source: ValidataSource, preview_rows_nb=5):
rows = [] rows = []
nb_rows = 0 nb_rows = 0
# if source.scheme == 'file':
# source.scheme = 'bytes'
import ipdb
ipdb.set_trace()
options = {} options = {}
if source.format == "csv": if source.format == "csv":
options['delimiter'] = csv_helpers.detect_dialect(source.source, format=source.format, scheme=source.scheme, options['delimiter'] = csv_helpers.detect_dialect(source.source, format=source.format, scheme=source.scheme,
...@@ -98,7 +104,7 @@ def improve_errors(errors): ...@@ -98,7 +104,7 @@ def improve_errors(errors):
return list(map(improve_err, errors)) return list(map(improve_err, errors))
def create_validata_ui_report(validata_core_report, schema): def create_validata_ui_report(validata_core_report, schema_dict):
""" Creates an error report easier to handle and display in templates: """ Creates an error report easier to handle and display in templates:
- only one table - only one table
- errors are contextualized - errors are contextualized
...@@ -125,8 +131,8 @@ def create_validata_ui_report(validata_core_report, schema): ...@@ -125,8 +131,8 @@ def create_validata_ui_report(validata_core_report, schema):
report['table']['col_count'] = len(headers) report['table']['col_count'] = len(headers)
# Computes column info # Computes column info
schema_fields = schema.get('fields', []) fields_dict = {f['name']: (f.get('title', 'titre non défini'), f.get('description', ''))
fields_dict = {f['name']: (f.get('title', 'titre non défini'), f.get('description', '')) for f in schema_fields} for f in schema_dict.get('fields', [])}
report['table']['headers_title'] = [fields_dict[h][0] if h in fields_dict else 'colonne inconnue' for h in headers] report['table']['headers_title'] = [fields_dict[h][0] if h in fields_dict else 'colonne inconnue' for h in headers]
report['table']['headers_description'] = [fields_dict[h][1] report['table']['headers_description'] = [fields_dict[h][1]
if h in fields_dict else 'Cette colonne n\'est pas définie dans le schema' for h in headers] if h in fields_dict else 'Cette colonne n\'est pas définie dans le schema' for h in headers]
...@@ -160,7 +166,7 @@ def create_validata_ui_report(validata_core_report, schema): ...@@ -160,7 +166,7 @@ def create_validata_ui_report(validata_core_report, schema):
column_comparison_table = [] column_comparison_table = []
if column_comparison_needed: if column_comparison_needed:
column_comparison_table = [] column_comparison_table = []
field_names = [f['name'] for f in schema_fields] field_names = [f['name'] for f in schema_dict.get('fields', [])]
has_case_errors = False has_case_errors = False
for t in itertools.zip_longest(headers, field_names, fillvalue=''): for t in itertools.zip_longest(headers, field_names, fillvalue=''):
status = 'ok' if t[0] == t[1] else 'ko' status = 'ok' if t[0] == t[1] else 'ko'
...@@ -235,28 +241,41 @@ def get_badge_url_and_message(badge): ...@@ -235,28 +241,41 @@ def get_badge_url_and_message(badge):
config.SHIELDS_IO_BASE_URL, quote_plus(msg), color), msg) config.SHIELDS_IO_BASE_URL, quote_plus(msg), color), msg)
def validate(schema_code, source: ValidataSource): def validate(schema_url, source: ValidataSource):
""" Validate source and display report """ """ Validate source and display report """
if config.API_VALIDATE_ENDPOINT is None:
flash_error("No Validate endpoint defined :-(")
return redirect(url_for("custom_validator"))
api_url = config.API_VALIDATE_ENDPOINT
headers = {"Accept": "application/json"}
try: try:
validata_core_report = ValidatorHelper.validate(
schema_code=schema_code,
force_strings=True,
**source.get_tabulator_params()
)
# Validator.validate() doesn't throw FormatError if source.format is None if source.is_url():
# Just do it manually params = {
if source.format is None: "schema": schema_url,
raise tabulator.exceptions.FormatError() "url": source.get_url(),
}
req = requests.get(api_url, params=params, headers=headers)
else:
files = {'file': (source.name, io.BytesIO(source.source))}
data = {'schema': schema_url}
req = requests.post(api_url, data=data, files=files, headers=headers)
except tabulator.exceptions.FormatError: if not req.ok:
flash_error('Erreur : format de fichier non supporté') flash_error("{}: :(".format(req.status_code))
return redirect(url_for('scdl_validator', val_code=schema_code)) return redirect(url_for("home"))
except validata_core.MissingHeaderError: json_response = req.json()
flash_error("Erreur : impossible d'extraire les données d'entête du fichier source") validata_core_report = json_response['report']
return redirect(url_for('scdl_validator', val_code=schema_code)) schema_dict = json_response['schema']
except requests.ConnectionError as err:
logging.exception(err)
flash_error(str(err))
return redirect(url_for('home'))
# Computes badge from report and badge configuration # Computes badge from report and badge configuration
badge = compute_badge(validata_core_report, config.BADGE_CONFIG) badge = compute_badge(validata_core_report, config.BADGE_CONFIG)
...@@ -268,7 +287,7 @@ def validate(schema_code, source: ValidataSource): ...@@ -268,7 +287,7 @@ def validate(schema_code, source: ValidataSource):
msg = "l'encodage du fichier est invalide. Veuillez le corriger" if 'charmap' in err[ msg = "l'encodage du fichier est invalide. Veuillez le corriger" if 'charmap' in err[
'message'] else err['message'] 'message'] else err['message']
flash_error('Erreur de source : {}'.format(msg)) flash_error('Erreur de source : {}'.format(msg))
return redirect(url_for('scdl_validator', val_code=schema_code)) return redirect(url_for('custom_validator'))
source_data = extract_source_data(source) source_data = extract_source_data(source)
...@@ -276,25 +295,24 @@ def validate(schema_code, source: ValidataSource): ...@@ -276,25 +295,24 @@ def validate(schema_code, source: ValidataSource):
report_datetime = datetime.fromisoformat(validata_core_report['date']).astimezone() report_datetime = datetime.fromisoformat(validata_core_report['date']).astimezone()
# Enhance validata_core_report # Enhance validata_core_report
validata_report = create_validata_ui_report(validata_core_report, ValidatorHelper.schema(schema_code).descriptor) validata_report = create_validata_ui_report(validata_core_report, schema_dict)
# Display report to the user # Display report to the user
val_info = ValidatorHelper.schema_info(schema_code)
return render_template('validation_report.html', title='Rapport de validation', return render_template('validation_report.html', title='Rapport de validation',
val_info=ValidatorHelper.schema_info(schema_code), report=validata_report, val_info={}, report=validata_report,
validation_date=report_datetime.strftime('le %d/%m/%Y à %Hh%M'), validation_date=report_datetime.strftime('le %d/%m/%Y à %Hh%M'),
source=source, source_type=source.type, source_data=source_data, source=source, source_type=source.type, source_data=source_data,
print_mode=request.args.get('print', 'false') == 'true', print_mode=request.args.get('print', 'false') == 'true',
badge_url=badge_url, badge_msg=badge_msg, badge_url=badge_url, badge_msg=badge_msg,
report_str=json.dumps(validata_report, sort_keys=True, indent=2), report_str=json.dumps(validata_report, sort_keys=True, indent=2),
breadcrumbs=[{'url': url_for('home'), 'title': 'Accueil'}, breadcrumbs=[{'url': url_for('home'), 'title': 'Accueil'},
{'url': url_for('scdl_validator', val_code=schema_code), {'url': url_for('custom_validator'),
'title': val_info['title']}]) 'title': "Rapport de validation"}])
def bytes_data(f): def bytes_data(f):
""" Gets bytes data from Werkzeug FileStorage instance """ """ Gets bytes data from Werkzeug FileStorage instance """
iob = BytesIO() iob = io.BytesIO()
f.save(iob) f.save(iob)
iob.seek(0) iob.seek(0)
return iob.getvalue() return iob.getvalue()