Commit aca3cd31 authored by Pierre Dittgen's avatar Pierre Dittgen

schema can be expressed as name,ref or url

parent f8ba2b03
[
{
"sections": [
{
"code": "scdl",
"title": "Validateurs SCDL",
"catalog": [
{
"code": "scdl-adresses",
"title": "Adresses locales",
"description": "Liste des adresses locales d'une collectivité",
"schema_url": "https://git.opendatafrance.net/scdl/adresses/raw/v1.1.3/schema.json",
"doc_url": "https://scdl.opendatafrance.net/docs/schemas/scdl-adresses.html"
},
{
"code": "scdl-catalogue",
"title": "Catalogue simplifié",
"description": "Liste des jeux de données publiés en open data par une collectivité",
"schema_url": "https://git.opendatafrance.net/scdl/catalogue/raw/v0.1.1/schema.json",
"doc_url": "https://scdl.opendatafrance.net/docs/schemas/scdl-catalogue.html"
},
{
"code": "scdl-deliberations",
"title": "Délibérations",
"description": "Liste des délibérations adoptées par une assemblée locale",
"schema_url": "https://git.opendatafrance.net/scdl/deliberations/raw/v2.1.2/schema.json",
"doc_url": "https://scdl.opendatafrance.net/docs/schemas/scdl-deliberations.html"
},
{
"code": "scdl-equipements",
"title": "Equipements",
"description": "Liste des équipements collectifs publics d'une collectivité",
"schema_url": "https://git.opendatafrance.net/scdl/equipements/raw/v0.1.0/schema.json",
"doc_url": "https://scdl.opendatafrance.net/docs/schemas/scdl-equipements.html"
},
{
"code": "scdl-irve",
"title": "Infrastructures de recharge de véhicule électrique",
"description": "Liste des infrastructures de recharge de véhicules électriques d'une collectivité",
"schema_url": "https://raw.githubusercontent.com/OpenDataFrance/schema.data.gouv.fr/master/irve/schema.json",
"doc_url": "https://scdl.opendatafrance.net/docs/schemas/scdl-irve.html"
},
{
"code": "scdl-marches-publics",
"title": "Marchés publics",
"description": "Liste des marchés publics attribués par une collectivité",
"schema_url": "https://git.opendatafrance.net/scdl/marches-publics/raw/v2.0.0/schema.json",
"doc_url": "https://scdl.opendatafrance.net/docs/schemas/scdl-marches-publics.html"
},
{
"code": "scdl-prenoms",
"title": "Prénoms des nouveaux-nés",
"description": "Liste des prénoms des nouveaux-nés déclarés à l'état-civil",
"schema_url": "https://github.com/OpenDataFrance/liste-prenoms-nouveaux-nes/raw/v1.1.3/schema.json",
"doc_url": "https://scdl.opendatafrance.net/docs/schemas/scdl-prenoms.html"
},
{
"code": "scdl-subventions",
"title": "Subventions",
"description": "Liste des subventions publiques attribuées par une collectivité",
"schema_url": "https://git.opendatafrance.net/scdl/subventions/raw/v2.0.0/schema.json",
"doc_url": "https://scdl.opendatafrance.net/docs/schemas/scdl-subventions.html"
}
]
"code": "scdl",
"title": "Validateurs SCDL",
"catalog": "https://git.opendatafrance.net/scdl/schema-catalog/raw/master/schema_catalog.toml"
},
{
"code": "external",
"title": "Validateurs externes",
"catalog": [
{
"code": "inspire",
"type": "external",
"title": "INSPIRE",
"description": "proposé par la Commission Européenne pour tester des ressources géographiques (données, services ou métadonnées)",
"website": "http://inspire-sandbox.jrc.ec.europa.eu/validator/"
},
{
"code": "bal",
"type": "external",
"title": "BAL",
"description": "proposé par la mission Etalab pour tester des données Adresse produites localement (format BAL 1.1)",
"website": "https://adresse.data.gouv.fr/bases-locales/validateur"
},
{
"code": "cvdtc",
"type": "external",
"title": "CVDTC",
"description": "proposé par l'AFIMB dans le cadre du projet Chouette pour tester des données de transport collectif (GTFS ou NEPTUNE)",
"website": "http://www.conversion-validation-donnees-tc.org/"
}
]
"code": "external",
"title": "Validateurs externes",
"catalog": [
{
"name": "inspire",
"type": "external",
"title": "INSPIRE",
"description": "proposé par la Commission Européenne pour tester des ressources géographiques (données, services ou métadonnées)",
"website": "http://inspire-sandbox.jrc.ec.europa.eu/validator/"
},
{
"name": "bal",
"type": "external",
"title": "BAL",
"description": "proposé par la mission Etalab pour tester des données Adresse produites localement (format BAL 1.1)",
"website": "https://adresse.data.gouv.fr/bases-locales/validateur"
},
{
"name": "cvdtc",
"type": "external",
"title": "CVDTC",
"description": "proposé par l'AFIMB dans le cadre du projet Chouette pour tester des données de transport collectif (GTFS ou NEPTUNE)",
"website": "http://www.conversion-validation-donnees-tc.org/"
}
]
}
]
\ No newline at end of file
]
}
......@@ -3,19 +3,34 @@ import os
from pathlib import Path
from urllib.parse import quote_plus
import opendataschema
import flask
import jinja2
import requests
import tableschema
from cachetools.func import ttl_cache
# Let this import after app initialisation
from . import config
# TODO: load config.toml
@ttl_cache(maxsize=20*1024, ttl=5*60)
def download_with_cache(url, timeout=30):
    """Fetch `url` with an HTTP GET, reusing recently fetched responses.

    The decorator keeps each result for 5 minutes (ttl=5*60), so repeated
    downloads of the same URL during that period do not hit the network.

    url -- address to download
    timeout -- seconds to wait for the server before giving up; without a
        timeout, requests can block forever on an unresponsive server

    Returns the requests response object. The HTTP status is not checked
    here; that is left to the caller.
    """
    return requests.get(url, timeout=timeout)
@ttl_cache(maxsize=20*1024, ttl=5*60)
def schema_from_url(url):
    """Build a tableschema.Schema from the descriptor located at `url`.

    Results are cached by the decorator for 5 minutes (ttl=5*60), so the same
    URL is not loaded again on every request.
    """
    table_schema = tableschema.Schema(url)
    return table_schema
# load config.json and catalog_schema_toml
ui_config = json.load(config.UI_CONFIG_FILE.open('rt', encoding='utf-8')) if config.UI_CONFIG_FILE else []
schema_info_map = {}
for section in ui_config:
for val in section['catalog']:
if 'schema_url' in val:
schema_info_map[val['schema_url']] = val
# super ugly way to access catalog_toml url
schema_catalog_url = ui_config['sections'][0]['catalog']
table_schema_catalog = opendataschema.TableSchemaCatalog(schema_catalog_url, download_func=download_with_cache)
# Flask things
app = flask.Flask(__name__)
......
......@@ -4,7 +4,7 @@ block head %}
{% endblock %} {% block content %}
<h1 class="my-4">Validez vos jeux de données</h1>
{% for section in config %}
{% for section in config.sections %}
<h2>{{section.title}}</h2>
<div class="row my-4">
{% for val in section.catalog %}
......@@ -31,7 +31,7 @@ block head %}
<h4 class="card-title">{{ val.title }}</h4>
<p class="card-text">{{ val.description }}</p>
<a
href="{{ url_for('custom_validator') }}?schema={{val.schema_url|urlencode}}"
href="{{ url_for('custom_validator') }}?schema_name={{val.name}}"
class="btn btn-primary mt-auto"
>Choisir</a
>
......@@ -49,10 +49,10 @@ block head %}
<div class="form-group">
<label for="schema">Indiquez ici l'URL du schéma de validation à utiliser</label>
<input
name="schema"
name="schema_url"
type="url"
class="form-control"
id="schema"
id="schema_url"
aria-describedby="urlHelp"
placeholder="https://..."
/>
......
<h5 class="card-title">
Schéma
{% if val_info.title %}
« {{ val_info.title }} »
{% if schema_info.title %}
« {{ schema_info.title }} »
{% endif %}
{#
<span class="badge badge-primary">{{ val_info.version }}</span>
#}
</h5>
{% if val_info.description %}
<h6 class="card-subtitle mb-2 text-muted">{{ val_info.description }}</h6>
{% if schema_info.description %}
<h6 class="card-subtitle mb-2 text-muted">{{ schema_info.description }}</h6>
{% endif %}
{% if val_info.author or val_info.contributor %}
{% if schema_info.author or schema_info.contributor %}
<p>
{% if val_info.author %}
Auteur : {{ val_info.author }}
{% if schema_info.author %}
Auteur : {{ schema_info.author }}
{% endif %}
{% if val_info.contributor %}
<br />Contributeur(s) : {{ val_info.contributor }}
{% if schema_info.contributor %}
<br />Contributeur(s) : {{ schema_info.contributor }}
{% endif %}
</p>
{% endif %}
<p>
{% if val_info.doc_url %}
<a href="{{ val_info.doc_url }}" target="_blank" class="card-link">Documentation</a>
{% if schema_info.doc_url %}
<a href="{{ schema_info.doc_url }}" target="_blank" class="card-link">Documentation</a>
{% else %}
<a href="{{ schema_url }}" target="_blank" class="card-link">{{ schema_url }}</a>
<a href="{{ schema_info.url }}" target="_blank" class="card-link">{{ schema_info.url }}</a>
{% endif %}
</p>
......@@ -26,19 +26,15 @@ block head %}
<a class="nav-link" id="url-tab" data-toggle="tab" href="#url" role="tab" aria-controls="url"
aria-selected="false">URL</a>
</li>
{#
<li class="nav-item">
<a class="nav-link" id="examples-tab" data-toggle="tab" href="#example" role="tab" aria-controls="examples"
aria-selected="false">Exemples</a>
</li>
#}
</ul>
{% set padding_class = 'p-3' %}
<div class="tab-content" id="myTabContent">
<div class="tab-pane fade show active {{ padding_class }}" id="file" role="tabpanel" aria-labelledby="file-tab">
<form method="POST" enctype="multipart/form-data">
<input type="hidden" name="input" value="file" />
<input type="hidden" name="schema" value="{{ schema_url }}" />
{% for key, value in schema_params.items() %}
<input type="hidden" name="{{ key }}" value="{{ value }}" />
{% endfor %}
<div class="form-group">
<label for="file">Choisissez un fichier tabulaire à valider (.xlsx, .xls, .ods, .csv, .tsv,
etc.)</label>
......@@ -50,7 +46,9 @@ block head %}
<div class="tab-pane fade {{ padding_class }}" id="url" role="tabpanel" aria-labelledby="url-tab">
<form method="GET">
<input type="hidden" name="input" value="url" />
<input type="hidden" name="schema" value="{{ schema_url }}" />
{% for key, value in schema_params.items() %}
<input type="hidden" name="{{ key }}" value="{{ value }}" />
{% endfor %}
<div class="form-group">
<label for="url">Indiquez l'URL de la table à valider</label>
<input name="url" type="url" class="form-control" id="url" aria-describedby="urlHelp"
......@@ -60,23 +58,6 @@ block head %}
<button type="submit" class="btn btn-primary">Valider</button>
</form>
</div>
{#
<div class="tab-pane fade {{ padding_class }}" id="example" role="tabpanel" aria-labelledby="examples-tab">
<form method="GET">
<input type="hidden" name="input" value="example" />
<div class="form-group">
<label for="url">Choisissez l'exemple à valider</label>
<select name="url" id="example" class="form-control">
<option value="">...</option>
{% for example in val_info.examples %}
<option value="{{ example.url }}">{{ example.name }}</option>
{% endfor %}
</select>
</div>
<button type="submit" class="btn btn-primary">Valider</button>
</form>
</div>
#}
</div>
</div>
</div>
......
......@@ -36,7 +36,7 @@
{% endif %}
{% if source.type == 'url' %}
<p class="hidden-print">
<a href="{{ url_for('pdf_report') }}?schema={{schema_url|urlencode}}&url={{source.name|urlencode}}" target="_blank">
<a href="{{ pdf_report_url }}&url={{source.name|urlencode}}" target="_blank">
Télécharger en PDF
</a>
</p>
......
......@@ -11,9 +11,10 @@ import tempfile
from datetime import datetime
from operator import itemgetter
from pathlib import Path
from urllib.parse import quote_plus
from urllib.parse import quote_plus, urlencode
import requests
import tableschema
from backports.datetime_fromisoformat import MonkeyPatch
from commonmark import commonmark
from flask import make_response, redirect, render_template, request, url_for
......@@ -22,7 +23,7 @@ from validata_core.loaders import custom_loaders
import tabulator
from . import app, config, ui_config, schema_info_map
from . import app, config, ui_config, table_schema_catalog, schema_from_url
from .ui_util import flash_error, flash_warning
from .validata_util import ValidataSource
......@@ -31,6 +32,56 @@ MonkeyPatch.patch_fromisoformat()
log = logging.getLogger(__name__)
class SchemaInstance:
    """Handy class to handle schema information.

    A schema is designated either directly by its URL, or by its name in the
    table schema catalog (optionally accompanied by a reference).

    Attributes:
    url -- URL the schema is loaded from
    name -- name of the schema in the catalog (None when given by URL)
    ref -- reference received along with the name (None when absent)
    spec -- schema object returned by schema_from_url() for `url`
    """

    def __init__(self, url=None, name=None, ref=None, spec=None):
        """Store the given values as attributes.

        This constructor is not intended to be called directly:
        use the from_parameters() class method instead.
        """
        self.url = url
        self.name = name
        self.ref = ref
        self.spec = spec

    @classmethod
    def from_parameters(cls, parameter_dict, table_schema_catalog):
        """Initialize a schema instance from request parameters.

        parameter_dict -- mapping of request parameters (query string or form)
        table_schema_catalog -- catalog used to resolve a schema name

        A non-empty 'schema_url' parameter takes precedence. Otherwise
        'schema_name' (with the optional 'schema_ref') is looked up in the
        catalog references.

        Returns None when no schema is designated (missing or empty
        parameters) or when the schema name is unknown to the catalog.
        """
        # An empty value (e.g. a form submitted with a blank field) is
        # treated like a missing parameter instead of being loaded as a URL.
        schema_url = parameter_dict.get('schema_url')
        if schema_url:
            return cls(url=schema_url, spec=schema_from_url(schema_url))

        schema_name = parameter_dict.get('schema_name')
        if not schema_name:
            return None

        # Unknown schema name?
        table_schema_reference = table_schema_catalog.references.get(schema_name)
        if table_schema_reference is None:
            return None

        # request_parameters() emits '' for a missing ref: map it back to None
        schema_ref = parameter_dict.get('schema_ref') or None

        # NOTE(review): schema_ref is stored but not used to resolve the
        # schema URL below -- confirm whether the reference should be passed on.
        schema_url = table_schema_reference.get_schema_url()
        return cls(schema_url, schema_name, schema_ref, schema_from_url(schema_url))

    def request_parameters(self):
        """Return the request parameters designating this schema.

        The result is a dict suitable for a query string or hidden form
        fields: 'schema_name' and 'schema_ref' when the schema has a name,
        'schema_url' otherwise. A missing ref is rendered as ''.
        """
        if self.name:
            return {
                'schema_name': self.name,
                'schema_ref': '' if self.ref is None else self.ref,
            }
        return {
            'schema_url': self.url,
        }
def extract_source_data(source: ValidataSource, preview_rows_nb=5):
""" Computes table preview """
......@@ -234,27 +285,29 @@ def get_badge_url_and_message(badge):
config.SHIELDS_IO_BASE_URL, quote_plus(msg), color), msg)
def validate(schema_url, source: ValidataSource):
def validate(schema_instance: SchemaInstance, source: ValidataSource):
""" Validate source and display report """
# Validation is done through http call to validata-api
if config.API_VALIDATE_ENDPOINT is None:
flash_error("No Validate endpoint defined :-(")
return redirect(url_for("custom_validator"))
api_url = config.API_VALIDATE_ENDPOINT
# Useful to receive response as JSON
headers = {"Accept": "application/json"}
try:
if source.is_url():
params = {
"schema": schema_url,
"schema": schema_instance.url,
"url": source.get_url(),
}
req = requests.get(api_url, params=params, headers=headers)
else:
files = {'file': (source.name, io.BytesIO(source.source))}
data = {'schema': schema_url}
data = {"schema": schema_instance.url}
req = requests.post(api_url, data=data, files=files, headers=headers)
# 400
......@@ -298,11 +351,12 @@ def validate(schema_url, source: ValidataSource):
validata_report = create_validata_ui_report(validata_core_report, schema_dict)
# Display report to the user
validator_form_url = url_for("custom_validator")+'?schema='+quote_plus(schema_url)
val_info, validator_title = compute_validator_info(schema_url)
validator_form_url = compute_validation_form_url(schema_instance)
schema_info, validator_title = compute_schema_info(schema_instance.spec)
pdf_report_url = url_for('pdf_report')+'?'+urlencode(schema_instance.request_parameters())
return render_template('validation_report.html', title='Rapport de validation',
val_info=val_info, report=validata_report,
schema_url=schema_url,
schema_info=schema_info, report=validata_report,
pdf_report_url=pdf_report_url,
validation_date=report_datetime.strftime('le %d/%m/%Y à %Hh%M'),
source=source, source_type=source.type, source_data=source_data,
print_mode=request.args.get('print', 'false') == 'true',
......@@ -321,6 +375,24 @@ def bytes_data(f):
return iob.getvalue()
def hydrate_ui_config(ui_config, table_schema_catalog):
    """Return a copy of `ui_config` whose first section lists catalog schemas.

    ui_config -- UI configuration dict with a 'sections' list
    table_schema_catalog -- catalog whose `references` maps names to schema
        references

    For each catalog reference, sorted by name, an info dict is built from
    the schema descriptor (without its 'fields') plus the catalog name under
    the 'name' key. The list of these dicts replaces the 'catalog' entry of
    the first section in the returned copy; `ui_config` itself is left
    untouched.
    """
    import copy  # local import so the block does not depend on file-level imports

    table_schema_ref_list = []
    for name, ref in sorted(table_schema_catalog.references.items(), key=itemgetter(0)):
        descriptor = ref.get_table_schema().descriptor
        info = {k: v for k, v in descriptor.items() if k != 'fields'}
        # Set the catalog name last so that a "name" property of the
        # descriptor cannot replace it: the home page uses it as schema_name,
        # which is looked up in the catalog references.
        info['name'] = name
        table_schema_ref_list.append(info)

    # Deep copy: a shallow copy shares the nested section dicts, so the
    # assignment below would modify the caller's configuration.
    hydrated_ui_config = copy.deepcopy(ui_config)

    # TODO: change this hard-coded affectation
    hydrated_ui_config['sections'][0]['catalog'] = table_schema_ref_list

    return hydrated_ui_config
# Routes
......@@ -328,13 +400,7 @@ def bytes_data(f):
def home():
""" Home page """
flash_warning('Ce service est fourni en mode beta - certains problèmes peuvent subsister - nous mettons tout en œuvre pour améliorer son fonctionnement en continu.')
return render_template('home.html', title='Accueil', config=ui_config)
@app.route('/validators')
def validators():
""" No validators page """
return redirect(url_for('home'))
return render_template('home.html', title='Accueil', config=hydrate_ui_config(ui_config, table_schema_catalog))
@app.route('/pdf')
......@@ -347,17 +413,26 @@ def pdf_report():
flash_error(err_prefix + ': URL non fournie')
return redirect(url_for('home'))
schema_param = request.args.get('schema')
if not schema_param:
flash_error(err_prefix + ': URL de schema non fournie')
schema_instance = SchemaInstance.from_parameters(request.args, table_schema_catalog)
if schema_instance is None:
flash_error(err_prefix + ': Information de schema non fournie')
return redirect(url_for('home'))
validation_url = '{}?input=url&print=true&url={}&schema={}'.format(url_for('custom_validator', _external=True),
quote_plus(url_param), quote_plus(schema_param))
# Compute pdf url report
base_url = url_for('custom_validator', _external=True)
parameter_dict = {
'input': 'url',
'print': 'true',
'url': url_param,
**schema_instance.request_parameters()
}
validation_url = base_url + '?' + urlencode(parameter_dict)
# Create temp file to save validation report
with tempfile.NamedTemporaryFile(prefix='validata_{}_report_'.format(datetime.now().timestamp()), suffix='.pdf') as tmpfile:
tmp_pdf_report = Path(tmpfile.name)
# Use chromium headless to generate PDF from validation report page
cmd = ['chromium', '--headless', '--disable-gpu',
'--print-to-pdf={}'.format(str(tmp_pdf_report)), validation_url]
result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
......@@ -368,6 +443,7 @@ def pdf_report():
tmp_pdf_report.unlink()
return redirect(url_for('home'))
# Send PDF report
pdf_filename = 'Rapport de validation {}.pdf'.format(datetime.now().strftime('%d-%m-%Y %Hh%M'))
response = make_response(tmp_pdf_report.read_bytes())
response.headers.set('Content-disposition', 'attachment', filename=pdf_filename)
......@@ -379,81 +455,89 @@ def pdf_report():
return response
def compute_validator_info(schema_url):
def compute_schema_info(table_schema: tableschema.Schema):
"""Factor code for validator form page"""
val_info = None
title = "Schéma personnalisé"
if schema_url in schema_info_map:
val_info = schema_info_map.get(schema_url)
title = "Schéma « {} »".format(val_info['title'])
return val_info, title
schema_info = {k: v for k, v in table_schema.descriptor.items() if k != 'fields'}
title = "Schéma « {} »".format(schema_info.get('title'))
return schema_info, title
def compute_validation_form_url(schema_instance: SchemaInstance):
    """Computes the validation form URL for the given schema.

    schema_instance -- schema whose request parameters (schema URL, or schema
        name and ref) are appended to the form URL as a query string

    Returns the URL of the 'custom_validator' route followed by '?' and the
    URL-encoded parameters.
    """
    # urlencode applies quote_plus to each value and joins pairs with '&',
    # like the other URL computations of this module.
    query_string = urlencode(schema_instance.request_parameters())
    return "{}?{}".format(url_for('custom_validator'), query_string)
@app.route('/validators/form', methods=['GET', 'POST'])
@app.route('/table_schema', methods=['GET', 'POST'])
def custom_validator():
""" Validator page """
"""Validator form"""
# Check that validata-api URL is set
if config.API_VALIDATE_ENDPOINT is None:
flash_error("URL de connexion à l'API non indiquée :-(")
return redirect(url_for('home'))
def validation_form_url(schema_url):
"""Computes validation form url with schema URL parameter"""
return "{}?schema={}".format(url_for('custom_validator'), quote_plus(schema_url))
if request.method == 'GET':
# input is a hidden form parameter to know
# if this is the initial page display or if the validation has been asked for
input_param = request.args.get('input')
# url of resource to be validated
url_param = request.args.get("url")
schema_param = request.args.get("schema")
if schema_param is None or schema_param == '':
flash_error("Vous n'avez pas indiqué d'url de schéma")
schema_instance = SchemaInstance.from_parameters(request.args, table_schema_catalog)
if schema_instance is None:
flash_error("Aucun schéma passé en paramètre")
return redirect(url_for('home'))
# First form display
if input_param is None:
val_info, title = compute_validator_info(schema_param)
schema_info, title = compute_schema_info(schema_instance.spec)
return render_template('validation_form.html', title=title,
val_info=val_info, schema_url=schema_param,
schema_info=schema_info,
schema_params=schema_instance.request_parameters(),
breadcrumbs=[{'url': url_for('home'), 'title': 'Accueil'}, ])
# Process URL
else:
if url_param is None or url_param == '':
flash_error("Vous n'avez pas indiqué d'url à valider")
return redirect(validation_form_url(schema_param))
return redirect(compute_validation_form_url(schema_instance))
try:
return validate(schema_param, ValidataSource('url', url_param, url_param))
return validate(schema_instance, ValidataSource('url', url_param, url_param))
except tabulator.exceptions.FormatError as e:
flash_error('Erreur : Format de ressource non supporté')
log.info(e)
return redirect(validation_form_url(schema_param))
return redirect(compute_validation_form_url(schema_instance))
except tabulator.exceptions.HTTPError as e:
flash_error('Erreur : impossible d\'accéder au fichier source en ligne')
log.info(e)
return redirect(validation_form_url(schema_param))
return redirect(compute_validation_form_url(schema_instance))
else: # POST
schema_param = request.form.get('schema')
if schema_param is None:
schema_instance = SchemaInstance.from_parameters(request.form, table_schema_catalog)
if schema_instance is None:
flash_error('Aucun schéma défini')
return redirect(url_for('home'))
input_param = request.form.get('input')
if input_param is None:
flash_error("Vous n'avez pas indiqué de fichier à valider")
return redirect(validation_form_url(schema_param))
return redirect(compute_validation_form_url(schema_instance))
# File validation
if input_param == 'file':
f = request.files.get('file')
if f is None:
flash_warning("Vous n'avez pas indiqué de fichier à valider")
return redirect(validation_form_url(schema_param))
return redirect(compute_validation_form_url(schema_instance))
b_content = bytes_data(f)
return validate(schema_param, ValidataSource('file', f.filename, b_content))
return validate(schema_instance, ValidataSource('file', f.filename, b_content))
return 'Bizarre, vous avez dit bizarre ?'
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment