Commit 310ee17c authored by Pierre Dittgen's avatar Pierre Dittgen
Browse files

wip - homepage is ok

parent 11d64bce
......@@ -68,10 +68,7 @@ setup(
'sentry-sdk[flask]==0.14.3',
'toml',
'tabulator',
'opendataschema >= 0.5.5, < 0.6',
'validata_core >= 0.5.0, < 0.6',
],
)
......@@ -6,11 +6,11 @@ from pathlib import Path
import cachecontrol
import flask
import frictionless
import jinja2
import opendataschema
import pkg_resources
import requests
import tableschema
from commonmark import commonmark
from . import config
......@@ -21,13 +21,13 @@ log = logging.getLogger(__name__)
def generate_schema_from_url_func(session):
    """Return a schema-fetching function bound to *session*.

    The returned ``fetch_schema(url)`` downloads a schema descriptor over
    HTTP using the given (caching) session, raises ``requests.HTTPError``
    on non-2xx responses, parses the body as JSON (raising
    ``json.JSONDecodeError`` on malformed content) and wraps the
    descriptor in a ``frictionless.Schema``.
    """
    def fetch_schema(url):
        response = session.get(url)
        response.raise_for_status()
        descriptor = response.json()
        return frictionless.Schema(descriptor)
    return fetch_schema
class SchemaCatalogRegistry:
......@@ -49,7 +49,7 @@ class SchemaCatalogRegistry:
caching_session = cachecontrol.CacheControl(requests.Session())
tableschema_from_url = generate_schema_from_url_func(caching_session)
fetch_schema = generate_schema_from_url_func(caching_session)
# And load schema catalogs which URLs are found in homepage_config.json
schema_catalog_registry = SchemaCatalogRegistry(caching_session)
......
......@@ -150,8 +150,8 @@
<p>Prévisualisation du fichier source
{% if report.repair_actions %}avec structure réparée{% endif %}
{% if not report.table.errors.body %}
(affichage de {{ source_data.preview_rows_nb }}
ligne{% if source_data.preview_rows_nb > 1 %}s{% endif %}
(affichage de {{ source_data.preview_rows_count }}
ligne{% if source_data.preview_rows_count > 1 %}s{% endif %}
sur {{ source_data.rows_nb }} au total)
{% endif %}
</p>
......
......@@ -17,7 +17,7 @@ class ValidataResource(ABC):
self.type = type_
@abstractmethod
def build_tabulator_stream_args(self):
def build_stream_args(self):
"""return (source, option_dict)"""
pass
......@@ -31,7 +31,7 @@ class URLValidataResource(ValidataResource):
self.url = url
self.filename = Path(urlparse(url).path).name
def build_stream_args(self):
    """URL implementation: return (source, options) for streaming.

    The source is the remote URL itself; no extra stream options are needed.
    """
    return (self.url, {})
......@@ -54,7 +54,7 @@ class UploadedFileValidataResource(ValidataResource):
return ext[1:]
return None
def build_tabulator_stream_args(self):
def build_stream_args(self):
"""Uploaded file implementation"""
options = {
'scheme': 'stream',
......
......@@ -13,19 +13,17 @@ from operator import itemgetter
from pathlib import Path
from urllib.parse import urlencode, urljoin
import datapackage
import frictionless
import jsonschema
import requests
import tableschema
import tabulator
from backports.datetime_fromisoformat import MonkeyPatch
from commonmark import commonmark
from flask import abort, make_response, redirect, render_template, request, url_for
from opendataschema import GitSchemaReference, by_commit_date
from validata_core import messages, repair
from validata_core.helpers import translate_error_code
from . import app, config, schema_catalog_registry, tableschema_from_url
from . import app, config, schema_catalog_registry, fetch_schema
from .ui_util import flash_error, flash_warning
from .validata_util import UploadedFileValidataResource, URLValidataResource, ValidataResource, strip_accents
......@@ -106,15 +104,11 @@ class SchemaInstance:
abort(redirect(url_for('home')))
try:
self.schema = tableschema_from_url(self.url)
self.schema = fetch_schema(self.url)
except json.JSONDecodeError as e:
log.exception(e)
flash_error("Le format du schéma n'est pas reconnu")
abort(redirect(url_for('home')))
except datapackage.exceptions.ValidationError as e:
log.exception(e)
flash_error("Le schéma {} comporte des erreurs".format(self.url))
abort(redirect(url_for('home')))
except Exception as e:
log.exception(e)
flash_error("Impossible de récupérer le schéma")
......@@ -150,7 +144,7 @@ def extract_source_data(source: ValidataResource, schema_descriptor, preview_row
for i, h in enumerate(source_header):
if h in duplicate_header_names:
column_name_to_indices[h].append(i)
col_indices = set()
for v in column_name_to_indices.values():
col_indices.update(v[1:])
......@@ -161,51 +155,54 @@ def extract_source_data(source: ValidataResource, schema_descriptor, preview_row
rows = []
nb_rows = 0
tabulator_source, tabulator_options = source.build_tabulator_stream_args()
table_source, table_options = source.build_stream_args()
# Gets original source, only to get headers
# Gets original source, header and rows
source_header = None
with tabulator.Stream(tabulator_source, **tabulator_options) as stream:
for row in stream:
if source_header is None:
source_header = ['' if v is None else v for v in row]
break
with frictionless.Table(table_source, **table_options) as table:
source_header = table.header
source_rows = list(table.data_stream)
# Repair source
tabulator_source, tabulator_options = source.build_tabulator_stream_args()
fixed_source, repair_report = repair(tabulator_source, schema_descriptor, **tabulator_options)
with tabulator.Stream(fixed_source, {**tabulator_options, 'scheme': 'stream', 'format': 'inline'}) as stream:
for row in stream:
if header is None:
header = ['' if v is None else v for v in row]
else:
rows.append(list(map(stringify, row)))
nb_rows += 1
preview_rows_nb = min(preview_rows_nb, nb_rows)
# tabulator_source, tabulator_options = source.build_stream_args()
# fixed_source, repair_report = repair(tabulator_source, schema_descriptor, **tabulator_options)
# with tabulator.Stream(fixed_source, {**tabulator_options, 'scheme': 'stream', 'format': 'inline'}) as stream:
# for row in stream:
# if header is None:
# header = ['' if v is None else v for v in row]
# else:
# rows.append(list(map(stringify, row)))
# nb_rows += 1
# preview_rows_nb = min(preview_rows_nb, nb_rows)
# Computes original_headers display
# wrong headers order: display all headers as in error
if any([err.code == 'wrong-headers-order' for err in repair_report]):
source_header_info = [(h, True) for h in source_header]
# if any([err.code == 'wrong-headers-order' for err in repair_report]):
# source_header_info = [(h, True) for h in source_header]
# else display header error for:
# - blank-header
# - unknown-header
# - duplicate-header
else:
schema_field_names = [f['name'] for f in schema_descriptor.get('fields') or []]
duplicate_header_names = [err._message_substitutions['column-name'] for err in repair_report if err.code == 'duplicate-header']
duplicate_col_indices = compute_duplicate_header_column_indices(source_header, duplicate_header_names)
# # else display header error for:
# # - blank-header
# # - unknown-header
# # - duplicate-header
# else:
# schema_field_names = [f['name'] for f in schema_descriptor.get('fields') or []]
# duplicate_header_names = [err._message_substitutions['column-name'] for err in repair_report if err.code == 'duplicate-header']
# duplicate_col_indices = compute_duplicate_header_column_indices(source_header, duplicate_header_names)
# source_header_info = [(h, not h or h not in schema_field_names or i in duplicate_col_indices) for i, h in enumerate(source_header)]
source_header_info = [(h, not h or h not in schema_field_names or i in duplicate_col_indices) for i, h in enumerate(source_header)]
# TODO: Change False into True if there are errors associated to columns
source_header_info = [(colname, False) for colname in source_header]
rows_count = len(source_rows)
previous_rows_count = min(preview_rows_nb, rows_count)
return {
'source_header_info': source_header_info,
'header': header,
'rows_nb': nb_rows,
'data_rows': rows,
'preview_rows_nb': preview_rows_nb,
'preview_rows': rows[:preview_rows_nb]
'header': source_header,
'rows_nb': rows_count,
'data_rows': source_rows,
'preview_rows_count': previous_rows_count,
'preview_rows': source_rows[:preview_rows_count]
}
......@@ -644,7 +641,7 @@ def home():
# Iterate on all sections
for section in config.HOMEPAGE_CONFIG['sections']:
# section with only links to external validators
if "links" in section:
yield section
......@@ -669,7 +666,7 @@ def home():
'name': schema_reference.name
}
try:
table_schema = tableschema_from_url(schema_reference.get_schema_url())
table_schema = fetch_schema(schema_reference.get_schema_url())
except json.JSONDecodeError:
schema_info['err'] = True
schema_info['title'] = f"le format du schéma « {schema_info['name']} » n'est pas reconnu"
......@@ -680,11 +677,11 @@ def home():
schema_info['err'] = True
schema_info['title'] = f"le schéma « {schema_info['name']} » n'est pas disponible"
else:
schema_info['title'] = table_schema.descriptor.get("title") or schema_info['name']
schema_info['title'] = table_schema.get("title") or schema_info['name']
schema_info_list.append(schema_info)
schema_info_list = sorted(
schema_info_list, key=lambda sc: strip_accents(sc['title'].lower()))
yield {
**{k: v for k, v in section.items() if k != 'catalog'},
"catalog": schema_info_list,
......@@ -742,12 +739,12 @@ def pdf_report():
return response
def extract_schema_metadata(table_schema: "frictionless.Schema"):
    """Return header metadata (author, contributor, version, ...) from a schema.

    Keeps every top-level key of the schema mapping except 'fields'.
    The annotation is quoted (forward reference) so the function itself does
    not evaluate ``frictionless`` at definition time.
    """
    return {k: v for k, v in table_schema.items() if k != 'fields'}
def compute_schema_info(table_schema: tableschema.Schema, schema_url):
def compute_schema_info(table_schema: frictionless.Schema, schema_url):
"""Factor code for validator form page"""
# Schema URL + schema metadata info
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment