Commit 9774caa5 authored by Pierre Dittgen

One more step towards uploaded file management

parent c88a933d
......@@ -3,7 +3,7 @@
{
"code": "scdl",
"title": "Validateurs SCDL",
"catalog": "https://git.opendatafrance.net/scdl/catalog/raw/master/catalog.toml"
"catalog": "https://git.opendatafrance.net/scdl/catalog/raw/master/catalog.json"
},
{
"code": "external",
......
......@@ -30,7 +30,7 @@ ui_config = json.load(config.UI_CONFIG_FILE.open('rt', encoding='utf-8')) if con
# Hacky: the schema catalog URL is read from the first section of the UI config.
# TODO: look the section up explicitly instead of relying on its position.
schema_catalog_url = ui_config['sections'][0]['catalog']
table_schema_catalog = opendataschema.TableSchemaCatalog(schema_catalog_url, download_func=download_with_cache)
table_schema_catalog = opendataschema.SchemaCatalog(schema_catalog_url, download_func=download_with_cache)
# Flask things
......
""" Call validation code """
import logging
from abc import abstractmethod, ABC
from io import BytesIO
from validata_core.source_helpers import build_tabulator_params
from pathlib import Path
log = logging.getLogger(__name__)
# NOTE(review): this span looks like diff residue — lines of the older ValidataSource class appear
# interleaved with the ValidataResource base class; confirm against the repository before editing.
class ValidataSource():
""" Handy class to handle different sort of data source """
class ValidataResource(ABC):
"""A resource to validate: url or uploaded file"""
def __init__(self, type_, name, source):
""" Initialization """
def __init__(self, type_):
# Resource kind tag; the subclasses in this file pass 'url' or 'file'.
self.type = type_
self.name = name
self.source = source
# NOTE(review): `type` below is the Python builtin, not the `type_` parameter — looks unintended; confirm.
info = build_tabulator_params(type, name, source)
self.source = info.get('source')
self.format = info.get('format')
self.scheme = info.get('scheme')
@abstractmethod
def build_tabulator_stream_args(self):
"""return (source, option_dict)"""
pass
class URLValidataResource(ValidataResource):
    """Resource to validate that is addressed by a URL."""

    def __init__(self, url):
        """Tag the resource with the 'url' type and remember its URL."""
        super().__init__('url')
        self.url = url

    def build_tabulator_stream_args(self):
        """Return the (source, option_dict) pair: the URL and no extra options."""
        stream_options = {}
        return self.url, stream_options
def is_url(self):
return self.type == 'url'
def get_url(self):
return self.source
class UploadedFileValidataResource(ValidataResource):
"""Uploaded file resource"""
def get_filename(self):
return self.name
def __init__(self, filename, bytes_content):
"""Built from file name and content"""
super().__init__('file')
self.filename = filename
self.content = bytes_content
def get_filecontent(self):
return self.source
def build_reader(self):
return BytesIO(self.content)
def __detect_format(self):
    """Guess the tabular format from the uploaded file's extension.

    The extension is compared case-insensitively, so a name such as
    ``DATA.CSV`` is recognised just like ``data.csv``.

    Returns:
        The extension without its leading dot, in lower case ('csv', 'tsv',
        'ods', 'xls' or 'xlsx'), or None when the file name carries no
        extension from that list.
    """
    # Lower-case first: the suffix keeps the casing of the uploaded file name.
    ext = Path(self.filename).suffix.lower()
    if ext in ('.csv', '.tsv', '.ods', '.xls', '.xlsx'):
        return ext[1:]
    return None
def bytes_data(f):
    """Return the whole content of a Werkzeug FileStorage instance as bytes."""
    buffer = BytesIO()
    f.save(buffer)
    # getvalue() returns the entire buffer whatever the current position is,
    # so no rewind is needed before reading it back.
    return buffer.getvalue()
def build_tabulator_stream_args(self):
    """Return (source, option_dict) for an uploaded file.

    The source is a fresh reader over the uploaded content; the options
    carry the format detected from the file name under the 'format' key.
    """
    reader = self.build_reader()
    stream_options = {'format': self.__detect_format()}
    return (reader, stream_options)
......@@ -18,14 +18,13 @@ import tableschema
from backports.datetime_fromisoformat import MonkeyPatch
from commonmark import commonmark
from flask import make_response, redirect, render_template, request, url_for
from validata_core import compute_badge, csv_helpers, messages
from validata_core.loaders import custom_loaders
from validata_core import compute_badge, messages
import tabulator
from . import app, config, ui_config, table_schema_catalog, schema_from_url
from .ui_util import flash_error, flash_warning
from .validata_util import ValidataSource
from .validata_util import ValidataResource, URLValidataResource, UploadedFileValidataResource
MonkeyPatch.patch_fromisoformat()
......@@ -87,7 +86,7 @@ class SchemaInstance():
}
def extract_source_data(source: ValidataSource, preview_rows_nb=5):
def extract_source_data(source: ValidataResource, preview_rows_nb=5):
""" Computes table preview """
def stringify(val):
......@@ -98,12 +97,11 @@ def extract_source_data(source: ValidataSource, preview_rows_nb=5):
rows = []
nb_rows = 0
options = {}
if source.format == "csv":
options['delimiter'] = csv_helpers.detect_dialect(source.source, format=source.format, scheme=source.scheme,
custom_loaders=custom_loaders).delimiter
with tabulator.Stream(source.source, format=source.format, scheme=source.scheme, custom_loaders=custom_loaders,
**options) as stream:
# if source.format == "csv":
# options['delimiter'] = csv_helpers.detect_dialect(source.source, format=source.format, scheme=source.scheme,
# custom_loaders=custom_loaders).delimiter
tabulator_source, tabulator_options = source.build_tabulator_stream_args()
with tabulator.Stream(tabulator_source, tabulator_options) as stream:
for row in stream:
if header is None:
header = ['' if v is None else v for v in row]
......@@ -290,7 +288,7 @@ def get_badge_url_and_message(badge):
config.SHIELDS_IO_BASE_URL, quote_plus(msg), color), msg)
def validate(schema_instance: SchemaInstance, source: ValidataSource):
def validate(schema_instance: SchemaInstance, source: ValidataResource):
""" Validate source and display report """
# Validation is done through http call to validata-api
......@@ -303,15 +301,15 @@ def validate(schema_instance: SchemaInstance, source: ValidataSource):
headers = {"Accept": "application/json"}
try:
if source.is_url():
if source.type == 'url':
params = {
"schema": schema_instance.url,
"url": source.get_url(),
"url": source.url,
}
req = requests.get(api_url, params=params, headers=headers)
else:
files = {'file': (source.name, io.BytesIO(source.source))}
files = {'file': (source.filename, source.build_reader())}
data = {"schema": schema_instance.url}
req = requests.post(api_url, data=data, files=files, headers=headers)
......@@ -387,7 +385,7 @@ def hydrate_ui_config(ui_config, table_schema_catalog):
table_schema_ref_list = []
for name, ref in sorted(table_schema_catalog.references.items(), key=itemgetter(0)):
table_schema = ref.get_table_schema()
table_schema = schema_from_url(ref.get_schema_url())
info = {
"name": name,
**{k: v for k, v in table_schema.descriptor.items() if k != 'fields'}
......@@ -518,7 +516,7 @@ def custom_validator():
flash_error("Vous n'avez pas indiqué d'url à valider")
return redirect(compute_validation_form_url(schema_instance))
try:
return validate(schema_instance, ValidataSource('url', url_param, url_param))
return validate(schema_instance, URLValidataResource(url_param))
except tabulator.exceptions.FormatError as e:
flash_error('Erreur : Format de ressource non supporté')
log.info(e)
......@@ -547,7 +545,6 @@ def custom_validator():
flash_warning("Vous n'avez pas indiqué de fichier à valider")
return redirect(compute_validation_form_url(schema_instance))
b_content = bytes_data(f)
return validate(schema_instance, ValidataSource('file', f.filename, b_content))
return validate(schema_instance, UploadedFileValidataResource(f.filename, bytes_data(f)))
return 'Bizarre, vous avez dit bizarre ?'
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment