Commit 9774caa5 authored by Pierre Dittgen

One more step towards uploaded file management

parent c88a933d
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
{ {
"code": "scdl", "code": "scdl",
"title": "Validateurs SCDL", "title": "Validateurs SCDL",
"catalog": "https://git.opendatafrance.net/scdl/catalog/raw/master/catalog.toml" "catalog": "https://git.opendatafrance.net/scdl/catalog/raw/master/catalog.json"
}, },
{ {
"code": "external", "code": "external",
......
...@@ -30,7 +30,7 @@ ui_config = json.load(config.UI_CONFIG_FILE.open('rt', encoding='utf-8')) if con ...@@ -30,7 +30,7 @@ ui_config = json.load(config.UI_CONFIG_FILE.open('rt', encoding='utf-8')) if con
# super ugly way to access catalog_toml url # super ugly way to access catalog_toml url
# TODO: improve it # TODO: improve it
schema_catalog_url = ui_config['sections'][0]['catalog'] schema_catalog_url = ui_config['sections'][0]['catalog']
table_schema_catalog = opendataschema.TableSchemaCatalog(schema_catalog_url, download_func=download_with_cache) table_schema_catalog = opendataschema.SchemaCatalog(schema_catalog_url, download_func=download_with_cache)
# Flask things # Flask things
......
""" Call validation code """ """ Call validation code """
import logging import logging
from abc import abstractmethod, ABC
from io import BytesIO from io import BytesIO
from pathlib import Path
from validata_core.source_helpers import build_tabulator_params
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
class ValidataSource(): class ValidataResource(ABC):
""" Handy class to handle different sort of data source """ """A resource to validate: url or uploaded file"""
def __init__(self, type_, name, source): def __init__(self, type_):
""" Initialization """
self.type = type_ self.type = type_
self.name = name
self.source = source
info = build_tabulator_params(type, name, source) @abstractmethod
self.source = info.get('source') def build_tabulator_stream_args(self):
self.format = info.get('format') """return (source, option_dict)"""
self.scheme = info.get('scheme') pass
class URLValidataResource(ValidataResource):
"""URL resource"""
def __init__(self, url):
"""Built from URL"""
super().__init__('url')
self.url = url
def build_tabulator_stream_args(self):
"""URL implementation"""
return (self.url, {})
def is_url(self):
return self.type == 'url'
def get_url(self): class UploadedFileValidataResource(ValidataResource):
return self.source """Uploaded file resource"""
def get_filename(self): def __init__(self, filename, bytes_content):
return self.name """Built from file name and content"""
super().__init__('file')
self.filename = filename
self.content = bytes_content
def get_filecontent(self): def build_reader(self):
return self.source return BytesIO(self.content)
def __detect_format(self):
ext = Path(self.filename).suffix
if ext in ('.csv', '.tsv', '.ods', '.xls', '.xlsx'):
return ext[1:]
return None
def bytes_data(f): def build_tabulator_stream_args(self):
"""Gets bytes data from Werkzeug FileStorage instance""" """Uploaded file implementation"""
iob = BytesIO() return (self.build_reader(), {
f.save(iob) 'format': self.__detect_format()})
iob.seek(0)
return iob.getvalue()
...@@ -18,14 +18,13 @@ import tableschema ...@@ -18,14 +18,13 @@ import tableschema
from backports.datetime_fromisoformat import MonkeyPatch from backports.datetime_fromisoformat import MonkeyPatch
from commonmark import commonmark from commonmark import commonmark
from flask import make_response, redirect, render_template, request, url_for from flask import make_response, redirect, render_template, request, url_for
from validata_core import compute_badge, csv_helpers, messages from validata_core import compute_badge, messages
from validata_core.loaders import custom_loaders
import tabulator import tabulator
from . import app, config, ui_config, table_schema_catalog, schema_from_url from . import app, config, ui_config, table_schema_catalog, schema_from_url
from .ui_util import flash_error, flash_warning from .ui_util import flash_error, flash_warning
from .validata_util import ValidataSource from .validata_util import ValidataResource, URLValidataResource, UploadedFileValidataResource
MonkeyPatch.patch_fromisoformat() MonkeyPatch.patch_fromisoformat()
...@@ -87,7 +86,7 @@ class SchemaInstance(): ...@@ -87,7 +86,7 @@ class SchemaInstance():
} }
def extract_source_data(source: ValidataSource, preview_rows_nb=5): def extract_source_data(source: ValidataResource, preview_rows_nb=5):
""" Computes table preview """ """ Computes table preview """
def stringify(val): def stringify(val):
...@@ -98,12 +97,11 @@ def extract_source_data(source: ValidataSource, preview_rows_nb=5): ...@@ -98,12 +97,11 @@ def extract_source_data(source: ValidataSource, preview_rows_nb=5):
rows = [] rows = []
nb_rows = 0 nb_rows = 0
options = {} # if source.format == "csv":
if source.format == "csv": # options['delimiter'] = csv_helpers.detect_dialect(source.source, format=source.format, scheme=source.scheme,
options['delimiter'] = csv_helpers.detect_dialect(source.source, format=source.format, scheme=source.scheme, # custom_loaders=custom_loaders).delimiter
custom_loaders=custom_loaders).delimiter tabulator_source, tabulator_options = source.build_tabulator_stream_args()
with tabulator.Stream(source.source, format=source.format, scheme=source.scheme, custom_loaders=custom_loaders, with tabulator.Stream(tabulator_source, tabulator_options) as stream:
**options) as stream:
for row in stream: for row in stream:
if header is None: if header is None:
header = ['' if v is None else v for v in row] header = ['' if v is None else v for v in row]
...@@ -290,7 +288,7 @@ def get_badge_url_and_message(badge): ...@@ -290,7 +288,7 @@ def get_badge_url_and_message(badge):
config.SHIELDS_IO_BASE_URL, quote_plus(msg), color), msg) config.SHIELDS_IO_BASE_URL, quote_plus(msg), color), msg)
def validate(schema_instance: SchemaInstance, source: ValidataSource): def validate(schema_instance: SchemaInstance, source: ValidataResource):
""" Validate source and display report """ """ Validate source and display report """
# Validation is done through http call to validata-api # Validation is done through http call to validata-api
...@@ -303,15 +301,15 @@ def validate(schema_instance: SchemaInstance, source: ValidataSource): ...@@ -303,15 +301,15 @@ def validate(schema_instance: SchemaInstance, source: ValidataSource):
headers = {"Accept": "application/json"} headers = {"Accept": "application/json"}
try: try:
if source.is_url(): if source.type == 'url':
params = { params = {
"schema": schema_instance.url, "schema": schema_instance.url,
"url": source.get_url(), "url": source.url,
} }
req = requests.get(api_url, params=params, headers=headers) req = requests.get(api_url, params=params, headers=headers)
else: else:
files = {'file': (source.name, io.BytesIO(source.source))} files = {'file': (source.filename, source.build_reader())}
data = {"schema": schema_instance.url} data = {"schema": schema_instance.url}
req = requests.post(api_url, data=data, files=files, headers=headers) req = requests.post(api_url, data=data, files=files, headers=headers)
...@@ -387,7 +385,7 @@ def hydrate_ui_config(ui_config, table_schema_catalog): ...@@ -387,7 +385,7 @@ def hydrate_ui_config(ui_config, table_schema_catalog):
table_schema_ref_list = [] table_schema_ref_list = []
for name, ref in sorted(table_schema_catalog.references.items(), key=itemgetter(0)): for name, ref in sorted(table_schema_catalog.references.items(), key=itemgetter(0)):
table_schema = ref.get_table_schema() table_schema = schema_from_url(ref.get_schema_url())
info = { info = {
"name": name, "name": name,
**{k: v for k, v in table_schema.descriptor.items() if k != 'fields'} **{k: v for k, v in table_schema.descriptor.items() if k != 'fields'}
...@@ -518,7 +516,7 @@ def custom_validator(): ...@@ -518,7 +516,7 @@ def custom_validator():
flash_error("Vous n'avez pas indiqué d'url à valider") flash_error("Vous n'avez pas indiqué d'url à valider")
return redirect(compute_validation_form_url(schema_instance)) return redirect(compute_validation_form_url(schema_instance))
try: try:
return validate(schema_instance, ValidataSource('url', url_param, url_param)) return validate(schema_instance, URLValidataResource(url_param))
except tabulator.exceptions.FormatError as e: except tabulator.exceptions.FormatError as e:
flash_error('Erreur : Format de ressource non supporté') flash_error('Erreur : Format de ressource non supporté')
log.info(e) log.info(e)
...@@ -547,7 +545,6 @@ def custom_validator(): ...@@ -547,7 +545,6 @@ def custom_validator():
flash_warning("Vous n'avez pas indiqué de fichier à valider") flash_warning("Vous n'avez pas indiqué de fichier à valider")
return redirect(compute_validation_form_url(schema_instance)) return redirect(compute_validation_form_url(schema_instance))
b_content = bytes_data(f) return validate(schema_instance, UploadedFileValidataResource(f.filename, bytes_data(f)))
return validate(schema_instance, ValidataSource('file', f.filename, b_content))
return 'Bizarre, vous avez dit bizarre ?' return 'Bizarre, vous avez dit bizarre ?'
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment