From 9774caa5ebb15e4fa0394b10e23fa1335c07e905 Mon Sep 17 00:00:00 2001
From: Pierre Dittgen
Date: Thu, 13 Jun 2019 17:00:38 +0200
Subject: [PATCH] One more step towards uploaded file management

---
Reviewer notes (text between "---" and the diffstat is ignored by `git am`):

* What the patch does: replaces the ValidataSource class with an abstract
  ValidataResource and two implementations (URLValidataResource,
  UploadedFileValidataResource), points the SCDL section at catalog.json,
  and builds the catalog with opendataschema.SchemaCatalog.
* views.py, extract_source_data(): the option dict returned by
  build_tabulator_stream_args() is now unpacked as keyword arguments
  (`**tabulator_options`). Passed positionally it is bound to the second
  parameter of tabulator.Stream (`headers`), so the detected `format`
  never reached tabulator.
* validata_util.py, __detect_format(): the file extension is lower-cased
  before matching, so a name such as "DATA.CSV" is recognised.
* Both changes replace a "+" line in place: hunk sizes and the diffstat
  are unchanged. The blob hashes on the "index" lines of the two touched
  files predate these edits.
* The patch text had lost its line breaks. Indentation and blank context
  lines were rebuilt from the Python/JSON syntax and the hunk line counts
  -- TODO confirm against the repository before applying.

 config.json.example          |  2 +-
 validata_ui/__init__.py      |  2 +-
 validata_ui/validata_util.py | 67 +++++++++++++++++++++---------------
 validata_ui/views.py         | 33 ++++++++----------
 4 files changed, 57 insertions(+), 47 deletions(-)

diff --git a/config.json.example b/config.json.example
index 111596e..712b927 100644
--- a/config.json.example
+++ b/config.json.example
@@ -3,7 +3,7 @@
         {
             "code": "scdl",
             "title": "Validateurs SCDL",
-            "catalog": "https://git.opendatafrance.net/scdl/catalog/raw/master/catalog.toml"
+            "catalog": "https://git.opendatafrance.net/scdl/catalog/raw/master/catalog.json"
         },
         {
             "code": "external",
diff --git a/validata_ui/__init__.py b/validata_ui/__init__.py
index be37986..ccd0471 100644
--- a/validata_ui/__init__.py
+++ b/validata_ui/__init__.py
@@ -30,7 +30,7 @@ ui_config = json.load(config.UI_CONFIG_FILE.open('rt', encoding='utf-8')) if con
 # super ugly way to access catalog_toml url
 # TODO: improve it
 schema_catalog_url = ui_config['sections'][0]['catalog']
-table_schema_catalog = opendataschema.TableSchemaCatalog(schema_catalog_url, download_func=download_with_cache)
+table_schema_catalog = opendataschema.SchemaCatalog(schema_catalog_url, download_func=download_with_cache)
 
 
 # Flask things
diff --git a/validata_ui/validata_util.py b/validata_ui/validata_util.py
index b0218f6..504e1f0 100644
--- a/validata_ui/validata_util.py
+++ b/validata_ui/validata_util.py
@@ -1,44 +1,57 @@
 """ Call validation code """
 
 import logging
+from abc import abstractmethod, ABC
 from io import BytesIO
-
-from validata_core.source_helpers import build_tabulator_params
-
+from pathlib import Path
 
 log = logging.getLogger(__name__)
 
 
-class ValidataSource():
-    """ Handy class to handle different sort of data source """
+class ValidataResource(ABC):
+    """A resource to validate: url or uploaded file"""
 
-    def __init__(self, type_, name, source):
-        """ Initialization """
+    def __init__(self, type_):
         self.type = type_
-        self.name = name
-        self.source = source
 
-        info = build_tabulator_params(type, name, source)
-        self.source = info.get('source')
-        self.format = info.get('format')
-        self.scheme = info.get('scheme')
+    @abstractmethod
+    def build_tabulator_stream_args(self):
+        """return (source, option_dict)"""
+        pass
+
+
+class URLValidataResource(ValidataResource):
+    """URL resource"""
+
+    def __init__(self, url):
+        """Built from URL"""
+        super().__init__('url')
+        self.url = url
+
+    def build_tabulator_stream_args(self):
+        """URL implementation"""
+        return (self.url, {})
 
-    def is_url(self):
-        return self.type == 'url'
 
-    def get_url(self):
-        return self.source
+class UploadedFileValidataResource(ValidataResource):
+    """Uploaded file resource"""
 
-    def get_filename(self):
-        return self.name
+    def __init__(self, filename, bytes_content):
+        """Built from file name and content"""
+        super().__init__('file')
+        self.filename = filename
+        self.content = bytes_content
 
-    def get_filecontent(self):
-        return self.source
+    def build_reader(self):
+        return BytesIO(self.content)
 
+    def __detect_format(self):
+        ext = Path(self.filename).suffix.lower()
+        if ext in ('.csv', '.tsv', '.ods', '.xls', '.xlsx'):
+            return ext[1:]
+        return None
 
-def bytes_data(f):
-    """Gets bytes data from Werkzeug FileStorage instance"""
-    iob = BytesIO()
-    f.save(iob)
-    iob.seek(0)
-    return iob.getvalue()
+    def build_tabulator_stream_args(self):
+        """Uploaded file implementation"""
+        return (self.build_reader(), {
+            'format': self.__detect_format()})
diff --git a/validata_ui/views.py b/validata_ui/views.py
index 2820901..781cdca 100644
--- a/validata_ui/views.py
+++ b/validata_ui/views.py
@@ -18,14 +18,13 @@ import tableschema
 from backports.datetime_fromisoformat import MonkeyPatch
 from commonmark import commonmark
 from flask import make_response, redirect, render_template, request, url_for
-from validata_core import compute_badge, csv_helpers, messages
-from validata_core.loaders import custom_loaders
+from validata_core import compute_badge, messages
 
 import tabulator
 
 from . import app, config, ui_config, table_schema_catalog, schema_from_url
 from .ui_util import flash_error, flash_warning
-from .validata_util import ValidataSource
+from .validata_util import ValidataResource, URLValidataResource, UploadedFileValidataResource
 
 MonkeyPatch.patch_fromisoformat()
 
@@ -87,7 +86,7 @@ class SchemaInstance():
         }
 
 
-def extract_source_data(source: ValidataSource, preview_rows_nb=5):
+def extract_source_data(source: ValidataResource, preview_rows_nb=5):
     """ Computes table preview """
 
     def stringify(val):
@@ -98,12 +97,11 @@ def extract_source_data(source: ValidataSource, preview_rows_nb=5):
     rows = []
     nb_rows = 0
 
-    options = {}
-    if source.format == "csv":
-        options['delimiter'] = csv_helpers.detect_dialect(source.source, format=source.format, scheme=source.scheme,
-                                                          custom_loaders=custom_loaders).delimiter
-    with tabulator.Stream(source.source, format=source.format, scheme=source.scheme, custom_loaders=custom_loaders,
-                          **options) as stream:
+    # if source.format == "csv":
+    #     options['delimiter'] = csv_helpers.detect_dialect(source.source, format=source.format, scheme=source.scheme,
+    #                                                       custom_loaders=custom_loaders).delimiter
+    tabulator_source, tabulator_options = source.build_tabulator_stream_args()
+    with tabulator.Stream(tabulator_source, **tabulator_options) as stream:
         for row in stream:
             if header is None:
                 header = ['' if v is None else v for v in row]
@@ -290,7 +288,7 @@ def get_badge_url_and_message(badge):
         config.SHIELDS_IO_BASE_URL, quote_plus(msg), color), msg)
 
 
-def validate(schema_instance: SchemaInstance, source: ValidataSource):
+def validate(schema_instance: SchemaInstance, source: ValidataResource):
     """ Validate source and display report """
 
     # Validation is done through http call to validata-api
@@ -303,15 +301,15 @@ def validate(schema_instance: SchemaInstance, source: ValidataSource):
     headers = {"Accept": "application/json"}
 
     try:
-        if source.is_url():
+        if source.type == 'url':
             params = {
                 "schema": schema_instance.url,
-                "url": source.get_url(),
+                "url": source.url,
             }
             req = requests.get(api_url, params=params, headers=headers)
 
         else:
-            files = {'file': (source.name, io.BytesIO(source.source))}
+            files = {'file': (source.filename, source.build_reader())}
             data = {"schema": schema_instance.url}
             req = requests.post(api_url, data=data, files=files, headers=headers)
 
@@ -387,7 +385,7 @@ def hydrate_ui_config(ui_config, table_schema_catalog):
 
     table_schema_ref_list = []
     for name, ref in sorted(table_schema_catalog.references.items(), key=itemgetter(0)):
-        table_schema = ref.get_table_schema()
+        table_schema = schema_from_url(ref.get_schema_url())
         info = {
             "name": name,
             **{k: v for k, v in table_schema.descriptor.items() if k != 'fields'}
@@ -518,7 +516,7 @@ def custom_validator():
                 flash_error("Vous n'avez pas indiqué d'url à valider")
                 return redirect(compute_validation_form_url(schema_instance))
             try:
-                return validate(schema_instance, ValidataSource('url', url_param, url_param))
+                return validate(schema_instance, URLValidataResource(url_param))
             except tabulator.exceptions.FormatError as e:
                 flash_error('Erreur : Format de ressource non supporté')
                 log.info(e)
@@ -547,7 +545,6 @@ def custom_validator():
                 flash_warning("Vous n'avez pas indiqué de fichier à valider")
                 return redirect(compute_validation_form_url(schema_instance))
 
-            b_content = bytes_data(f)
-            return validate(schema_instance, ValidataSource('file', f.filename, b_content))
+            return validate(schema_instance, UploadedFileValidataResource(f.filename, bytes_data(f)))
 
         return 'Bizarre, vous avez dit bizarre ?'
-- 
GitLab