From 47c3ecbf8a0cb65169ab0f286fcf13e172b14b08 Mon Sep 17 00:00:00 2001 From: Pierre Dittgen Date: Fri, 21 Jun 2019 17:12:30 +0200 Subject: [PATCH] Create schema_catalog on demand --- validata_ui/__init__.py | 32 +++++++++++++++++++++++++++----- validata_ui/views.py | 25 +++++++++++++++---------- 2 files changed, 42 insertions(+), 15 deletions(-) diff --git a/validata_ui/__init__.py b/validata_ui/__init__.py index a4a6e80..ade4979 100644 --- a/validata_ui/__init__.py +++ b/validata_ui/__init__.py @@ -1,6 +1,7 @@ import json import logging import os +import re from pathlib import Path from urllib.parse import quote_plus @@ -29,20 +30,41 @@ def generate_schema_from_url_func(session): return tableschema_from_url +def is_http_url(ref) -> bool: + return isinstance(ref, str) and re.match("https?://", ref) + + +class SchemaCatalogRegistry: + """Retain section_name -> catalog url matching + and creates SchemaCatalog instance on demand""" + + def __init__(self, session): + self.session = session + self.url_map = {} + + def add_ref(self, name, url): + self.url_map[name] = url + + def build_schema_catalog(self, name): + if name in self.url_map: + catalog_url = self.url_map[name] + return opendataschema.SchemaCatalog(catalog_url, session=self.session) + return None + + caching_session = cachecontrol.CacheControl(requests.Session()) tableschema_from_url = generate_schema_from_url_func(caching_session) - # And load schema catalogs which URLs are found in homepage_config.json -schema_catalog_map = {} +schema_catalog_registry = SchemaCatalogRegistry(caching_session) if config.HOMEPAGE_CONFIG: log.info("Initializing homepage sections...") for section in config.HOMEPAGE_CONFIG['sections']: name = section['name'] log.info('Initializing homepage section "{}"...'.format(name)) - catalog = section.get('catalog') - if catalog and isinstance(catalog, str): - schema_catalog_map[name] = opendataschema.SchemaCatalog(catalog, session=caching_session) + catalog_ref = section.get('catalog') + if is_http_url(catalog_ref): + schema_catalog_registry.add_ref(name, catalog_ref) log.info("...done") # Flask things diff --git a/validata_ui/views.py b/validata_ui/views.py index 50f1bdb..b89b0cf 100644 --- a/validata_ui/views.py +++ b/validata_ui/views.py @@ -23,7 +23,7 @@ from validata_core import messages from opendataschema import GitSchemaReference -from . import app, config, schema_catalog_map, tableschema_from_url +from . import app, config, schema_catalog_registry, tableschema_from_url from .ui_util import flash_error, flash_warning from .validata_util import UploadedFileValidataResource, URLValidataResource, ValidataResource @@ -32,10 +32,15 @@ MonkeyPatch.patch_fromisoformat() log = logging.getLogger(__name__) +def get_schema_catalog(section_name): + """Return a schema catalog associated to a section_name""" + return schema_catalog_registry.build_schema_catalog(section_name) + + class SchemaInstance: """Handy class to handle schema information""" - def __init__(self, parameter_dict, schema_catalog_map): + def __init__(self, parameter_dict): """Initializes schema instance from requests dict and tableschema catalog (for name ref)""" self.section_name = None self.section_title = None @@ -66,7 +71,7 @@ class SchemaInstance: self.section_title = self.find_section_title(self.section_name) # Look for schema catalog first - table_schema_catalog = schema_catalog_map.get(self.section_name) + table_schema_catalog = get_schema_catalog(self.section_name) if table_schema_catalog is None: abort(400, "Section '{}' non trouvée dans la configuration".format(self.section_name)) @@ -413,16 +418,16 @@ def bytes_data(f): return iob.getvalue() -def homepage_config_with_schema_metadata(ui_config, schema_catalog_map): +def homepage_config_with_schema_metadata(ui_config): """Replace catalog url within ui_config by schema references containing schema metadata properties""" extended_ui_config = ui_config.copy() for section in extended_ui_config['sections']: section_name = section['name'] - if section_name not in schema_catalog_map: + schema_catalog = get_schema_catalog(section_name) + if schema_catalog is None: continue - schema_catalog = schema_catalog_map[section_name] schema_list = [] for ref in schema_catalog.references: # Loads default table schema for each schema reference @@ -441,7 +446,7 @@ def homepage_config_with_schema_metadata(ui_config, schema_catalog_map): @app.route('/') def home(): """ Home page """ - home_config = homepage_config_with_schema_metadata(config.HOMEPAGE_CONFIG, schema_catalog_map) + home_config = homepage_config_with_schema_metadata(config.HOMEPAGE_CONFIG) return render_template('home.html', config=home_config) @@ -455,7 +460,7 @@ def pdf_report(): flash_error(err_prefix + ' : URL non fournie') return redirect(url_for('home')) - schema_instance = SchemaInstance(request.args, schema_catalog_map) + schema_instance = SchemaInstance(request.args) # Compute pdf url report base_url = url_for('custom_validator', _external=True) @@ -532,7 +537,7 @@ def custom_validator(): # url of resource to be validated url_param = request.args.get("url") - schema_instance = SchemaInstance(request.args, schema_catalog_map) + schema_instance = SchemaInstance(request.args) # First form display if input_param is None: @@ -561,7 +566,7 @@ def custom_validator(): elif request.method == 'POST': - schema_instance = SchemaInstance(request.form, schema_catalog_map) + schema_instance = SchemaInstance(request.form) input_param = request.form.get('input') if input_param is None: -- GitLab