Commit 1bacf715 authored by Christophe Benz
Browse files

Internalize schema and checks resolution

parent fc63009b
......@@ -29,6 +29,7 @@ setup(
install_requires=[
'goodtables',
'importlib_resources',
'requests',
'tabulator',
'toml',
'toolz',
......
import importlib.util
import json
import logging
from pathlib import Path
import importlib_resources
import requests
import toml
from tableschema import Schema
from toolz import thread_first, update_in
import goodtables
import importlib_resources
from . import loaders, pre_checks, messages
from . import loaders, messages, pre_checks
log = logging.getLogger(__name__)
......@@ -61,8 +64,70 @@ def improve_messages(report, schema):
return report
def validate(source, schema, checks=None, pre_checks_conf=None, **options):
"""Validate a `source` applying pre-checks and checks."""
def load_schema_and_checks(schema_value, checks):
    """Resolve the schema and load the pre-checks and checks configuration.

    `schema_value` is passed to `resolve_schema`. When it is a `str` found in
    the configuration returned by `get_schemas_config()`, it is handled as a
    SCDL tag: the checks configuration is downloaded from the
    `goodtables_checks_json_url` entry of that tag, and `checks` must be `None`.

    Otherwise `checks` is either `None` (handled like an empty configuration
    file) or a `pathlib.Path` to a JSON file holding the checks configuration.

    Return a `(schema, pre_checks_conf, checks)` tuple, the last two items
    being computed by `build_checks`.

    Raise `ValueError` if `checks` is given together with a SCDL tag, and
    `TypeError` if `checks` is neither `None` nor a `pathlib.Path`.
    """
    schemas_config = get_schemas_config()
    # Only strings can be SCDL tags. Skipping the lookup for other types also
    # avoids a `TypeError` on unhashable values such as a `dict` descriptor.
    schema_config = schemas_config.get(schema_value) if isinstance(schema_value, str) else None
    schema = resolve_schema(schema_value, schema_config)

    if schema_config is not None:
        # `schema_value` is a SCDL tag: checks come from the schemas configuration.
        if checks is not None:
            raise ValueError(
                "checks must not be given when the schema is a SCDL tag, got {!r}".format(checks)
            )
        checks_url = schema_config["goodtables_checks_json_url"]
        # Use a timeout so an unresponsive server cannot hang the validation,
        # and fail on HTTP errors instead of trying to parse an error page as JSON.
        response = requests.get(checks_url, timeout=30)
        response.raise_for_status()
        goodtables_checks_json = response.json()
    elif checks is None:
        # `validate` defaults `checks` to `None`: behave as with an empty file.
        goodtables_checks_json = {}
    elif isinstance(checks, Path):
        # Checks configuration is stored in a local JSON file.
        with checks.open() as fp:
            goodtables_checks_json = json.load(fp)
    else:
        raise TypeError("checks must be None or a pathlib.Path, got {!r}".format(checks))

    pre_checks_conf, checks = build_checks(goodtables_checks_json)
    return schema, pre_checks_conf, checks
def build_checks(goodtables_checks_json):
    """Build the pre-checks configuration and the checks list.

    `goodtables_checks_json` is the parsed checks configuration, a `dict`
    which may contain:
    - `pre_checks`: returned as is, or an empty list when missing or null
    - `custom_checks`: a list of items having `name` and `params` keys, each
      appended to the checks as a `{name: params}` dict

    Return a `(pre_checks_conf, checks)` tuple. `checks` always starts with
    the `'structure'` and `'schema'` checks.
    """
    # Fall back to an empty list so that callers always receive a list,
    # even when the key is absent or explicitly null.
    pre_checks_conf = goodtables_checks_json.get('pre_checks') or []

    checks = ['structure', 'schema']
    custom_checks_conf = goodtables_checks_json.get('custom_checks')
    if custom_checks_conf is not None:
        checks.extend(
            {custom_check_conf['name']: custom_check_conf['params']}
            for custom_check_conf in custom_checks_conf
        )
    return pre_checks_conf, checks
def resolve_schema(value, schema_config=None):
    """Return a `tableschema.Schema` instance from `value` which can be either:

    - a `pathlib.Path`
    - a `str` containing either:
        - a file path
        - an URL
        - a SCDL tag as described in `schemas.toml` (i.e. `scdl-prenoms`)
    - a `dict` representing the schema in JSON
    - a `tableschema.Schema` instance

    `schema_config` is the configuration entry of the SCDL tag, when `value`
    is one. If given with a `str` value, the schema is loaded from its
    `schema_json_url` entry instead of from `value` itself.
    """
    if isinstance(value, Schema):
        # Already resolved: return it as is rather than feeding it to `Schema()`.
        return value
    if isinstance(value, Path):
        value = str(value)
    if isinstance(value, str) and schema_config is not None:
        # `value` is a SCDL tag (i.e. `scdl-prenoms`)
        value = schema_config["schema_json_url"]
    return Schema(value)
def validate(source, schema, checks=None, **options):
"""Validate a `source` applying pre-checks and checks.
`schema` is resolved using `resolve_schema`.
"""
schema, pre_checks_conf, checks = load_schema_and_checks(schema, checks)
schema_descriptor = schema.descriptor
report = None
inspector = goodtables.Inspector(
......@@ -85,7 +150,7 @@ def validate(source, schema, checks=None, pre_checks_conf=None, **options):
log.debug("error: %r", dict(error))
with pre_check.get_fixed_stream() as fixed_stream:
rows = list(fixed_stream)
report = inspector.inspect(source=rows, schema=schema,
report = inspector.inspect(source=rows, schema=schema_descriptor,
**{**options, "format": "inline", "scheme": None})
log.debug("report: %r", report)
report = add_error(report, table_index=0, error=dict(error))
......@@ -93,9 +158,9 @@ def validate(source, schema, checks=None, pre_checks_conf=None, **options):
if report is None:
# If no pre-checks have been executed, or all pre-checks were successful, no report was computed,
# so fallback to a normal validation, without pre-checks.
report = inspector.inspect(source=source, schema=schema, **options)
report = inspector.inspect(source=source, schema=schema_descriptor, **options)
# Translate error messages
report = improve_messages(report, schema)
report = improve_messages(report, schema_descriptor)
return report
......@@ -12,7 +12,7 @@ from pathlib import Path
from tableschema import Schema
from . import validate
from . import get_schemas_config, validate
def cli():
......@@ -24,8 +24,10 @@ def cli():
parser.add_argument('--schema', help='URL or path to table schema JSON file')
args = parser.parse_args()
if args.checks is not None and not args.checks.exists():
parser.error("Custom check file {!r} not found!".format(str(args.checks)))
schemas_config = get_schemas_config()
schema_config = schemas_config.get(args.schema)
if schema_config is not None and args.checks is not None:
parser.error("When using a SCDL tag, checks are defined in `schemas.toml`. Don't use --checks option.")
numeric_level = getattr(logging, args.log.upper(), None)
if not isinstance(numeric_level, int):
......@@ -36,30 +38,10 @@ def cli():
stream=sys.stderr, # script outputs data
)
# Load pre-checks and custom checks configuration.
pre_checks_conf = []
checks = ['structure', 'schema']
if args.checks is not None:
with args.checks.open() as fp:
goodtables_checks_json = json.load(fp)
# pre-checks
pre_checks_conf = goodtables_checks_json.get('pre_checks')
# custom checks
custom_checks_conf = goodtables_checks_json.get('custom_checks')
if custom_checks_conf is not None:
for custom_check_conf in custom_checks_conf:
checks.append({custom_check_conf['name']: custom_check_conf['params']})
# Don't use `json.load` to let `Schema` handle "file-path or URL" pattern.
schema_descriptor = Schema(args.schema).descriptor
report = validate(
source=args.source,
checks=checks,
pre_checks_conf=pre_checks_conf,
schema=schema_descriptor,
checks=args.checks,
schema=args.schema,
)
json.dump(report, sys.stdout, ensure_ascii=False, indent=2, sort_keys=True)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment