Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Alexandre Bulté
Validata Core
Commits
1bacf715
Commit
1bacf715
authored
Nov 20, 2018
by
Christophe Benz
Browse files
Internalize schema and checks resolution
parent
fc63009b
Changes
3
Hide whitespace changes
Inline
Side-by-side
setup.py
View file @
1bacf715
...
...
@@ -29,6 +29,7 @@ setup(
install_requires
=
[
'goodtables'
,
'importlib_resources'
,
'requests'
,
'tabulator'
,
'toml'
,
'toolz'
,
...
...
validata_core/__init__.py
View file @
1bacf715
import
importlib.util
import
json
import
logging
from
pathlib
import
Path
import
importlib_resources
import
requests
import
toml
from
tableschema
import
Schema
from
toolz
import
thread_first
,
update_in
import
goodtables
import
importlib_resources
from
.
import
loaders
,
pre_checks
,
message
s
from
.
import
loaders
,
messages
,
pre_check
s
log
=
logging
.
getLogger
(
__name__
)
...
...
@@ -61,8 +64,70 @@ def improve_messages(report, schema):
return
report
def
validate
(
source
,
schema
,
checks
=
None
,
pre_checks_conf
=
None
,
**
options
):
"""Validate a `source` applying pre-checks and checks."""
def load_schema_and_checks(schema_value, checks):
    """Resolve a schema and load its pre-checks and checks configuration.

    `schema_value` is anything accepted by `resolve_schema`. When it is a
    `str` found as a key of the schemas configuration (a SCDL tag), the checks
    JSON is downloaded from the `goodtables_checks_json_url` entry of that
    configuration and `checks` must be `None`. Otherwise `checks` is either a
    `pathlib.Path` to a checks JSON file, or `None` to use the default checks.

    Return a `(schema, pre_checks_conf, checks)` tuple, where the last two
    items are the values produced by `build_checks`.
    """
    # Only a string can be a SCDL tag. Looking up a `dict` schema would raise
    # `TypeError` (unhashable key), so skip the lookup for any other type.
    schema_config = None
    if isinstance(schema_value, str):
        schema_config = get_schemas_config().get(schema_value)

    schema = resolve_schema(schema_value, schema_config)

    if schema_config is not None:
        # `schema_value` is a SCDL tag: checks come from the configuration.
        assert checks is None, checks
        response = requests.get(schema_config["goodtables_checks_json_url"])
        # Fail loudly on an HTTP error instead of parsing an error body as if
        # it were a checks configuration.
        response.raise_for_status()
        goodtables_checks_json = response.json()
    elif checks is None:
        # No checks file was given: run the default checks without pre-checks.
        goodtables_checks_json = {"pre_checks": []}
    else:
        # `checks` is the path of a checks JSON file.
        assert isinstance(checks, Path), checks
        with checks.open() as fp:
            goodtables_checks_json = json.load(fp)

    pre_checks_conf, checks = build_checks(goodtables_checks_json)
    return schema, pre_checks_conf, checks
def build_checks(goodtables_checks_json):
    """Build the pre-checks and checks configuration from a checks JSON `dict`.

    `goodtables_checks_json` may contain:
    - `pre_checks`: returned as-is as the pre-checks configuration;
    - `custom_checks`: a list of `dict` with `name` and `params` keys, each
      appended to the checks as a `{name: params}` item.

    Return a `(pre_checks_conf, checks)` tuple. `checks` always starts with
    `'structure'` and `'schema'`. `pre_checks_conf` is an empty list when the
    `pre_checks` key is missing or null, never `None`.
    """
    # A missing or null entry means "nothing configured".
    pre_checks_conf = goodtables_checks_json.get('pre_checks') or []
    custom_checks_conf = goodtables_checks_json.get('custom_checks') or []

    checks = ['structure', 'schema']
    checks.extend(
        {custom_check_conf['name']: custom_check_conf['params']}
        for custom_check_conf in custom_checks_conf
    )
    return pre_checks_conf, checks
def resolve_schema(value, schema_config=None):
    """Return a `tableschema.Schema` instance from `value` which can be either:

    - a `pathlib.Path`
    - a `str` containing either:
        - a file path
        - a URL
        - a SCDL tag as described in `schemas.toml` (i.e. `scdl-prenoms`)
    - a `dict` representing the schema in JSON
    - a `tableschema.Schema` instance

    `schema_config` is the configuration entry of the SCDL tag, or `None` when
    `value` is not a tag. When given with a `str` or `Path` value, the schema
    is loaded from its `schema_json_url` entry instead of from `value`.
    """
    if isinstance(value, Schema):
        # Already resolved: `Schema(...)` expects a descriptor, not an instance.
        return value

    if isinstance(value, Path):
        value = str(value)

    if isinstance(value, str) and schema_config is not None:
        # `value` is a SCDL tag (i.e. `scdl-prenoms`)
        value = schema_config["schema_json_url"]

    return Schema(value)
def
validate
(
source
,
schema
,
checks
=
None
,
**
options
):
"""Validate a `source` applying pre-checks and checks.
`schema` is resolved using `resolve_schema`.
"""
schema
,
pre_checks_conf
,
checks
=
load_schema_and_checks
(
schema
,
checks
)
schema_descriptor
=
schema
.
descriptor
report
=
None
inspector
=
goodtables
.
Inspector
(
...
...
@@ -85,7 +150,7 @@ def validate(source, schema, checks=None, pre_checks_conf=None, **options):
log
.
debug
(
"error: %r"
,
dict
(
error
))
with
pre_check
.
get_fixed_stream
()
as
fixed_stream
:
rows
=
list
(
fixed_stream
)
report
=
inspector
.
inspect
(
source
=
rows
,
schema
=
schema
,
report
=
inspector
.
inspect
(
source
=
rows
,
schema
=
schema
_descriptor
,
**
{
**
options
,
"format"
:
"inline"
,
"scheme"
:
None
})
log
.
debug
(
"report: %r"
,
report
)
report
=
add_error
(
report
,
table_index
=
0
,
error
=
dict
(
error
))
...
...
@@ -93,9 +158,9 @@ def validate(source, schema, checks=None, pre_checks_conf=None, **options):
if
report
is
None
:
# If no pre-checks have been executed, or all pre-checks were successful, no report was computed,
# so fallback to a normal validation, without pre-checks.
report
=
inspector
.
inspect
(
source
=
source
,
schema
=
schema
,
**
options
)
report
=
inspector
.
inspect
(
source
=
source
,
schema
=
schema
_descriptor
,
**
options
)
# Translate error messages
report
=
improve_messages
(
report
,
schema
)
report
=
improve_messages
(
report
,
schema
_descriptor
)
return
report
validata_core/cli.py
View file @
1bacf715
...
...
@@ -12,7 +12,7 @@ from pathlib import Path
from
tableschema
import
Schema
from
.
import
validate
from
.
import
get_schemas_config
,
validate
def
cli
():
...
...
@@ -24,8 +24,10 @@ def cli():
parser
.
add_argument
(
'--schema'
,
help
=
'URL or path to table schema JSON file'
)
args
=
parser
.
parse_args
()
if
args
.
checks
is
not
None
and
not
args
.
checks
.
exists
():
parser
.
error
(
"Custom check file {!r} not found!"
.
format
(
str
(
args
.
checks
)))
schemas_config
=
get_schemas_config
()
schema_config
=
schemas_config
.
get
(
args
.
schema
)
if
schema_config
is
not
None
and
args
.
checks
is
not
None
:
parser
.
error
(
"When using a SCDL tag, checks are defined in `schemas.toml`. Don't use --checks option."
)
numeric_level
=
getattr
(
logging
,
args
.
log
.
upper
(),
None
)
if
not
isinstance
(
numeric_level
,
int
):
...
...
@@ -36,30 +38,10 @@ def cli():
stream
=
sys
.
stderr
,
# script outputs data
)
# Load pre-checks and custom checks configuration.
pre_checks_conf
=
[]
checks
=
[
'structure'
,
'schema'
]
if
args
.
checks
is
not
None
:
with
args
.
checks
.
open
()
as
fp
:
goodtables_checks_json
=
json
.
load
(
fp
)
# pre-checks
pre_checks_conf
=
goodtables_checks_json
.
get
(
'pre_checks'
)
# custom checks
custom_checks_conf
=
goodtables_checks_json
.
get
(
'custom_checks'
)
if
custom_checks_conf
is
not
None
:
for
custom_check_conf
in
custom_checks_conf
:
checks
.
append
({
custom_check_conf
[
'name'
]:
custom_check_conf
[
'params'
]})
# Don't use `json.load` to let `Schema` handle "file-path or URL" pattern.
schema_descriptor
=
Schema
(
args
.
schema
).
descriptor
report
=
validate
(
source
=
args
.
source
,
checks
=
checks
,
pre_checks_conf
=
pre_checks_conf
,
schema
=
schema_descriptor
,
checks
=
args
.
checks
,
schema
=
args
.
schema
,
)
json
.
dump
(
report
,
sys
.
stdout
,
ensure_ascii
=
False
,
indent
=
2
,
sort_keys
=
True
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment