Commit 6296e5b1 authored by Christophe Benz
Browse files

Internalize settings in validate function

parent 6b8f5315
...@@ -49,8 +49,7 @@ def validate_csv_str(**options): ...@@ -49,8 +49,7 @@ def validate_csv_str(**options):
def test_valid_delimiter(): def test_valid_delimiter():
source = """A,B,C source = """A,B,C
a,b,c""" a,b,c"""
inspector = Inspector() report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 0 assert report['tables'][0]['error-count'] == 0
assert len(report['tables'][0]['errors']) == 0 assert len(report['tables'][0]['errors']) == 0
...@@ -58,8 +57,7 @@ a,b,c""" ...@@ -58,8 +57,7 @@ a,b,c"""
def test_invalid_delimiter_semicolon(): def test_invalid_delimiter_semicolon():
source = """A;B;C source = """A;B;C
a;b;c""" a;b;c"""
inspector = Inspector() report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 1 assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1 assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'invalid-column-delimiter' assert report['tables'][0]['errors'][0]['code'] == 'invalid-column-delimiter'
...@@ -70,8 +68,7 @@ a;b;c""" ...@@ -70,8 +68,7 @@ a;b;c"""
def test_invalid_delimiter_percent(): def test_invalid_delimiter_percent():
source = """A%B%C source = """A%B%C
a%b%c""" a%b%c"""
inspector = Inspector() report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 1 assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1 assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'invalid-column-delimiter' assert report['tables'][0]['errors'][0]['code'] == 'invalid-column-delimiter'
...@@ -82,9 +79,7 @@ a%b%c""" ...@@ -82,9 +79,7 @@ a%b%c"""
def test_missing_header_start(): def test_missing_header_start():
source = """B,C source = """B,C
b,c""" b,c"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}], report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 1 assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1 assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers' assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
...@@ -94,9 +89,7 @@ b,c""" ...@@ -94,9 +89,7 @@ b,c"""
def test_missing_header_middle(): def test_missing_header_middle():
source = """A,C source = """A,C
a,c""" a,c"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}], report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 1 assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1 assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers' assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
...@@ -106,9 +99,7 @@ a,c""" ...@@ -106,9 +99,7 @@ a,c"""
def test_missing_header_end(): def test_missing_header_end():
source = """A,B source = """A,B
a,b""" a,b"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}], report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 1 assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1 assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers' assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
...@@ -118,9 +109,7 @@ a,b""" ...@@ -118,9 +109,7 @@ a,b"""
def test_missing_and_extra_header_end(): def test_missing_and_extra_header_end():
source = """A,B,Z source = """A,B,Z
a,b,z""" a,b,z"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}], report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 2 assert report['tables'][0]['error-count'] == 2
assert len(report['tables'][0]['errors']) == 2 assert len(report['tables'][0]['errors']) == 2
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers' assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
...@@ -132,9 +121,7 @@ a,b,z""" ...@@ -132,9 +121,7 @@ a,b,z"""
def test_missing_and_extra_header_middle(): def test_missing_and_extra_header_middle():
source = """A,Z,B source = """A,Z,B
a,z,b""" a,z,b"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}], report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 2 assert report['tables'][0]['error-count'] == 2
assert len(report['tables'][0]['errors']) == 2 assert len(report['tables'][0]['errors']) == 2
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers' assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
...@@ -146,9 +133,7 @@ a,z,b""" ...@@ -146,9 +133,7 @@ a,z,b"""
def test_missing_and_extra_header_multiple(): def test_missing_and_extra_header_multiple():
source = """A,Z source = """A,Z
a,z""" a,z"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}], report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 2 assert report['tables'][0]['error-count'] == 2
assert len(report['tables'][0]['errors']) == 2 assert len(report['tables'][0]['errors']) == 2
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers' assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
...@@ -160,9 +145,7 @@ a,z""" ...@@ -160,9 +145,7 @@ a,z"""
def test_extra_header(): def test_extra_header():
source = """A,B,C,D source = """A,B,C,D
a,b,c,d""" a,b,c,d"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}], report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 1 assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1 assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'extra-header' assert report['tables'][0]['errors'][0]['code'] == 'extra-header'
...@@ -172,9 +155,7 @@ a,b,c,d""" ...@@ -172,9 +155,7 @@ a,b,c,d"""
def test_missing_and_extra_headers_multiple(): def test_missing_and_extra_headers_multiple():
source = """A,Z,D source = """A,Z,D
a,z,d""" a,z,d"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}], report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 3 assert report['tables'][0]['error-count'] == 3
assert len(report['tables'][0]['errors']) == 3 assert len(report['tables'][0]['errors']) == 3
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers' assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
...@@ -188,9 +169,7 @@ a,z,d""" ...@@ -188,9 +169,7 @@ a,z,d"""
def test_header_order(): def test_header_order():
source = """A,C,B source = """A,C,B
a,c,b""" a,c,b"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}], report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 1 assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1 assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'wrong-headers-order' assert report['tables'][0]['errors'][0]['code'] == 'wrong-headers-order'
...@@ -199,9 +178,7 @@ a,c,b""" ...@@ -199,9 +178,7 @@ a,c,b"""
def test_missing_and_duplicate_headers(): def test_missing_and_duplicate_headers():
source = """A,A,B source = """A,A,B
a,a,b""" a,a,b"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}], report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 2 assert report['tables'][0]['error-count'] == 2
assert len(report['tables'][0]['errors']) == 2 assert len(report['tables'][0]['errors']) == 2
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers' assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
...@@ -214,9 +191,7 @@ a,a,b""" ...@@ -214,9 +191,7 @@ a,a,b"""
def test_invalid_delimiter_and_missing_header(): def test_invalid_delimiter_and_missing_header():
source = """A;C source = """A;C
a;c""" a;c"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}], report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 2 assert report['tables'][0]['error-count'] == 2
assert len(report['tables'][0]['errors']) == 2 assert len(report['tables'][0]['errors']) == 2
assert report['tables'][0]['errors'][0]['code'] == 'invalid-column-delimiter' assert report['tables'][0]['errors'][0]['code'] == 'invalid-column-delimiter'
......
...@@ -4,10 +4,15 @@ from pathlib import Path ...@@ -4,10 +4,15 @@ from pathlib import Path
from toolz import thread_first, update_in from toolz import thread_first, update_in
import goodtables
from . import pre_checks from . import pre_checks
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
VALIDATA_MAX_ROWS = 100000
# Import all custom checks classes # Import all custom checks classes
cs_dir = Path(__file__).parent / 'custom_checks' cs_dir = Path(__file__).parent / 'custom_checks'
for check_file in cs_dir.glob('*.py'): for check_file in cs_dir.glob('*.py'):
...@@ -36,10 +41,18 @@ def add_error(report, table_index, error): ...@@ -36,10 +41,18 @@ def add_error(report, table_index, error):
)))) ))))
def validate(source, schema, inspector, pre_checks_conf=None, **options): def validate(source, schema, checks=None, pre_checks_conf=None, **options):
"""Validate a `source` with a given `inspector`, applying pre-checks from `pre_checks_conf` if given.""" """Validate a `source` applying pre-checks and checks."""
report = None report = None
inspector = goodtables.Inspector(
checks=(['structure', 'schema'] if checks is None else checks) + [{'extra-or-missing-header': {}}],
skip_checks=['non-matching-header', 'missing-header'],
row_limit=VALIDATA_MAX_ROWS)
# Validata design choices
options = {**options, "force_strings": True}
for pre_check_conf in pre_checks_conf or []: for pre_check_conf in pre_checks_conf or []:
pre_check_name = pre_check_conf['name'] pre_check_name = pre_check_conf['name']
pre_check_options = {**options, **pre_check_conf.get('params', {})} pre_check_options = {**options, **pre_check_conf.get('params', {})}
......
...@@ -10,11 +10,8 @@ import logging ...@@ -10,11 +10,8 @@ import logging
import sys import sys
from pathlib import Path from pathlib import Path
from goodtables import Inspector
from validata_validate import validate from validata_validate import validate
VALIDATA_MAX_ROWS = 100000
def main(): def main():
parser = argparse.ArgumentParser(description=__doc__, parser = argparse.ArgumentParser(description=__doc__,
...@@ -53,12 +50,10 @@ def main(): ...@@ -53,12 +50,10 @@ def main():
for custom_check_conf in custom_checks_conf: for custom_check_conf in custom_checks_conf:
checks.append({custom_check_conf['name']: custom_check_conf['params']}) checks.append({custom_check_conf['name']: custom_check_conf['params']})
inspector = Inspector(checks=checks, row_limit=VALIDATA_MAX_ROWS, order_fields=True)
report = validate( report = validate(
source=args.source, source=args.source,
checks=checks,
pre_checks_conf=pre_checks_conf, pre_checks_conf=pre_checks_conf,
inspector=inspector,
schema=args.schema, schema=args.schema,
) )
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment