Commit 6296e5b1 authored by Christophe Benz
Browse files

Internalize settings in validate function

parent 6b8f5315
......@@ -49,8 +49,7 @@ def validate_csv_str(**options):
def test_valid_delimiter():
source = """A,B,C
a,b,c"""
inspector = Inspector()
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 0
assert len(report['tables'][0]['errors']) == 0
......@@ -58,8 +57,7 @@ a,b,c"""
def test_invalid_delimiter_semicolon():
source = """A;B;C
a;b;c"""
inspector = Inspector()
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'invalid-column-delimiter'
......@@ -70,8 +68,7 @@ a;b;c"""
def test_invalid_delimiter_percent():
source = """A%B%C
a%b%c"""
inspector = Inspector()
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'invalid-column-delimiter'
......@@ -82,9 +79,7 @@ a%b%c"""
def test_missing_header_start():
source = """B,C
b,c"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}],
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
......@@ -94,9 +89,7 @@ b,c"""
def test_missing_header_middle():
source = """A,C
a,c"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}],
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
......@@ -106,9 +99,7 @@ a,c"""
def test_missing_header_end():
source = """A,B
a,b"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}],
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
......@@ -118,9 +109,7 @@ a,b"""
def test_missing_and_extra_header_end():
source = """A,B,Z
a,b,z"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}],
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 2
assert len(report['tables'][0]['errors']) == 2
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
......@@ -132,9 +121,7 @@ a,b,z"""
def test_missing_and_extra_header_middle():
source = """A,Z,B
a,z,b"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}],
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 2
assert len(report['tables'][0]['errors']) == 2
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
......@@ -146,9 +133,7 @@ a,z,b"""
def test_missing_and_extra_header_multiple():
source = """A,Z
a,z"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}],
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 2
assert len(report['tables'][0]['errors']) == 2
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
......@@ -160,9 +145,7 @@ a,z"""
def test_extra_header():
source = """A,B,C,D
a,b,c,d"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}],
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'extra-header'
......@@ -172,9 +155,7 @@ a,b,c,d"""
def test_missing_and_extra_headers_multiple():
source = """A,Z,D
a,z,d"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}],
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 3
assert len(report['tables'][0]['errors']) == 3
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
......@@ -188,9 +169,7 @@ a,z,d"""
def test_header_order():
source = """A,C,B
a,c,b"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}],
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'wrong-headers-order'
......@@ -199,9 +178,7 @@ a,c,b"""
def test_missing_and_duplicate_headers():
source = """A,A,B
a,a,b"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}],
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 2
assert len(report['tables'][0]['errors']) == 2
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
......@@ -214,9 +191,7 @@ a,a,b"""
def test_invalid_delimiter_and_missing_header():
source = """A;C
a;c"""
inspector = Inspector(checks=['structure', 'schema', {'extra-or-missing-header': {}}],
skip_checks=['non-matching-header', 'missing-header'])
report = validate_csv_str(source=source, schema=schema1, inspector=inspector, pre_checks_conf=pre_checks_conf)
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 2
assert len(report['tables'][0]['errors']) == 2
assert report['tables'][0]['errors'][0]['code'] == 'invalid-column-delimiter'
......
......@@ -4,10 +4,15 @@ from pathlib import Path
from toolz import thread_first, update_in
import goodtables
from . import pre_checks
log = logging.getLogger(__name__)
VALIDATA_MAX_ROWS = 100000
# Import all custom checks classes
cs_dir = Path(__file__).parent / 'custom_checks'
for check_file in cs_dir.glob('*.py'):
......@@ -36,10 +41,18 @@ def add_error(report, table_index, error):
))))
def validate(source, schema, inspector, pre_checks_conf=None, **options):
"""Validate a `source` with a given `inspector`, applying pre-checks from `pre_checks_conf` if given."""
def validate(source, schema, checks=None, pre_checks_conf=None, **options):
"""Validate a `source` applying pre-checks and checks."""
report = None
inspector = goodtables.Inspector(
checks=(['structure', 'schema'] if checks is None else checks) + [{'extra-or-missing-header': {}}],
skip_checks=['non-matching-header', 'missing-header'],
row_limit=VALIDATA_MAX_ROWS)
# Validata design choices
options = {**options, "force_strings": True}
for pre_check_conf in pre_checks_conf or []:
pre_check_name = pre_check_conf['name']
pre_check_options = {**options, **pre_check_conf.get('params', {})}
......
......@@ -10,11 +10,8 @@ import logging
import sys
from pathlib import Path
from goodtables import Inspector
from validata_validate import validate
VALIDATA_MAX_ROWS = 100000
def main():
parser = argparse.ArgumentParser(description=__doc__,
......@@ -53,12 +50,10 @@ def main():
for custom_check_conf in custom_checks_conf:
checks.append({custom_check_conf['name']: custom_check_conf['params']})
inspector = Inspector(checks=checks, row_limit=VALIDATA_MAX_ROWS, order_fields=True)
report = validate(
source=args.source,
checks=checks,
pre_checks_conf=pre_checks_conf,
inspector=inspector,
schema=args.schema,
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment