Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Alexandre Bulté
Validata Core
Commits
6296e5b1
Commit
6296e5b1
authored
Oct 01, 2018
by
Christophe Benz
Browse files
Internalize settings in validate function
parent
6b8f5315
Changes
3
Hide whitespace changes
Inline
Side-by-side
tests/tests.py
View file @
6296e5b1
...
...
@@ -49,8 +49,7 @@ def validate_csv_str(**options):
def
test_valid_delimiter
():
source
=
"""A,B,C
a,b,c"""
inspector
=
Inspector
()
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
inspector
=
inspector
,
pre_checks_conf
=
pre_checks_conf
)
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
pre_checks_conf
=
pre_checks_conf
)
assert
report
[
'tables'
][
0
][
'error-count'
]
==
0
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
0
...
...
@@ -58,8 +57,7 @@ a,b,c"""
def
test_invalid_delimiter_semicolon
():
source
=
"""A;B;C
a;b;c"""
inspector
=
Inspector
()
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
inspector
=
inspector
,
pre_checks_conf
=
pre_checks_conf
)
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
pre_checks_conf
=
pre_checks_conf
)
assert
report
[
'tables'
][
0
][
'error-count'
]
==
1
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
1
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'code'
]
==
'invalid-column-delimiter'
...
...
@@ -70,8 +68,7 @@ a;b;c"""
def
test_invalid_delimiter_percent
():
source
=
"""A%B%C
a%b%c"""
inspector
=
Inspector
()
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
inspector
=
inspector
,
pre_checks_conf
=
pre_checks_conf
)
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
pre_checks_conf
=
pre_checks_conf
)
assert
report
[
'tables'
][
0
][
'error-count'
]
==
1
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
1
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'code'
]
==
'invalid-column-delimiter'
...
...
@@ -82,9 +79,7 @@ a%b%c"""
def
test_missing_header_start
():
source
=
"""B,C
b,c"""
inspector
=
Inspector
(
checks
=
[
'structure'
,
'schema'
,
{
'extra-or-missing-header'
:
{}}],
skip_checks
=
[
'non-matching-header'
,
'missing-header'
])
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
inspector
=
inspector
,
pre_checks_conf
=
pre_checks_conf
)
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
pre_checks_conf
=
pre_checks_conf
)
assert
report
[
'tables'
][
0
][
'error-count'
]
==
1
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
1
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'code'
]
==
'missing-headers'
...
...
@@ -94,9 +89,7 @@ b,c"""
def
test_missing_header_middle
():
source
=
"""A,C
a,c"""
inspector
=
Inspector
(
checks
=
[
'structure'
,
'schema'
,
{
'extra-or-missing-header'
:
{}}],
skip_checks
=
[
'non-matching-header'
,
'missing-header'
])
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
inspector
=
inspector
,
pre_checks_conf
=
pre_checks_conf
)
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
pre_checks_conf
=
pre_checks_conf
)
assert
report
[
'tables'
][
0
][
'error-count'
]
==
1
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
1
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'code'
]
==
'missing-headers'
...
...
@@ -106,9 +99,7 @@ a,c"""
def
test_missing_header_end
():
source
=
"""A,B
a,b"""
inspector
=
Inspector
(
checks
=
[
'structure'
,
'schema'
,
{
'extra-or-missing-header'
:
{}}],
skip_checks
=
[
'non-matching-header'
,
'missing-header'
])
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
inspector
=
inspector
,
pre_checks_conf
=
pre_checks_conf
)
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
pre_checks_conf
=
pre_checks_conf
)
assert
report
[
'tables'
][
0
][
'error-count'
]
==
1
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
1
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'code'
]
==
'missing-headers'
...
...
@@ -118,9 +109,7 @@ a,b"""
def
test_missing_and_extra_header_end
():
source
=
"""A,B,Z
a,b,z"""
inspector
=
Inspector
(
checks
=
[
'structure'
,
'schema'
,
{
'extra-or-missing-header'
:
{}}],
skip_checks
=
[
'non-matching-header'
,
'missing-header'
])
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
inspector
=
inspector
,
pre_checks_conf
=
pre_checks_conf
)
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
pre_checks_conf
=
pre_checks_conf
)
assert
report
[
'tables'
][
0
][
'error-count'
]
==
2
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
2
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'code'
]
==
'missing-headers'
...
...
@@ -132,9 +121,7 @@ a,b,z"""
def
test_missing_and_extra_header_middle
():
source
=
"""A,Z,B
a,z,b"""
inspector
=
Inspector
(
checks
=
[
'structure'
,
'schema'
,
{
'extra-or-missing-header'
:
{}}],
skip_checks
=
[
'non-matching-header'
,
'missing-header'
])
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
inspector
=
inspector
,
pre_checks_conf
=
pre_checks_conf
)
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
pre_checks_conf
=
pre_checks_conf
)
assert
report
[
'tables'
][
0
][
'error-count'
]
==
2
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
2
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'code'
]
==
'missing-headers'
...
...
@@ -146,9 +133,7 @@ a,z,b"""
def
test_missing_and_extra_header_multiple
():
source
=
"""A,Z
a,z"""
inspector
=
Inspector
(
checks
=
[
'structure'
,
'schema'
,
{
'extra-or-missing-header'
:
{}}],
skip_checks
=
[
'non-matching-header'
,
'missing-header'
])
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
inspector
=
inspector
,
pre_checks_conf
=
pre_checks_conf
)
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
pre_checks_conf
=
pre_checks_conf
)
assert
report
[
'tables'
][
0
][
'error-count'
]
==
2
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
2
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'code'
]
==
'missing-headers'
...
...
@@ -160,9 +145,7 @@ a,z"""
def
test_extra_header
():
source
=
"""A,B,C,D
a,b,c,d"""
inspector
=
Inspector
(
checks
=
[
'structure'
,
'schema'
,
{
'extra-or-missing-header'
:
{}}],
skip_checks
=
[
'non-matching-header'
,
'missing-header'
])
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
inspector
=
inspector
,
pre_checks_conf
=
pre_checks_conf
)
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
pre_checks_conf
=
pre_checks_conf
)
assert
report
[
'tables'
][
0
][
'error-count'
]
==
1
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
1
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'code'
]
==
'extra-header'
...
...
@@ -172,9 +155,7 @@ a,b,c,d"""
def
test_missing_and_extra_headers_multiple
():
source
=
"""A,Z,D
a,z,d"""
inspector
=
Inspector
(
checks
=
[
'structure'
,
'schema'
,
{
'extra-or-missing-header'
:
{}}],
skip_checks
=
[
'non-matching-header'
,
'missing-header'
])
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
inspector
=
inspector
,
pre_checks_conf
=
pre_checks_conf
)
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
pre_checks_conf
=
pre_checks_conf
)
assert
report
[
'tables'
][
0
][
'error-count'
]
==
3
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
3
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'code'
]
==
'missing-headers'
...
...
@@ -188,9 +169,7 @@ a,z,d"""
def
test_header_order
():
source
=
"""A,C,B
a,c,b"""
inspector
=
Inspector
(
checks
=
[
'structure'
,
'schema'
,
{
'extra-or-missing-header'
:
{}}],
skip_checks
=
[
'non-matching-header'
,
'missing-header'
])
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
inspector
=
inspector
,
pre_checks_conf
=
pre_checks_conf
)
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
pre_checks_conf
=
pre_checks_conf
)
assert
report
[
'tables'
][
0
][
'error-count'
]
==
1
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
1
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'code'
]
==
'wrong-headers-order'
...
...
@@ -199,9 +178,7 @@ a,c,b"""
def
test_missing_and_duplicate_headers
():
source
=
"""A,A,B
a,a,b"""
inspector
=
Inspector
(
checks
=
[
'structure'
,
'schema'
,
{
'extra-or-missing-header'
:
{}}],
skip_checks
=
[
'non-matching-header'
,
'missing-header'
])
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
inspector
=
inspector
,
pre_checks_conf
=
pre_checks_conf
)
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
pre_checks_conf
=
pre_checks_conf
)
assert
report
[
'tables'
][
0
][
'error-count'
]
==
2
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
2
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'code'
]
==
'missing-headers'
...
...
@@ -214,9 +191,7 @@ a,a,b"""
def
test_invalid_delimiter_and_missing_header
():
source
=
"""A;C
a;c"""
inspector
=
Inspector
(
checks
=
[
'structure'
,
'schema'
,
{
'extra-or-missing-header'
:
{}}],
skip_checks
=
[
'non-matching-header'
,
'missing-header'
])
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
inspector
=
inspector
,
pre_checks_conf
=
pre_checks_conf
)
report
=
validate_csv_str
(
source
=
source
,
schema
=
schema1
,
pre_checks_conf
=
pre_checks_conf
)
assert
report
[
'tables'
][
0
][
'error-count'
]
==
2
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
2
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'code'
]
==
'invalid-column-delimiter'
...
...
validata_validate/__init__.py
View file @
6296e5b1
...
...
@@ -4,10 +4,15 @@ from pathlib import Path
from
toolz
import
thread_first
,
update_in
import
goodtables
from
.
import
pre_checks
log
=
logging
.
getLogger
(
__name__
)
VALIDATA_MAX_ROWS
=
100000
# Import all custom checks classes
cs_dir
=
Path
(
__file__
).
parent
/
'custom_checks'
for
check_file
in
cs_dir
.
glob
(
'*.py'
):
...
...
@@ -36,10 +41,18 @@ def add_error(report, table_index, error):
))))
def
validate
(
source
,
schema
,
inspector
,
pre_checks_conf
=
None
,
**
options
):
"""Validate a `source`
with a given `inspector`, applying pre-checks from `pre_checks_conf` if given
."""
def
validate
(
source
,
schema
,
checks
=
None
,
pre_checks_conf
=
None
,
**
options
):
"""Validate a `source`
applying pre-checks and checks
."""
report
=
None
inspector
=
goodtables
.
Inspector
(
checks
=
([
'structure'
,
'schema'
]
if
checks
is
None
else
checks
)
+
[{
'extra-or-missing-header'
:
{}}],
skip_checks
=
[
'non-matching-header'
,
'missing-header'
],
row_limit
=
VALIDATA_MAX_ROWS
)
# Validata design choices
options
=
{
**
options
,
"force_strings"
:
True
}
for
pre_check_conf
in
pre_checks_conf
or
[]:
pre_check_name
=
pre_check_conf
[
'name'
]
pre_check_options
=
{
**
options
,
**
pre_check_conf
.
get
(
'params'
,
{})}
...
...
validate.py
View file @
6296e5b1
...
...
@@ -10,11 +10,8 @@ import logging
import
sys
from
pathlib
import
Path
from
goodtables
import
Inspector
from
validata_validate
import
validate
VALIDATA_MAX_ROWS
=
100000
def
main
():
parser
=
argparse
.
ArgumentParser
(
description
=
__doc__
,
...
...
@@ -53,12 +50,10 @@ def main():
for
custom_check_conf
in
custom_checks_conf
:
checks
.
append
({
custom_check_conf
[
'name'
]:
custom_check_conf
[
'params'
]})
inspector
=
Inspector
(
checks
=
checks
,
row_limit
=
VALIDATA_MAX_ROWS
,
order_fields
=
True
)
report
=
validate
(
source
=
args
.
source
,
checks
=
checks
,
pre_checks_conf
=
pre_checks_conf
,
inspector
=
inspector
,
schema
=
args
.
schema
,
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment