Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Validata
Validata Core
Commits
cc0d132d
Commit
cc0d132d
authored
Dec 08, 2020
by
Pierre Dittgen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Use sync_schema=True to ignore header errors
parent
8fb10e1f
Pipeline
#2393
passed with stage
in 1 minute and 44 seconds
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
35 additions
and
142 deletions
+35
-142
tests/test_core.py
tests/test_core.py
+33
-141
validata_core/__init__.py
validata_core/__init__.py
+2
-1
No files found.
tests/test_core.py
View file @
cc0d132d
...
...
@@ -2,6 +2,7 @@ import datetime
from
io
import
BytesIO
from
typing
import
Any
,
List
import
frictionless
import
pytest
from
openpyxl
import
Workbook
...
...
@@ -368,154 +369,45 @@ def test_empty_file(schema_abc):
assert
report
[
"tables"
][
0
][
"errors"
][
0
][
"note"
]
==
"the source is empty"
def
assert_no_report_errors
(
report
):
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
0
,
report
def
test_valid_delimiter
(
schema_abc
):
source
=
b
"""A,B,C
a,b,c"""
report
=
validate_csv_bytes
(
source
,
schema_abc
)
assert
_no_report_errors
(
report
)
assert
report
.
valid
def
test_valid_delimiter_semicolon
(
schema_abc
):
source
=
b
"""A;B;C
a;b;c"""
report
=
validate_csv_bytes
(
source
,
schema_abc
)
assert_no_report_errors
(
report
)
def
test_invalid_delimiter_percent
(
schema_abc
):
source
=
b
"""A%B%C
a%b%c"""
report
=
validate_csv_bytes
(
source
,
schema_abc
)
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
!=
0
# def test_missing_header_start(schema_abc):
# source = b"""B,C
# b,c"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['cells'] == ['A']
# def test_missing_header_middle(schema_abc):
# source = b"""A,C
# a,c"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['cells'] == ['B']
assert
report
.
valid
# def test_missing_header_end(schema_abc):
# source = b"""A,B
# a,b"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['cells'] == ['C']
# def test_missing_and_extra_header_end(schema_abc):
# source = b"""A,B,Z
# a,b,z"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 2
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['cells'] == ['C']
# assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][1]['cells'] == ['Z']
# def test_missing_and_extra_header_middle(schema_abc):
# source = b"""A,Z,B
# a,z,b"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 2
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['cells'] == ['C']
# assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][1]['cells'] == ['Z']
# waiting for https://github.com/frictionlessdata/frictionless-py/issues/551
# def test_missing_and_extra_header_multiple(schema_abc):
# source = b"""A,Z
# a,z"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 2
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['cells'] == ['B', 'C']
# assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][1]['cells'] == ['Z']
# waiting for https://github.com/frictionlessdata/frictionless-py/issues/551
# def test_extra_header_start(schema_abc):
# source = b"""X,A,B,C
# x,a,b,c"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][1]['cells'] == ['X']
# waiting for https://github.com/frictionlessdata/frictionless-py/issues/551
# def test_extra_header(schema_abc):
# source = b"""A,B,C,D
# a,b,c,d"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['errors'][0]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][0]['cells'] == ['D']
def
test_missing_required_column
(
schema_abc
):
source
=
[[
"B"
,
"C"
],[
"b"
,
"c"
]]
report
=
validate
(
source
,
schema_abc
)
# def test_extra_multiple(schema_abc):
# source = b"""A,B,C,X,Y
# a,b,c,x,y"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['error-stats']['count'] == 2
# assert report['tables'][0]['errors'][0]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][0]['cells'] == ['X', 'Y']
# def test_missing_and_extra_headers_multiple(schema_abc):
# source = b"""A,Z,D
# a,z,d"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 2
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['message-data']['headers'] == ['B', 'C']
# assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][1]['message-data']['headers'] == ['D', 'Z']
# def test_header_order(schema_abc):
# source = b"""A,C,B
# a,c,b"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['errors'][0]['code'] == 'wrong-headers-order'
# def test_invalid_delimiter_and_missing_header(schema_abc):
# source = b"""A;C
# a;c"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 2
# assert report['tables'][0]['errors'][0]['code'] == 'invalid-column-delimiter'
# assert report['tables'][0]['errors'][0]['message-data']['detected'] == ';'
# assert report['tables'][0]['errors'][0]['message-data']['expected'] == ','
# assert report['tables'][0]['errors'][1]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][1]['message-data']['headers'] == ['B']
def
test_missing_column
(
schema_abc
):
source
=
[
[
'A'
,
'C'
],
[
'a'
,
'c'
],
]
schema
=
{
"fields"
:
[
{
"name"
:
"A"
,
"constraints"
:
{
"required"
:
True
}},
{
"name"
:
"B"
},
{
"name"
:
"C"
},
]
}
report
=
validate
(
source
,
schema
)
assert
report
.
valid
def
test_valid_custom_check_siren
(
schema_siren
):
source
=
[[
"id"
,
"siren"
],[
1
,
"529173189"
]]
report
=
validate
(
source
,
schema_siren
)
assert
_no_report_errors
(
report
)
assert
report
.
valid
def
test_invalid_custom_check_siren
(
schema_siren
):
...
...
@@ -527,7 +419,7 @@ def test_invalid_custom_check_siren(schema_siren):
def
test_valid_custom_check_siret
(
schema_siret
):
source
=
[[
"id"
,
"numero_siret"
],[
1
,
"83014132100026"
]]
report
=
validate
(
source
,
schema_siret
)
assert
_no_report_errors
(
report
)
assert
report
.
valid
def
test_invalid_custom_check_siret
(
schema_siret
):
...
...
@@ -540,19 +432,19 @@ def test_invalid_custom_check_siret(schema_siret):
def
test_valid_custom_check_year_interval_1
(
schema_year_interval
):
source
=
[[
"projet"
,
"annee"
],[
"Validata"
,
"2018/2020"
]]
report
=
validate
(
source
,
schema_year_interval
)
assert
_no_report_errors
(
report
)
assert
report
.
valid
def
test_valid_custom_check_year_interval_2
(
schema_year_interval_allow_year_only
):
source
=
[[
"projet"
,
"annee"
],[
"Validata"
,
"2018/2020"
]]
report
=
validate
(
source
,
schema_year_interval_allow_year_only
)
assert
_no_report_errors
(
report
)
assert
report
.
valid
def
test_valid_custom_check_year_interval_3
(
schema_year_interval_allow_year_only
):
source
=
[[
"projet"
,
"annee"
],[
"Validata"
,
"2020"
]]
report
=
validate
(
source
,
schema_year_interval_allow_year_only
)
assert
_no_report_errors
(
report
)
assert
report
.
valid
def
test_invalid_custom_check_year_interval_1
(
schema_year_interval
):
...
...
@@ -579,19 +471,19 @@ def test_invalid_custom_check_year_interval_3(schema_year_interval):
def
test_valid_custom_sum_columns_value_1
(
schema_sum_columns_value_ok
):
source
=
[[
'charges'
,
'chauffage'
,
'salaires'
,
'fraisdebouche'
],
[
12000
,
600
,
4000
,
7400
]]
report
=
validate
(
source
,
schema_sum_columns_value_ok
)
assert
_no_report_errors
(
report
)
assert
report
.
valid
def
test_valid_custom_sum_columns_value_2
(
schema_sum_columns_value_ok
):
source
=
[[
"charges"
,
"chauffage"
,
"salaires"
,
"fraisdebouche"
],[
12000
,
600
,
None
,
7400
]]
report
=
validate
(
source
,
schema_sum_columns_value_ok
)
assert
_no_report_errors
(
report
)
assert
report
.
valid
def
test_valid_custom_sum_columns_value_3
(
schema_sum_columns_value_ok
):
source
=
[[
"charges"
,
"chauffage"
,
"salaires"
,
"fraisdebouche"
],[
None
,
600
,
4000
,
7400
]]
report
=
validate
(
source
,
schema_sum_columns_value_ok
)
assert
_no_report_errors
(
report
)
assert
report
.
valid
def
test_valid_custom_sum_columns_value_3
(
schema_sum_columns_value_ok
):
...
...
@@ -611,7 +503,7 @@ def test_valid_nomenclature_actes_value(schema_nomenclature_actes_value):
source
=
[[
"acte"
],[
"Fonction publique/foobar"
]]
report
=
validate
(
source
,
schema_nomenclature_actes_value
)
assert
_no_report_errors
(
report
)
assert
report
.
valid
def
test_invalid_nomenclature_actes_value_1
(
schema_nomenclature_actes_value
):
...
...
@@ -636,14 +528,14 @@ def test_cohesive_columns_values_1(schema_cohesive_columns):
source
=
[[
"id"
,
"col1"
,
"col2"
],[
1
,
None
,
None
]]
report
=
validate
(
source
,
schema_cohesive_columns
)
assert
_no_report_errors
(
report
)
assert
report
.
valid
def
test_cohesive_columns_values_2
(
schema_cohesive_columns
):
source
=
[[
"id"
,
"col1"
,
"col2"
],[
1
,
"foo"
,
"bar"
]]
report
=
validate
(
source
,
schema_cohesive_columns
)
assert
_no_report_errors
(
report
)
assert
report
.
valid
def
test_cohesive_columns_values_3
(
schema_cohesive_columns
):
...
...
@@ -667,13 +559,13 @@ def test_cohesive_columns_values_4(schema_cohesive_columns):
def
test_compare_columns_value_1
(
schema_compare_columns
):
source
=
[[
"depenses"
,
"recettes"
],[
12000
,
15000
]]
report
=
validate
(
source
,
schema_compare_columns
)
assert
_no_report_errors
(
report
)
assert
report
.
valid
def
test_compare_columns_value_2
(
schema_compare_columns
):
source
=
[[
"depenses"
,
"recettes"
],[
12000
,
12000
]]
report
=
validate
(
source
,
schema_compare_columns
)
assert
_no_report_errors
(
report
)
assert
report
.
valid
def
test_compare_columns_value_3
(
schema_compare_columns
):
...
...
validata_core/__init__.py
View file @
cc0d132d
...
...
@@ -34,7 +34,8 @@ def validate(source, schema, **options):
# Merge options to pass to frictionless
validate_options
=
{
'query'
:
frictionless
.
Query
(
limit_rows
=
VALIDATA_MAX_ROWS
),
'extra_checks'
:
extra_checks
,
'extra_checks'
:
extra_checks
,
# add custom_checks if needed
'sync_schema'
:
True
,
# Don't care about missing, extra or unordered columns
**
{
k
:
v
for
k
,
v
in
options
.
items
()
if
k
in
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment