Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Validata
Validata Core
Commits
f8f37bcb
Commit
f8f37bcb
authored
Nov 26, 2020
by
Pierre Dittgen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Improve errors handling (wip)
parent
ce43d84f
Pipeline
#2362
failed with stage
in 1 minute and 22 seconds
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
65 additions
and
54 deletions
+65
-54
tests/test_core.py
tests/test_core.py
+52
-43
validata_core/__init__.py
validata_core/__init__.py
+10
-4
validata_core/custom_checks/extra_or_missing_header.py
validata_core/custom_checks/extra_or_missing_header.py
+3
-7
No files found.
tests/test_core.py
View file @
f8f37bcb
...
...
@@ -388,57 +388,56 @@ def test_missing_header_start(schema_abc):
source
=
"""B,C
b,c"""
report
=
validate_csv_text
(
source
=
source
,
schema
=
schema_abc
)
import
ipdb
;
ipdb
.
set_trace
()
assert
report
[
'tables'
][
0
][
'error-count'
]
==
1
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
1
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'code'
]
==
'missing-headers'
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'
message-data'
][
'header
s'
]
==
[
'A'
]
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'
cell
s'
]
==
[
'A'
]
#
def test_missing_header_middle(schema_abc):
#
source = """A,C
#
a,c"""
#
report = validate_csv_text(source=source, schema=schema_abc)
#
assert report['tables'][0]['error-count'] == 1
#
assert len(report['tables'][0]['errors']) == 1
#
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
#
assert report['tables'][0]['errors'][0]['
message-data']['header
s'] == ['B']
def
test_missing_header_middle
(
schema_abc
):
source
=
"""A,C
a,c"""
report
=
validate_csv_text
(
source
=
source
,
schema
=
schema_abc
)
assert
report
[
'tables'
][
0
][
'error-count'
]
==
1
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
1
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'code'
]
==
'missing-headers'
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'
cell
s'
]
==
[
'B'
]
#
def test_missing_header_end(schema_abc):
#
source = """A,B
#
a,b"""
#
report = validate_csv_text(source=source, schema=schema_abc)
#
assert report['tables'][0]['error-count'] == 1
#
assert len(report['tables'][0]['errors']) == 1
#
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
#
assert report['tables'][0]['errors'][0]['
message-data']['header
s'] == ['C']
def
test_missing_header_end
(
schema_abc
):
source
=
"""A,B
a,b"""
report
=
validate_csv_text
(
source
=
source
,
schema
=
schema_abc
)
assert
report
[
'tables'
][
0
][
'error-count'
]
==
1
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
1
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'code'
]
==
'missing-headers'
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'
cell
s'
]
==
[
'C'
]
# def test_missing_and_extra_header_end(schema_abc):
# source = """A,B,Z
# a,b,z"""
# report = validate_csv_text(source=source, schema=schema_abc)
# assert report['tables'][0]['error-count'] == 2
# assert len(report['tables'][0]['errors']) == 2
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['message-data']['headers'] == ['C']
# assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][1]['message-data']['headers'] == ['Z']
def
test_missing_and_extra_header_end
(
schema_abc
):
source
=
"""A,B,Z
a,b,z"""
report
=
validate_csv_text
(
source
=
source
,
schema
=
schema_abc
)
assert
report
[
'tables'
][
0
][
'error-count'
]
==
2
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
2
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'code'
]
==
'missing-headers'
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'cells'
]
==
[
'C'
]
assert
report
[
'tables'
][
0
][
'errors'
][
1
][
'code'
]
==
'extra-headers'
assert
report
[
'tables'
][
0
][
'errors'
][
1
][
'cells'
]
==
[
'Z'
]
# def test_missing_and_extra_header_middle(schema_abc):
# source = """A,Z,B
# a,z,b"""
# report = validate_csv_text(source=source, schema=schema_abc)
# assert report['tables'][0]['error-count'] == 2
# assert len(report['tables'][0]['errors']) == 2
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['message-data']['headers'] == ['C']
# assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][1]['message-data']['headers'] == ['Z']
def
test_missing_and_extra_header_middle
(
schema_abc
):
source
=
"""A,Z,B
a,z,b"""
report
=
validate_csv_text
(
source
=
source
,
schema
=
schema_abc
)
assert
report
[
'tables'
][
0
][
'error-count'
]
==
2
assert
len
(
report
[
'tables'
][
0
][
'errors'
])
==
2
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'code'
]
==
'missing-headers'
assert
report
[
'tables'
][
0
][
'errors'
][
0
][
'cells'
]
==
[
'C'
]
assert
report
[
'tables'
][
0
][
'errors'
][
1
][
'code'
]
==
'extra-headers'
assert
report
[
'tables'
][
0
][
'errors'
][
1
][
'cells'
]
==
[
'Z'
]
# waiting for https://github.com/frictionlessdata/frictionless-py/issues/551
# def test_missing_and_extra_header_multiple(schema_abc):
# source = """A,Z
# a,z"""
...
...
@@ -446,11 +445,21 @@ b,c"""
# assert report['tables'][0]['error-count'] == 2
# assert len(report['tables'][0]['errors']) == 2
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['
message-data']['header
s'] == ['B', 'C']
# assert report['tables'][0]['errors'][0]['
cell
s'] == ['B', 'C']
# assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][1]['
message-data']['header
s'] == ['Z']
# assert report['tables'][0]['errors'][1]['
cell
s'] == ['Z']
# waiting for https://github.com/frictionlessdata/frictionless-py/issues/551
# def test_extra_header_start(schema_abc):
# source = """X,A,B,C
# x,a,b,c"""
# report = validate_csv_text(source=source, schema=schema_abc)
# assert report['tables'][0]['error-count'] == 1
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][1]['cells'] == ['X']
# waiting for https://github.com/frictionlessdata/frictionless-py/issues/551
# def test_extra_header(schema_abc):
# source = """A,B,C,D
# a,b,c,d"""
...
...
@@ -458,7 +467,7 @@ b,c"""
# assert report['tables'][0]['error-count'] == 1
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['errors'][0]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][0]['
message-data']['header
s'] == ['D']
# assert report['tables'][0]['errors'][0]['
cell
s'] == ['D']
# def test_extra_multiple(schema_abc):
...
...
@@ -469,7 +478,7 @@ b,c"""
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['error-stats']['count'] == 2
# assert report['tables'][0]['errors'][0]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][0]['
message-data']['header
s'] == ['X', 'Y']
# assert report['tables'][0]['errors'][0]['
cell
s'] == ['X', 'Y']
# def test_missing_and_extra_headers_multiple(schema_abc):
...
...
validata_core/__init__.py
View file @
f8f37bcb
...
...
@@ -90,7 +90,7 @@ def compute_error_statistics(errors, columns):
# Fill in error stats
for
err
in
errors
:
err_tag
=
err
[
'tag'
]
errors_nb
=
len
(
err
[
'
message-data'
][
'header
s'
])
\
errors_nb
=
len
(
err
[
'
cell
s'
])
\
if
err
[
'code'
]
in
(
'extra-headers'
,
'missing-headers'
)
else
1
errors_nb_dict
[
err_tag
]
+=
errors_nb
errors_dist_dict
[
err_tag
][
err
[
'code'
]]
+=
errors_nb
...
...
@@ -370,9 +370,15 @@ def validate(source, schema, with_repair=True, **options):
# TODO: merge options
inspector_options
=
{
**
{
# TODO: Fix `pick_errors` content
#'pick_errors': checks,
'skip_errors'
:
[
'non-matching-header'
,
'extra-header'
,
'missing-header'
,
"missing-cell"
],
# TODO: We hide "extra-cell" or "missing-cell" to make validata header errors
# work but at the risk that extra cells and missing cells are no more detected :(
'skip_errors'
:
[
'non-matching-header'
,
'extra-header'
,
'missing-header'
,
"missing-cell"
,
"extra-cell"
,
],
'query'
:
frictionless
.
Query
(
limit_rows
=
VALIDATA_MAX_ROWS
),
'extra_checks'
:
extra_checks
,
},
...
...
validata_core/custom_checks/extra_or_missing_header.py
View file @
f8f37bcb
...
...
@@ -40,7 +40,6 @@ class ExtraOrMissingHeader(Check):
header_set
=
set
(
header
)
default_error_params
=
{
"cells"
:
[],
"cell"
:
""
,
"field_name"
:
""
,
"field_number"
:
0
,
...
...
@@ -54,8 +53,7 @@ class ExtraOrMissingHeader(Check):
else
:
cols
=
", "
.
join
(
f
"'
{
col
}
'"
for
col
in
missing_headers
)
note
=
f
"les colonnes
{
cols
}
n'ont pas été trouvées dans le fichier"
yield
MissingHeadersError
(
note
=
note
,
**
default_error_params
)
return
yield
MissingHeadersError
(
note
=
note
,
cells
=
list
(
missing_headers
),
**
default_error_params
)
extra_headers
=
header_set
-
field_names_set
if
extra_headers
:
...
...
@@ -64,9 +62,7 @@ class ExtraOrMissingHeader(Check):
else
:
cols
=
", "
.
join
(
f
"'
{
col
}
'"
for
col
in
extra_headers
)
note
=
f
"les colonnes
{
cols
}
ne sont pas déclarées dans le schéma"
yield
ExtraHeadersError
(
note
=
note
,
**
default_error_params
)
return
yield
ExtraHeadersError
(
note
=
note
,
cells
=
list
(
extra_headers
),
**
default_error_params
)
if
field_names_set
==
header_set
and
field_names
!=
header
:
yield
WrongHeadersOrderError
(
note
=
""
,
**
default_error_params
)
return
yield
WrongHeadersOrderError
(
note
=
""
,
cells
=
[],
**
default_error_params
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment