Commit f8f37bcb authored by Pierre Dittgen's avatar Pierre Dittgen

Improve errors handling (wip)

parent ce43d84f
Pipeline #2362 failed with stage
in 1 minute and 22 seconds
......@@ -388,57 +388,56 @@ def test_missing_header_start(schema_abc):
# CSV text whose header row is B,C (no 'A' column).
source = """B,C
b,c"""
report = validate_csv_text(source=source, schema=schema_abc)
# The assertions below expect exactly one 'missing-headers' error that
# reports 'A' as the missing header.
assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
assert report['tables'][0]['errors'][0]['message-data']['headers'] == ['A']
assert report['tables'][0]['errors'][0]['cells'] == ['A']
# def test_missing_header_middle(schema_abc):
# source = """A,C
# a,c"""
# report = validate_csv_text(source=source, schema=schema_abc)
# assert report['tables'][0]['error-count'] == 1
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['message-data']['headers'] == ['B']
def test_missing_header_middle(schema_abc):
    """Validate a CSV whose header row is A,C against ``schema_abc``.

    Expects a single 'missing-headers' error whose cells are ['B'].
    """
    csv_text = """A,C
a,c"""
    report = validate_csv_text(source=csv_text, schema=schema_abc)

    table_report = report['tables'][0]
    assert table_report['error-count'] == 1

    errors = table_report['errors']
    assert len(errors) == 1

    missing_error = errors[0]
    assert missing_error['code'] == 'missing-headers'
    assert missing_error['cells'] == ['B']
# def test_missing_header_end(schema_abc):
# source = """A,B
# a,b"""
# report = validate_csv_text(source=source, schema=schema_abc)
# assert report['tables'][0]['error-count'] == 1
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['message-data']['headers'] == ['C']
def test_missing_header_end(schema_abc):
    """Validate a CSV whose header row is A,B against ``schema_abc``.

    Expects a single 'missing-headers' error whose cells are ['C'].
    """
    csv_text = """A,B
a,b"""
    report = validate_csv_text(source=csv_text, schema=schema_abc)

    table_report = report['tables'][0]
    assert table_report['error-count'] == 1

    errors = table_report['errors']
    assert len(errors) == 1

    missing_error = errors[0]
    assert missing_error['code'] == 'missing-headers'
    assert missing_error['cells'] == ['C']
# def test_missing_and_extra_header_end(schema_abc):
# source = """A,B,Z
# a,b,z"""
# report = validate_csv_text(source=source, schema=schema_abc)
# assert report['tables'][0]['error-count'] == 2
# assert len(report['tables'][0]['errors']) == 2
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['message-data']['headers'] == ['C']
# assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][1]['message-data']['headers'] == ['Z']
def test_missing_and_extra_header_end(schema_abc):
    """Validate a CSV whose header row is A,B,Z against ``schema_abc``.

    Expects two errors, in this order: 'missing-headers' with cells ['C'],
    then 'extra-headers' with cells ['Z'].
    """
    csv_text = """A,B,Z
a,b,z"""
    report = validate_csv_text(source=csv_text, schema=schema_abc)

    table_report = report['tables'][0]
    assert table_report['error-count'] == 2

    errors = table_report['errors']
    assert len(errors) == 2

    missing_error = errors[0]
    assert missing_error['code'] == 'missing-headers'
    assert missing_error['cells'] == ['C']

    extra_error = errors[1]
    assert extra_error['code'] == 'extra-headers'
    assert extra_error['cells'] == ['Z']
# def test_missing_and_extra_header_middle(schema_abc):
# source = """A,Z,B
# a,z,b"""
# report = validate_csv_text(source=source, schema=schema_abc)
# assert report['tables'][0]['error-count'] == 2
# assert len(report['tables'][0]['errors']) == 2
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['message-data']['headers'] == ['C']
# assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][1]['message-data']['headers'] == ['Z']
def test_missing_and_extra_header_middle(schema_abc):
    """Validate a CSV whose header row is A,Z,B against ``schema_abc``.

    Expects two errors, in this order: 'missing-headers' with cells ['C'],
    then 'extra-headers' with cells ['Z'].
    """
    csv_text = """A,Z,B
a,z,b"""
    report = validate_csv_text(source=csv_text, schema=schema_abc)

    table_report = report['tables'][0]
    assert table_report['error-count'] == 2

    errors = table_report['errors']
    assert len(errors) == 2

    missing_error = errors[0]
    assert missing_error['code'] == 'missing-headers'
    assert missing_error['cells'] == ['C']

    extra_error = errors[1]
    assert extra_error['code'] == 'extra-headers'
    assert extra_error['cells'] == ['Z']
# waiting for https://github.com/frictionlessdata/frictionless-py/issues/551
# def test_missing_and_extra_header_multiple(schema_abc):
# source = """A,Z
# a,z"""
......@@ -446,11 +445,21 @@ b,c"""
# assert report['tables'][0]['error-count'] == 2
# assert len(report['tables'][0]['errors']) == 2
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['message-data']['headers'] == ['B', 'C']
# assert report['tables'][0]['errors'][0]['cells'] == ['B', 'C']
# assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][1]['message-data']['headers'] == ['Z']
# assert report['tables'][0]['errors'][1]['cells'] == ['Z']
# waiting for https://github.com/frictionlessdata/frictionless-py/issues/551
# def test_extra_header_start(schema_abc):
# source = """X,A,B,C
# x,a,b,c"""
# report = validate_csv_text(source=source, schema=schema_abc)
# assert report['tables'][0]['error-count'] == 1
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['errors'][0]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][0]['cells'] == ['X']
# waiting for https://github.com/frictionlessdata/frictionless-py/issues/551
# def test_extra_header(schema_abc):
# source = """A,B,C,D
# a,b,c,d"""
......@@ -458,7 +467,7 @@ b,c"""
# assert report['tables'][0]['error-count'] == 1
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['errors'][0]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][0]['message-data']['headers'] == ['D']
# assert report['tables'][0]['errors'][0]['cells'] == ['D']
# def test_extra_multiple(schema_abc):
......@@ -469,7 +478,7 @@ b,c"""
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['error-stats']['count'] == 2
# assert report['tables'][0]['errors'][0]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][0]['message-data']['headers'] == ['X', 'Y']
# assert report['tables'][0]['errors'][0]['cells'] == ['X', 'Y']
# def test_missing_and_extra_headers_multiple(schema_abc):
......
......@@ -90,7 +90,7 @@ def compute_error_statistics(errors, columns):
# Fill in error stats
for err in errors:
err_tag = err['tag']
errors_nb = len(err['message-data']['headers']) \
errors_nb = len(err['cells']) \
if err['code'] in ('extra-headers', 'missing-headers') else 1
errors_nb_dict[err_tag] += errors_nb
errors_dist_dict[err_tag][err['code']] += errors_nb
......@@ -370,9 +370,15 @@ def validate(source, schema, with_repair=True, **options):
# TODO: merge options
inspector_options = {
**{
# TODO: Fix `pick_errors` content
#'pick_errors': checks,
'skip_errors': ['non-matching-header', 'extra-header', 'missing-header', "missing-cell"],
# TODO: We skip "extra-cell" and "missing-cell" errors so that validata header errors
# work, at the risk that extra cells and missing cells are no longer detected :(
'skip_errors': [
'non-matching-header',
'extra-header',
'missing-header',
"missing-cell",
"extra-cell",
],
'query': frictionless.Query(limit_rows=VALIDATA_MAX_ROWS),
'extra_checks': extra_checks,
},
......
......@@ -40,7 +40,6 @@ class ExtraOrMissingHeader(Check):
header_set = set(header)
default_error_params = {
"cells": [],
"cell": "",
"field_name": "",
"field_number":0,
......@@ -54,8 +53,7 @@ class ExtraOrMissingHeader(Check):
else:
cols = ", ".join(f"'{col}'" for col in missing_headers)
note = f"les colonnes {cols} n'ont pas été trouvées dans le fichier"
yield MissingHeadersError(note=note, **default_error_params)
return
yield MissingHeadersError(note=note, cells=list(missing_headers), **default_error_params)
extra_headers = header_set - field_names_set
if extra_headers:
......@@ -64,9 +62,7 @@ class ExtraOrMissingHeader(Check):
else:
cols = ", ".join(f"'{col}'" for col in extra_headers)
note = f"les colonnes {cols} ne sont pas déclarées dans le schéma"
yield ExtraHeadersError(note=note, **default_error_params)
return
yield ExtraHeadersError(note=note, cells=list(extra_headers), **default_error_params)
if field_names_set == header_set and field_names != header:
yield WrongHeadersOrderError(note="", **default_error_params)
return
yield WrongHeadersOrderError(note="", cells=[], **default_error_params)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment