Commit 8dca71a0 authored by Christophe Benz
Browse files

Return one "extra-headers" error instead of multiple "extra-header"

parent cd753510
from goodtables import Inspector
from validata_validate import validate
schema1 = {
schema_abc = {
"$schema": "https://frictionlessdata.io/schemas/table-schema.json",
"title": "Dummy schema",
"author": "Christophe Benz, Jailbreak",
"version": "0.0.1",
"created": "2018-09-27",
"fields": [
{
"name": "A",
......@@ -42,14 +38,14 @@ pre_checks_conf = [
]
def validate_csv_str(**options):
return validate(scheme='text', format='csv', **options)
def validate_csv_text(**options):
return validate(scheme='text', format='csv', pre_checks_conf=pre_checks_conf, **options)
def test_valid_delimiter():
source = """A,B,C
a,b,c"""
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
report = validate_csv_text(source=source, schema=schema_abc)
assert report['tables'][0]['error-count'] == 0
assert len(report['tables'][0]['errors']) == 0
......@@ -57,7 +53,7 @@ a,b,c"""
def test_invalid_delimiter_semicolon():
source = """A;B;C
a;b;c"""
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
report = validate_csv_text(source=source, schema=schema_abc)
assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'invalid-column-delimiter'
......@@ -68,7 +64,7 @@ a;b;c"""
def test_invalid_delimiter_percent():
source = """A%B%C
a%b%c"""
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
report = validate_csv_text(source=source, schema=schema_abc)
assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'invalid-column-delimiter'
......@@ -79,7 +75,7 @@ a%b%c"""
def test_missing_header_start():
source = """B,C
b,c"""
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
report = validate_csv_text(source=source, schema=schema_abc)
assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
......@@ -89,7 +85,7 @@ b,c"""
def test_missing_header_middle():
source = """A,C
a,c"""
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
report = validate_csv_text(source=source, schema=schema_abc)
assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
......@@ -99,7 +95,7 @@ a,c"""
def test_missing_header_end():
source = """A,B
a,b"""
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
report = validate_csv_text(source=source, schema=schema_abc)
assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
......@@ -109,67 +105,65 @@ a,b"""
def test_missing_and_extra_header_end():
source = """A,B,Z
a,b,z"""
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
report = validate_csv_text(source=source, schema=schema_abc)
assert report['tables'][0]['error-count'] == 2
assert len(report['tables'][0]['errors']) == 2
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
assert report['tables'][0]['errors'][0]['message-data']['headers'] == ['C']
assert report['tables'][0]['errors'][1]['code'] == 'extra-header'
assert report['tables'][0]['errors'][1]['column-number'] == 3
assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
assert report['tables'][0]['errors'][1]['message-data']['headers'] == ['Z']
def test_missing_and_extra_header_middle():
source = """A,Z,B
a,z,b"""
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
report = validate_csv_text(source=source, schema=schema_abc)
assert report['tables'][0]['error-count'] == 2
assert len(report['tables'][0]['errors']) == 2
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
assert report['tables'][0]['errors'][0]['message-data']['headers'] == ['C']
assert report['tables'][0]['errors'][1]['code'] == 'extra-header'
assert report['tables'][0]['errors'][1]['column-number'] == 2
assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
assert report['tables'][0]['errors'][1]['message-data']['headers'] == ['Z']
def test_missing_and_extra_header_multiple():
source = """A,Z
a,z"""
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
report = validate_csv_text(source=source, schema=schema_abc)
assert report['tables'][0]['error-count'] == 2
assert len(report['tables'][0]['errors']) == 2
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
assert report['tables'][0]['errors'][0]['message-data']['headers'] == ['B', 'C']
assert report['tables'][0]['errors'][1]['code'] == 'extra-header'
assert report['tables'][0]['errors'][1]['column-number'] == 2
assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
assert report['tables'][0]['errors'][1]['message-data']['headers'] == ['Z']
def test_extra_header():
source = """A,B,C,D
a,b,c,d"""
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
report = validate_csv_text(source=source, schema=schema_abc)
assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'extra-header'
assert report['tables'][0]['errors'][0]['column-number'] == 4
assert report['tables'][0]['errors'][0]['code'] == 'extra-headers'
assert report['tables'][0]['errors'][0]['message-data']['headers'] == ['D']
def test_missing_and_extra_headers_multiple():
source = """A,Z,D
a,z,d"""
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
assert report['tables'][0]['error-count'] == 3
assert len(report['tables'][0]['errors']) == 3
report = validate_csv_text(source=source, schema=schema_abc)
assert report['tables'][0]['error-count'] == 2
assert len(report['tables'][0]['errors']) == 2
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
assert report['tables'][0]['errors'][0]['message-data']['headers'] == ['B', 'C']
assert report['tables'][0]['errors'][1]['code'] == 'extra-header'
assert report['tables'][0]['errors'][1]['column-number'] == 2
assert report['tables'][0]['errors'][2]['code'] == 'extra-header'
assert report['tables'][0]['errors'][2]['column-number'] == 3
assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
assert report['tables'][0]['errors'][1]['message-data']['headers'] == ['D', 'Z']
def test_header_order():
source = """A,C,B
a,c,b"""
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
report = validate_csv_text(source=source, schema=schema_abc)
assert report['tables'][0]['error-count'] == 1
assert len(report['tables'][0]['errors']) == 1
assert report['tables'][0]['errors'][0]['code'] == 'wrong-headers-order'
......@@ -178,7 +172,7 @@ a,c,b"""
def test_missing_and_duplicate_headers():
source = """A,A,B
a,a,b"""
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
report = validate_csv_text(source=source, schema=schema_abc)
assert report['tables'][0]['error-count'] == 2
assert len(report['tables'][0]['errors']) == 2
assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
......@@ -191,7 +185,7 @@ a,a,b"""
def test_invalid_delimiter_and_missing_header():
source = """A;C
a;c"""
report = validate_csv_str(source=source, schema=schema1, pre_checks_conf=pre_checks_conf)
report = validate_csv_text(source=source, schema=schema_abc)
assert report['tables'][0]['error-count'] == 2
assert len(report['tables'][0]['errors']) == 2
assert report['tables'][0]['errors'][0]['code'] == 'invalid-column-delimiter'
......
......@@ -47,7 +47,7 @@ def validate(source, schema, checks=None, pre_checks_conf=None, **options):
inspector = goodtables.Inspector(
checks=(['structure', 'schema'] if checks is None else checks) + [{'extra-or-missing-header': {}}],
skip_checks=['non-matching-header', 'missing-header'],
skip_checks=['non-matching-header', 'extra-header', 'missing-header'],
row_limit=VALIDATA_MAX_ROWS)
# Validata design choices
......
......@@ -36,10 +36,8 @@ class ExtraOrMissingHeader(object):
extra_headers = headers_set - field_names_set
if extra_headers:
for cell in cells:
value = cell.get('value')
if value is not None and value in extra_headers:
errors.append(Error(code='extra-header', cell=cell))
errors.append(Error(code='extra-headers',
message_substitutions={"headers": sorted(extra_headers)}))
if field_names_set == headers_set and field_names != headers:
errors.append(Error(code='wrong-headers-order'))
......
......@@ -5,6 +5,21 @@
"name": "Wrong column delimiter",
"type": "source",
"context": "table"
},
"missing-headers": {
"name": "Missing headers",
"type": "structure",
"context": "head"
},
"extra-headers": {
"name": "Extra headers",
"type": "structure",
"context": "head"
},
"wrong-headers-order": {
"name": "Wrong headers order",
"type": "structure",
"context": "head"
}
}
}
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment