Commit cc0d132d authored by Pierre Dittgen

Use sync_schema=True to ignore header errors

parent 8fb10e1f
Pipeline #2393 passed with stage
in 1 minute and 44 seconds
......@@ -2,6 +2,7 @@ import datetime
from io import BytesIO
from typing import Any, List
import frictionless
import pytest
from openpyxl import Workbook
......@@ -368,154 +369,45 @@ def test_empty_file(schema_abc):
assert report["tables"][0]["errors"][0]["note"] == "the source is empty"
def assert_no_report_errors(report):
    """Fail unless the first table of ``report`` lists no errors.

    Only ``report['tables'][0]['errors']`` is inspected. The whole report is
    attached to the assertion message so that a failing test shows what the
    validator returned.
    """
    first_table_errors = report['tables'][0]['errors']
    assert not first_table_errors, report
def test_valid_delimiter(schema_abc):
    """Validate a comma-delimited CSV (header ``A,B,C``) against ``schema_abc``.

    The first table must carry no errors and the report must be valid.
    """
    csv_bytes = b"A,B,C\na,b,c"
    result = validate_csv_bytes(csv_bytes, schema_abc)
    assert_no_report_errors(result)
    assert result.valid
def test_valid_delimiter_semicolon(schema_abc):
    """Validate a semicolon-delimited CSV (header ``A;B;C``) against ``schema_abc``.

    The first table of the report must carry no errors.
    """
    csv_bytes = b"A;B;C\na;b;c"
    result = validate_csv_bytes(csv_bytes, schema_abc)
    assert_no_report_errors(result)
def test_invalid_delimiter_percent(schema_abc):
    """Validate a ``%``-delimited CSV against ``schema_abc``.

    The first table of the report must list at least one error.
    """
    csv_bytes = b"A%B%C\na%b%c"
    result = validate_csv_bytes(csv_bytes, schema_abc)
    first_table_errors = result['tables'][0]['errors']
    assert len(first_table_errors) > 0
# def test_missing_header_start(schema_abc):
# source = b"""B,C
# b,c"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['cells'] == ['A']
# def test_missing_header_middle(schema_abc):
# source = b"""A,C
# a,c"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['cells'] == ['B']
assert report.valid
# def test_missing_header_end(schema_abc):
# source = b"""A,B
# a,b"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['cells'] == ['C']
# def test_missing_and_extra_header_end(schema_abc):
# source = b"""A,B,Z
# a,b,z"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 2
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['cells'] == ['C']
# assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][1]['cells'] == ['Z']
# def test_missing_and_extra_header_middle(schema_abc):
# source = b"""A,Z,B
# a,z,b"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 2
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['cells'] == ['C']
# assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][1]['cells'] == ['Z']
# waiting for https://github.com/frictionlessdata/frictionless-py/issues/551
# def test_missing_and_extra_header_multiple(schema_abc):
# source = b"""A,Z
# a,z"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 2
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['cells'] == ['B', 'C']
# assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][1]['cells'] == ['Z']
# waiting for https://github.com/frictionlessdata/frictionless-py/issues/551
# def test_extra_header_start(schema_abc):
# source = b"""X,A,B,C
# x,a,b,c"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][1]['cells'] == ['X']
# waiting for https://github.com/frictionlessdata/frictionless-py/issues/551
# def test_extra_header(schema_abc):
# source = b"""A,B,C,D
# a,b,c,d"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['errors'][0]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][0]['cells'] == ['D']
def test_missing_required_column(schema_abc):
    """Run validation on a table that has columns ``B`` and ``C`` but no ``A``.

    NOTE(review): no assertion on the report is visible in this block --
    confirm which outcome this test is supposed to pin.
    """
    rows = [["B", "C"], ["b", "c"]]
    report = validate(rows, schema_abc)
# def test_extra_multiple(schema_abc):
# source = b"""A,B,C,X,Y
# a,b,c,x,y"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['error-stats']['count'] == 2
# assert report['tables'][0]['errors'][0]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][0]['cells'] == ['X', 'Y']
# def test_missing_and_extra_headers_multiple(schema_abc):
# source = b"""A,Z,D
# a,z,d"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 2
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['message-data']['headers'] == ['B', 'C']
# assert report['tables'][0]['errors'][1]['code'] == 'extra-headers'
# assert report['tables'][0]['errors'][1]['message-data']['headers'] == ['D', 'Z']
# def test_header_order(schema_abc):
# source = b"""A,C,B
# a,c,b"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['errors'][0]['code'] == 'wrong-headers-order'
# def test_invalid_delimiter_and_missing_header(schema_abc):
# source = b"""A;C
# a;c"""
# report = validate_csv_bytes(source, schema_abc)
# assert len(report['tables'][0]['errors']) == 2
# assert report['tables'][0]['errors'][0]['code'] == 'invalid-column-delimiter'
# assert report['tables'][0]['errors'][0]['message-data']['detected'] == ';'
# assert report['tables'][0]['errors'][0]['message-data']['expected'] == ','
# assert report['tables'][0]['errors'][1]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][1]['message-data']['headers'] == ['B']
def test_missing_column(schema_abc):
    """A table without column ``B`` must still produce a valid report.

    The schema is declared inline and marks only ``A`` as required; the
    ``schema_abc`` fixture is requested but not used by the body.
    """
    field_a = {"name": "A", "constraints": {"required": True}}
    field_b = {"name": "B"}
    field_c = {"name": "C"}
    inline_schema = {"fields": [field_a, field_b, field_c]}

    table = [['A', 'C'], ['a', 'c']]

    outcome = validate(table, inline_schema)
    assert outcome.valid
def test_valid_custom_check_siren(schema_siren):
    """A row whose ``siren`` cell is ``"529173189"`` must pass ``schema_siren``.

    Expects no errors on the first table and a valid report.
    """
    header = ["id", "siren"]
    row = [1, "529173189"]
    result = validate([header, row], schema_siren)
    assert_no_report_errors(result)
    assert result.valid
def test_invalid_custom_check_siren(schema_siren):
......@@ -527,7 +419,7 @@ def test_invalid_custom_check_siren(schema_siren):
def test_valid_custom_check_siret(schema_siret):
    """A row whose ``numero_siret`` cell is ``"83014132100026"`` must pass ``schema_siret``.

    Expects no errors on the first table and a valid report.
    """
    header = ["id", "numero_siret"]
    row = [1, "83014132100026"]
    result = validate([header, row], schema_siret)
    assert_no_report_errors(result)
    assert result.valid
def test_invalid_custom_check_siret(schema_siret):
......@@ -540,19 +432,19 @@ def test_invalid_custom_check_siret(schema_siret):
def test_valid_custom_check_year_interval_1(schema_year_interval):
    """The ``annee`` value ``"2018/2020"`` must pass ``schema_year_interval``.

    Expects no errors on the first table and a valid report.
    """
    header = ["projet", "annee"]
    row = ["Validata", "2018/2020"]
    result = validate([header, row], schema_year_interval)
    assert_no_report_errors(result)
    assert result.valid
def test_valid_custom_check_year_interval_2(schema_year_interval_allow_year_only):
    """The ``annee`` value ``"2018/2020"`` must pass the allow-year-only schema fixture.

    Expects no errors on the first table and a valid report.
    """
    header = ["projet", "annee"]
    row = ["Validata", "2018/2020"]
    result = validate([header, row], schema_year_interval_allow_year_only)
    assert_no_report_errors(result)
    assert result.valid
def test_valid_custom_check_year_interval_3(schema_year_interval_allow_year_only):
    """The single-year ``annee`` value ``"2020"`` must pass the allow-year-only schema fixture.

    Expects no errors on the first table and a valid report.
    """
    header = ["projet", "annee"]
    row = ["Validata", "2020"]
    result = validate([header, row], schema_year_interval_allow_year_only)
    assert_no_report_errors(result)
    assert result.valid
def test_invalid_custom_check_year_interval_1(schema_year_interval):
......@@ -579,19 +471,19 @@ def test_invalid_custom_check_year_interval_3(schema_year_interval):
def test_valid_custom_sum_columns_value_1(schema_sum_columns_value_ok):
    """A fully populated row (12000, 600, 4000, 7400) must pass ``schema_sum_columns_value_ok``.

    Expects no errors on the first table and a valid report.
    """
    header = ['charges', 'chauffage', 'salaires', 'fraisdebouche']
    row = [12000, 600, 4000, 7400]
    result = validate([header, row], schema_sum_columns_value_ok)
    assert_no_report_errors(result)
    assert result.valid
def test_valid_custom_sum_columns_value_2(schema_sum_columns_value_ok):
    """A row whose ``salaires`` cell is ``None`` must pass ``schema_sum_columns_value_ok``.

    Expects no errors on the first table and a valid report.
    """
    header = ["charges", "chauffage", "salaires", "fraisdebouche"]
    row = [12000, 600, None, 7400]
    result = validate([header, row], schema_sum_columns_value_ok)
    assert_no_report_errors(result)
    assert result.valid
def test_valid_custom_sum_columns_value_3(schema_sum_columns_value_ok):
    """A row whose ``charges`` cell is ``None`` must pass ``schema_sum_columns_value_ok``.

    Expects no errors on the first table and a valid report.
    """
    # NOTE(review): the definition that follows this one reuses the name
    # ``test_valid_custom_sum_columns_value_3``. The later ``def`` rebinds the
    # name, so this test would no longer be collected -- confirm and rename
    # one of the two.
    header = ["charges", "chauffage", "salaires", "fraisdebouche"]
    row = [None, 600, 4000, 7400]
    result = validate([header, row], schema_sum_columns_value_ok)
    assert_no_report_errors(result)
    assert result.valid
def test_valid_custom_sum_columns_value_3(schema_sum_columns_value_ok):
......@@ -611,7 +503,7 @@ def test_valid_nomenclature_actes_value(schema_nomenclature_actes_value):
source = [["acte"],["Fonction publique/foobar"]]
report = validate(source, schema_nomenclature_actes_value)
assert_no_report_errors(report)
assert report.valid
def test_invalid_nomenclature_actes_value_1(schema_nomenclature_actes_value):
......@@ -636,14 +528,14 @@ def test_cohesive_columns_values_1(schema_cohesive_columns):
source = [["id","col1","col2"],[1,None,None]]
report = validate(source, schema_cohesive_columns)
assert_no_report_errors(report)
assert report.valid
def test_cohesive_columns_values_2(schema_cohesive_columns):
    """A row with both ``col1`` and ``col2`` filled must pass ``schema_cohesive_columns``.

    Expects no errors on the first table and a valid report.
    """
    header = ["id", "col1", "col2"]
    row = [1, "foo", "bar"]
    result = validate([header, row], schema_cohesive_columns)
    assert_no_report_errors(result)
    assert result.valid
def test_cohesive_columns_values_3(schema_cohesive_columns):
......@@ -667,13 +559,13 @@ def test_cohesive_columns_values_4(schema_cohesive_columns):
def test_compare_columns_value_1(schema_compare_columns):
    """A row with ``depenses`` 12000 and ``recettes`` 15000 must pass ``schema_compare_columns``.

    Expects no errors on the first table and a valid report.
    """
    header = ["depenses", "recettes"]
    row = [12000, 15000]
    result = validate([header, row], schema_compare_columns)
    assert_no_report_errors(result)
    assert result.valid
def test_compare_columns_value_2(schema_compare_columns):
    """A row with ``depenses`` and ``recettes`` both 12000 must pass ``schema_compare_columns``.

    Expects no errors on the first table and a valid report.
    """
    header = ["depenses", "recettes"]
    row = [12000, 12000]
    result = validate([header, row], schema_compare_columns)
    assert_no_report_errors(result)
    assert result.valid
def test_compare_columns_value_3(schema_compare_columns):
......
......@@ -34,7 +34,8 @@ def validate(source, schema, **options):
# Merge options to pass to frictionless
validate_options = {
'query': frictionless.Query(limit_rows=VALIDATA_MAX_ROWS),
'extra_checks': extra_checks,
'extra_checks': extra_checks, # add custom_checks if needed
'sync_schema': True, # Don't care about missing, extra or unordered columns
**{
k: v for k, v in options.items()
if k in {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment