Commit ce43d84f authored by Pierre Dittgen

wip check headers

parent 76c32802
Pipeline #2359 failed with stage
in 1 minute and 58 seconds
......@@ -384,14 +384,15 @@ a%b%c"""
assert len(report['tables'][0]['errors']) != 0
# def test_missing_header_start(schema_abc):
# source = """B,C
# b,c"""
# report = validate_csv_text(source=source, schema=schema_abc)
# assert report['tables'][0]['error-count'] == 1
# assert len(report['tables'][0]['errors']) == 1
# assert report['tables'][0]['errors'][0]['code'] == 'missing-headers'
# assert report['tables'][0]['errors'][0]['message-data']['headers'] == ['A']
def test_missing_header_start(schema_abc):
    """Check the report for a CSV whose header row lacks the column 'A'.

    The source only provides the columns B and C; the validation report is
    expected to hold exactly one 'missing-headers' error naming 'A'.
    """
    source = """B,C
b,c"""
    report = validate_csv_text(source=source, schema=schema_abc)
    # No debugger breakpoint here: a leftover `ipdb.set_trace()` stops any
    # non-interactive test run and needs a dev-only package to be installed.
    table_report = report['tables'][0]
    assert table_report['error-count'] == 1
    assert len(table_report['errors']) == 1
    first_error = table_report['errors'][0]
    assert first_error['code'] == 'missing-headers'
    assert first_error['message-data']['headers'] == ['A']
# def test_missing_header_middle(schema_abc):
......
......@@ -14,7 +14,7 @@ import tablib
from toolz import get_in, thread_first, update_in
from . import csv_helpers, loaders, messages
from .custom_checks import available_checks
from .custom_checks import available_checks, header_checks
# from .spec import spec
log = logging.getLogger(__name__)
......@@ -349,7 +349,9 @@ def validate(source, schema, with_repair=True, **options):
# checks = ['structure', 'schema']
# Extract custom checks reference from table schema
extra_checks = None
extra_checks = header_checks
# Dynamically add custom check based on schema needs
custom_checks_config = schema_descriptor.get('custom_checks')
if custom_checks_config:
extra_checks = []
......@@ -370,7 +372,7 @@ def validate(source, schema, with_repair=True, **options):
**{
# TODO: Fix `pick_errors` content
#'pick_errors': checks,
'skip_errors': ['non-matching-header', 'extra-header', 'missing-header'],
'skip_errors': ['non-matching-header', 'extra-header', 'missing-header', "missing-cell"],
'query': frictionless.Query(limit_rows=VALIDATA_MAX_ROWS),
'extra_checks': extra_checks,
},
......
from .cohesive_columns_value import CohesiveColumnsValue
from .compare_columns_value import CompareColumnsValue
from .extra_or_missing_header import ExtraOrMissingHeader
from .french_siren_value import FrenchSirenValue
from .french_siret_value import FrenchSiretValue
from .nomenclature_actes_value import NomenclatureActesValue
......@@ -15,4 +16,6 @@ available_checks = {
"nomenclature-actes-value": NomenclatureActesValue,
"sum-columns-value": SumColumnsValue,
"year-interval-value": YearIntervalValue,
}
\ No newline at end of file
}
header_checks = [ExtraOrMissingHeader]
\ No newline at end of file
from goodtables.error import Error
from goodtables.registry import check
"""Reimplementation of NonMatchingHeader check,
taking into account missing header, extra header and wrong header order."""
from frictionless import errors, Check
"""Reimplementation of NonMatchingHeader check, taking into account missing header, extra header and wrong header order."""
class ExtraHeadersError(errors.HeaderError):
    """Header error identified by the code ``extra-headers``.

    The template only interpolates ``note``, so the text to display is
    supplied by whoever instantiates the error.
    """

    code = "extra-headers"
    name = "colonne(s) surnuméraire(s)"
    description = ""
    template = "{note}"
    tags = ["#head", "#structure"]
@check('extra-or-missing-header', type='custom', context='head')
class ExtraOrMissingHeader(object):
def __init__(self, **options):
pass
class MissingHeadersError(errors.HeaderError):
    """Header error identified by the code ``missing-headers``.

    The template only interpolates ``note``, so the text to display is
    supplied by whoever instantiates the error.
    """

    code = "missing-headers"
    name = "colonne(s) manquante(s)"
    description = ""
    template = "{note}"
    tags = ["#head", "#structure"]
def check_headers(self, cells, sample=None):
errors = []
field_names = [
cell['field'].name
for cell in cells
if 'field' in cell
]
class WrongHeadersOrderError(errors.HeaderError):
    """Header error identified by the code ``wrong-headers-order``.

    Unlike its sibling errors, the template is a fixed sentence and does not
    interpolate ``note``.
    """

    code = "wrong-headers-order"
    name = "en-têtes non ordonnés"
    description = ""
    template = "les colonnes du tableau ne sont pas dans l\'ordre défini par le schéma"
    tags = ["#head", "#structure"]
class ExtraOrMissingHeader(Check):
"""Custom check comparing the header names with the schema field names.

It yields at most one error: missing columns first, then extra columns,
then wrong column order.
"""
possible_Errors = [ExtraHeadersError, MissingHeadersError, WrongHeadersOrderError]
def validate_header(self, header):
# Field names declared by the table schema, as an ordered list and as a set.
field_names = self.table.schema.field_names
field_names_set = set(field_names)
# NOTE(review): the statements below that read `cells`, append to `errors`,
# or use `headers` / `headers_set` rely on names that `validate_header`
# never defines. They look like the previous implementation interleaved
# with the new one -- confirm against the actual file before relying on them.
headers = [
cell['value']
for cell in cells
if 'value' in cell
]
headers_set = set(headers)
for cell in cells:
if 'value' not in cell: # cell has been inferred, and must not be passed to later checks
cells.remove(cell)
missing_headers = field_names_set - headers_set
header_set = set(header)
# Keyword arguments passed to every error yielded below; all are empty
# placeholders. Presumably required by the errors.HeaderError constructor
# -- TODO confirm.
default_error_params = {
"cells": [],
"cell": "",
"field_name": "",
"field_number":0,
"field_position":0
}
# Schema field names that do not appear in the header.
missing_headers = field_names_set - header_set
if missing_headers:
errors.append(Error(code='missing-headers',
message_substitutions={"headers": sorted(missing_headers)}))
# Singular or plural French message, depending on how many columns are missing.
if len(missing_headers) == 1:
note = f"la colonne '{list(missing_headers)[0]}' n'a pas été trouvée dans le fichier"
else:
cols = ", ".join(f"'{col}'" for col in missing_headers)
note = f"les colonnes {cols} n'ont pas été trouvées dans le fichier"
yield MissingHeadersError(note=note, **default_error_params)
# Stop after the first kind of problem found.
return
extra_headers = headers_set - field_names_set
# Header names that the schema does not declare.
extra_headers = header_set - field_names_set
if extra_headers:
errors.append(Error(code='extra-headers',
message_substitutions={"headers": sorted(extra_headers)}))
if field_names_set == headers_set and field_names != headers:
errors.append(Error(code='wrong-headers-order'))
# Singular or plural French message, depending on how many columns are extra.
if len(extra_headers) == 1:
note = f"la colonne '{list(extra_headers)[0]}' n'est pas déclarée dans le schéma"
else:
cols = ", ".join(f"'{col}'" for col in extra_headers)
note = f"les colonnes {cols} ne sont pas déclarées dans le schéma"
yield ExtraHeadersError(note=note, **default_error_params)
return
return errors
# Same set of names on both sides but a different sequence: wrong order.
if field_names_set == header_set and field_names != header:
yield WrongHeadersOrderError(note="", **default_error_params)
return
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment