Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Validata
Validata UI
Commits
2b22e02f
Commit
2b22e02f
authored
Sep 03, 2019
by
Pierre Dittgen
Browse files
hilite duplicate header column
parent
2e5d5616
Changes
1
Hide whitespace changes
Inline
Side-by-side
validata_ui/views.py
View file @
2b22e02f
...
...
@@ -7,6 +7,7 @@ import json
import
logging
import
subprocess
import
tempfile
from
collections
import
defaultdict
from
datetime
import
datetime
from
operator
import
itemgetter
from
pathlib
import
Path
...
...
@@ -139,6 +140,18 @@ def extract_source_data(source: ValidataResource, schema_descriptor, preview_row
"""Transform value into string"""
return
''
if
val
is
None
else
str
(
val
)
def compute_duplicate_header_column_indices(source_header, duplicate_header_names):
    """Return the indices of repeated occurrences of duplicate header names.

    ``source_header`` is scanned in order. For each name that is also found
    in ``duplicate_header_names``, the first occurrence is kept aside and the
    index of every later occurrence is collected.

    Returns a set of integer column indices (empty when nothing repeats).
    """
    already_seen = set()
    repeated_indices = set()
    for position, name in enumerate(source_header):
        # Only names reported as duplicates are of interest.
        if name not in duplicate_header_names:
            continue
        if name in already_seen:
            # Second or later occurrence: this column gets flagged.
            repeated_indices.add(position)
        else:
            # First occurrence is remembered but not flagged.
            already_seen.add(name)
    return repeated_indices
header
=
None
rows
=
[]
nb_rows
=
0
...
...
@@ -166,11 +179,21 @@ def extract_source_data(source: ValidataResource, schema_descriptor, preview_row
preview_rows_nb
=
min
(
preview_rows_nb
,
nb_rows
)
# Computes original_headers display
# wrong headers order: display all headers as in error
if
any
([
err
.
code
==
'wrong-headers-order'
for
err
in
repair_report
]):
source_header_info
=
[(
h
,
True
)
for
h
in
source_header
]
# else display header error for:
# - blank-header
# - unknown-header
# - duplicate-header
else
:
schema_field_names
=
[
f
[
'name'
]
for
f
in
schema_descriptor
.
get
(
'fields'
)
or
[]]
source_header_info
=
[(
h
,
not
h
or
h
not
in
schema_field_names
)
for
h
in
source_header
]
#import ipdb; ipdb.set_trace()
duplicate_header_names
=
[
err
.
_message_substitutions
[
'column-name'
]
for
err
in
repair_report
if
err
.
code
==
'duplicate-header'
]
duplicate_col_indices
=
compute_duplicate_header_column_indices
(
source_header
,
duplicate_header_names
)
source_header_info
=
[(
h
,
not
h
or
h
not
in
schema_field_names
or
i
in
duplicate_col_indices
)
for
i
,
h
in
enumerate
(
source_header
)]
return
{
'source_header_info'
:
source_header_info
,
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment