Commit 88a04a32 authored by Pierre Dittgen's avatar Pierre Dittgen

Ajout de table_schema_to_md.py

parent 76ede3e2
#!/usr/bin/env python3
#
# Table schema to markdown
#
# Pierre Dittgen, Jailbreak
"""
Generates markdown page from JSON table schema file
Table schema specs are defined by frictionlessdata.io:
https://frictionlessdata.io/specs/table-schema/
"""
import argparse
import io
import json
import os
import sys
# Schema-level metadata properties mapped to the French label used when
# rendering them in the markdown output.
SCHEMA_PROP_MAP = {
    "author": "Auteur(s)",
    "contributor": "Contributeur(s)",
    "version": "Version",
    "created": "Schéma créé le",
    "homepage": "Page d'accueil",
    "uri": "Site web",
    "example": "Données d'exemple",
}
# Table-level structural properties mapped to their French label.
OTHER_PROP_MAP = {
    "missingValues": "Valeurs manquantes",
    "primaryKey": "Clef primaire",
    "foreignKeys": "Clefs étrangères",
}
# Table Schema field types mapped to the French label shown in the
# "Type attendu" line of each column.
TYPE_MAP = {
    'any': 'tout type de valeur',
    'array': 'liste',
    'boolean': 'booléen',
    'date': 'date',
    'datetime': 'date et heure',
    'duration': 'durée',
    # An empty label would be rendered as empty backticks in the output
    'geojson': 'objet GeoJSON',
    'geopoint': 'point géographique',
    'integer': 'entier',
    'number': 'nombre',
    'object': 'objet',
    'string': 'chaîne de caractères',
    'time': 'heure',
    'year': 'année',
    # The Table Schema spec spells this type `yearmonth`; the hyphenated
    # spelling is kept so schemas that already use it render as before.
    'yearmonth': 'année et mois',
    'year-month': 'année et mois',
}
# Field `format` values mapped to their French description.
FORMAT_MAP = {
    "email": "adresse de courriel",
    "uri": "adresse URL",
    "binary": "données binaires encodées en base64",
    "uuid": "identifiant UUID",
}
# Type-specific field properties mapped to their French label.
# `bareNumber` is deliberately absent: it is not rendered as "label : value"
# and needs a specific treatment.
TYPE_SPECIFIC_MAP = {
    "decimalChar": "Séparateur décimal («.» par défaut)",
    "groupChar": "Séparateur de groupes de chiffres («,» par défaut)",
    "trueValues": "Valeurs considérées comme vraies",
    "falseValues": "Valeurs considérées comme fausses",
}
# Field constraints that are rendered as "label : value", mapped to their
# French label.
CONSTRAINTS_MAP = {
    "minLength": "Taille minimale attendue",
    "maxLength": "Taille maximale attendue",
    "minimum": "Valeur minimale autorisée",
    "maximum": "Valeur maximale autorisée",
    "pattern": "La valeur doit respecter le motif suivant",
    "enum": "Valeurs autorisées",
}
def convert_file(table_schema_file, out_fd):
    """ Read a JSON table schema file and write its markdown rendering to out_fd

    table_schema_file -- path of the UTF-8 encoded JSON file to read
    out_fd -- writable text stream receiving the markdown
    """
    with open(table_schema_file, mode="r", encoding="utf-8") as schema_fd:
        schema = json.load(schema_fd)
    convert_content(schema, out_fd)
def format_description(description):
    """ Render a text as a markdown blockquote, one quoted line per input line

    Every line of `description` (split on newlines) is prefixed with '> ' and
    suffixed with a space and a newline; a blank line closes the quote.
    """
    quoted = ['> {} \n'.format(text_line) for text_line in description.split('\n')]
    quoted.append('\n')
    return ''.join(quoted)
def format_format(format_val):
    """ Return markdown format information

    format_val -- value of the `format` property of a column

    Returns a markdown list item. Formats listed in FORMAT_MAP are followed by
    their description; any other format (for instance `default` or a date
    pattern) is shown as is instead of raising a KeyError.
    """
    label = FORMAT_MAP.get(format_val)
    if label is None:
        return "- Format attendu : `{}`\n".format(format_val)
    return "- `{}` {}\n".format(format_val, label)
def format_type_specific_info(col_content):
    """ Formats and return info relative to type

    col_content -- dict describing one column of the schema

    Returns one markdown list item per property of TYPE_SPECIFIC_MAP present
    in the column, plus a note when `bareNumber` is false. The result is an
    empty string when nothing applies.
    """
    lines = [
        '- {} : {}\n'.format(label, col_content[prop])
        for prop, label in TYPE_SPECIFIC_MAP.items()
        if prop in col_content
    ]

    # json decodes JSON `false` to the Python boolean False; the string
    # 'false' is still accepted for schemas that quote the value.
    bare_number = col_content.get('bareNumber')
    if bare_number is False or bare_number == 'false':
        lines.append('- Le nombre peut contenir des caractères supplémentaires (« € », « % » ...)\n')

    return ''.join(lines)
def _is_true(value):
    """ Tell whether a schema flag is set, given as JSON true or as the string 'true' """
    return value is True or value == 'true'


def format_constraints(col_content):
    """ Converts type and constraints information into markdown

    col_content -- dict describing one column of the schema

    Returns a string of markdown list items, in this order: required or
    optional (only when the column has constraints), type information (type
    label, type specific properties, format, RDF type) or a "type not
    specified" line, then the unique flag and the constraints listed in
    CONSTRAINTS_MAP.
    """
    lines = []

    # A null `constraints` entry is handled like a missing one
    constraints = col_content.get('constraints')

    if constraints is not None:
        # required or not; json decodes JSON booleans to Python booleans, so
        # both `true` and the quoted 'true' are accepted
        req_val = 'obligatoire' if _is_true(constraints.get('required')) else 'optionnelle'
        lines.append('- Cette colonne est {}\n'.format(req_val))

    # Type
    col_type = col_content.get('type', '')
    if col_type != '':
        # Unknown types are made visible in the output rather than hidden
        type_val = TYPE_MAP.get(col_type, '??{}??'.format(col_type))
        lines.append('- Type attendu : `{}`\n'.format(type_val))

        # Type specific properties
        lines.append(format_type_specific_info(col_content))

        # Format
        if 'format' in col_content:
            lines.append(format_format(col_content['format']))

        # RDFType
        if 'rdfType' in col_content:
            lines.append('- Type RDF associé : {}\n'.format(col_content['rdfType']))
    else:
        lines.append('- Type non spécifié\n')

    if constraints is not None:
        # unique
        if _is_true(constraints.get('unique')):
            lines.append('- Les valeurs de cette colonne doivent être uniques\n')

        # minLength, maxLength, minimum, maximum, pattern, enum
        for prop, label in CONSTRAINTS_MAP.items():
            if prop in constraints:
                lines.append('- {} : {}\n'.format(label, constraints[prop]))

    return ''.join(lines)
def convert_other_table_properties(json_content):
    """ Build the markdown list of the table-level properties of OTHER_PROP_MAP

    Returns one list item per property present in `json_content`, or an empty
    string when none is present.
    """
    items = []
    for prop_name, label in OTHER_PROP_MAP.items():
        if prop_name not in json_content:
            continue
        items.append('- {} : {}\n'.format(label, json_content[prop_name]))
    return ''.join(items)
def convert_content(json_content, out_fd):
    """ Converts table schema data to markdown

    json_content -- dict holding the decoded table schema
    out_fd -- writable text stream receiving the markdown

    Writes, in order: the title header, the quoted description (if any), the
    schema properties, the complementary properties (if any) and one section
    per column.

    Raises KeyError when the schema has no `fields` entry.
    """
    # Header; a placeholder is used when the title is absent, the same way
    # convert_column handles a missing column name
    out_fd.write('# Schéma « {} »\n'.format(json_content.get('title', 'TITRE ABSENT')))

    if 'description' in json_content:
        out_fd.write(format_description(json_content['description']))

    out_fd.write('\n## Propriétés\n')
    # Explicit tuple: fixes the order in which properties are listed
    for prop in ('author', 'contributor', 'version', 'created', 'homepage', 'uri', 'example'):
        if prop in json_content:
            out_fd.write('- {} : {}\n'.format(SCHEMA_PROP_MAP[prop], json_content[prop]))

    # The complementary section is only emitted when it has some content
    other_table_prop_str = convert_other_table_properties(json_content)
    if other_table_prop_str:
        out_fd.write('\n## Propriétés complémentaires\n')
        out_fd.write(other_table_prop_str)

    # Children section
    out_fd.write('\n## Colonnes\n')
    for child in json_content['fields']:
        convert_column(child, out_fd)
def convert_column(json_content, out_fd):
    """ Write the markdown section describing one column to out_fd

    json_content -- dict describing the column
    out_fd -- writable text stream receiving the markdown
    """
    # Section header, with a placeholder when the column has no name
    column_name = json_content.get('name', 'NOM ABSENT')
    out_fd.write('\n### `{}`\n'.format(column_name))

    # Optional title, rendered in italics
    if 'title' in json_content:
        title_line = '_{}_\n\n'.format(json_content['title'])
        out_fd.write(title_line)

    # Optional description, rendered as a quote
    if 'description' in json_content:
        quoted_description = format_description(json_content['description'])
        out_fd.write(quoted_description)

    # Type and constraints
    details = format_constraints(json_content)
    out_fd.write(details)
def main():
    """ Converts a table schema file into markdown

    Command line entry point: reads the schema file given as positional
    argument and writes the markdown to the file given with -o/--output, or
    to the standard output by default. Exits with a usage error when the
    schema file does not exist.
    """
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('table_schema_file', help='Table schema source file')
    parser.add_argument('-o', '--output', help='Output file name', default='stdout')
    args = parser.parse_args()

    # Explicit check rather than `assert`, which is removed when Python runs
    # with -O
    if not os.path.exists(args.table_schema_file):
        parser.error('Table schema file not found: {}'.format(args.table_schema_file))

    if args.output == 'stdout':
        convert_file(args.table_schema_file, sys.stdout)
    else:
        # The context manager closes (and flushes) the output file, even if
        # the conversion fails
        with open(args.output, mode='wt', encoding='UTF-8') as out_fd:
            convert_file(args.table_schema_file, out_fd)


if __name__ == '__main__':
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment