Commit 7ab0d58c authored by Pierre Dittgen's avatar Pierre Dittgen
Browse files

Use bash script options to simplify error handling

parent e6b4d6ca
......@@ -8,6 +8,7 @@
#
# Prepare data:
# - cog : cog_depts.tsv
set -euo pipefail
# Common initializations
source lib/common.sh
......@@ -43,13 +44,13 @@ download_and_filter_datagouv_data() {
# Extract id_datagouv column content from OpenDataFrance organisations file
DG_ORG_ID_FILE=$DG_TEMP_DIR/org_ids.txt
$CSV_CUT -c "id-datagouv" $ODF_ORGA_FILE | $SED -e '1d' | $GREP -v '""' > $DG_ORG_ID_FILE || exit 1
$CSV_CUT -c "id-datagouv" $ODF_ORGA_FILE | $SED -e '1d' | $GREP -v '""' > $DG_ORG_ID_FILE
# Filter datagouv organizations, datasets and resources on OpenDataFrance organizations
mkdir -p $DG_DUMP_DIR
$PYTHON $LIB_DIR/download/filter_dg_csv.py $DG_ORG_ID_FILE id $DG_TEMP_DIR/organizations.csv $DG_DUMP_DIR/organizations.csv || exit 1
$PYTHON $LIB_DIR/download/filter_dg_csv.py $DG_ORG_ID_FILE organization_id $DG_TEMP_DIR/datasets.csv $DG_DUMP_DIR/datasets.csv || exit 1
$PYTHON $LIB_DIR/download/filter_dg_csv.py $DG_ORG_ID_FILE dataset.organization_id $DG_TEMP_DIR/resources.csv $DG_DUMP_DIR/resources.csv || exit 1
$PYTHON $LIB_DIR/download/filter_dg_csv.py $DG_ORG_ID_FILE id $DG_TEMP_DIR/organizations.csv $DG_DUMP_DIR/organizations.csv
$PYTHON $LIB_DIR/download/filter_dg_csv.py $DG_ORG_ID_FILE organization_id $DG_TEMP_DIR/datasets.csv $DG_DUMP_DIR/datasets.csv
$PYTHON $LIB_DIR/download/filter_dg_csv.py $DG_ORG_ID_FILE dataset.organization_id $DG_TEMP_DIR/resources.csv $DG_DUMP_DIR/resources.csv
# Clean
rm -fR $DG_TEMP_DIR
......@@ -62,8 +63,8 @@ download_and_merge_opendatasoft_data() {
echo "Downloading OpenDataSoft catalog and monitoring..."
ODS_INFO_FILE=$ODS_TEMP_DIR/ods_info.csv
mkdir -p $ODS_TEMP_DIR
$CSV_CUT -c "siren,nom,url-ptf,id-ods" $ODF_ORGA_FILE > $ODS_INFO_FILE || exit 1
$PYTHON $LIB_DIR/download/ods_download.py $ODS_INFO_FILE $ODS_TEMP_DIR || exit 1
$CSV_CUT -c "siren,nom,url-ptf,id-ods" $ODF_ORGA_FILE > $ODS_INFO_FILE
$PYTHON $LIB_DIR/download/ods_download.py $ODS_INFO_FILE $ODS_TEMP_DIR
# Merge
mkdir -p $ODS_DUMP_DIR
......@@ -80,7 +81,7 @@ download_siren_info() {
ODF_SIREN_FILE=$SIREN_TEMP_DIR/odf_siren.txt
echo "Downloading SIREN info..."
$CSV_CUT -c "siren" $ODF_ORGA_FILE | $SED -e '1d' | sort -u > $ODF_SIREN_FILE || exit 1
$CSV_CUT -c "siren" $ODF_ORGA_FILE | $SED -e '1d' | sort -u > $ODF_SIREN_FILE
$PYTHON $LIB_DIR/download/siren_info.py $ODF_SIREN_FILE --o $CACHE_DIR/siren_info.csv
# Clean
......@@ -93,8 +94,8 @@ prepare_cog_info() {
COG_REG_TXT_FILE=$RSC_DIR/INSEE-COG/region2019.csv
echo "Preparing COG info..."
(echo "depcode,depnom" && $CSV_CUT -c "dep,libelle" $COG_DEPT_TXT_FILE | $SED "1d") > $CACHE_DIR/cog_departement.csv || exit 1
(echo "regcode,regnom" && $CSV_CUT -c "reg,libelle" $COG_REG_TXT_FILE | $SED "1d") > $CACHE_DIR/cog_region.csv || exit 1
(echo "depcode,depnom" && $CSV_CUT -c "dep,libelle" $COG_DEPT_TXT_FILE | $SED "1d") > $CACHE_DIR/cog_departement.csv
(echo "regcode,regnom" && $CSV_CUT -c "reg,libelle" $COG_REG_TXT_FILE | $SED "1d") > $CACHE_DIR/cog_region.csv
}
# SQLiteDB
......@@ -105,27 +106,27 @@ prepare_db_info() {
fi
# Imports departements CSV into DB
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name=depts $CACHE_DIR/cog_departement.csv $SQLITE_DB || exit 1
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name=depts $CACHE_DIR/cog_departement.csv $SQLITE_DB
# Imports GoogleSheet CSV into DB
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name data_orga --integer_cols=5,7 $ODF_ORGA_FILE $SQLITE_DB || exit 1
$SQLITE3 $SQLITE_DB < $LIB_DIR/db/add_reg_nom.sql || exit 1
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name data_orga --integer_cols=5,7 $ODF_ORGA_FILE $SQLITE_DB
$SQLITE3 $SQLITE_DB < $LIB_DIR/db/add_reg_nom.sql
# Imports Data gouv CSVs into DB
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name data_gouv_organizations --csv_delimiter=semicolon --integer_cols=9,10,11,12,13 $DG_DUMP_DIR/organizations.csv $SQLITE_DB || exit 1
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name data_gouv_datasets --csv_delimiter=semicolon --integer_cols=18,19,20,22 $DG_DUMP_DIR/datasets.csv $SQLITE_DB || exit 1
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name data_gouv_resources --csv_delimiter=semicolon --integer_cols=20 $DG_DUMP_DIR/resources.csv $SQLITE_DB || exit 1
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name data_gouv_organizations --csv_delimiter=semicolon --integer_cols=9,10,11,12,13 $DG_DUMP_DIR/organizations.csv $SQLITE_DB
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name data_gouv_datasets --csv_delimiter=semicolon --integer_cols=18,19,20,22 $DG_DUMP_DIR/datasets.csv $SQLITE_DB
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name data_gouv_resources --csv_delimiter=semicolon --integer_cols=20 $DG_DUMP_DIR/resources.csv $SQLITE_DB
# And add _siren et _nom columns
$SQLITE3 $SQLITE_DB < $LIB_DIR/db/add_siren_nom_to_datagouv_tables.sql || exit 1
$SQLITE3 $SQLITE_DB < $LIB_DIR/db/add_siren_nom_to_datagouv_tables.sql
# Imports OpenDataSoft CSV into DB
$PYTHON $LIB_DIR/db/csv_sqlite_import.py $ODS_DUMP_DIR/ods_catalog.csv $SQLITE_DB || exit 1
$PYTHON $LIB_DIR/db/csv_sqlite_import.py $ODS_DUMP_DIR/ods_monitoring.csv $SQLITE_DB || exit 1
$PYTHON $LIB_DIR/db/csv_sqlite_import.py $ODS_DUMP_DIR/ods_catalog.csv $SQLITE_DB
$PYTHON $LIB_DIR/db/csv_sqlite_import.py $ODS_DUMP_DIR/ods_monitoring.csv $SQLITE_DB
# En a-t-on besoin ?
# Creates and fill ref_org table from data_orga
$SQLITE3 $SQLITE_DB < $LIB_DIR/db/create_ref_org_table.sql || exit 1
$SQLITE3 $SQLITE_DB < $LIB_DIR/db/create_ref_org_table.sql
}
prepare_georef_db() {
......@@ -140,19 +141,19 @@ prepare_georef_db() {
mkdir -p $GEO_TEMP_DIR
# Converts SHP to tsv
$PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/ADE_1-1_SHP_LAMB93_FR/REGION.shp $GEO_TEMP_DIR/ae_metropole_region.tsv || exit 1
$PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/ADE_1-1_SHP_LAMB93_FR/DEPARTEMENT.shp $GEO_TEMP_DIR/ae_metropole_departement.tsv || exit 1
$PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/ADE_1-1_SHP_LAMB93_FR/COMMUNE.shp $GEO_TEMP_DIR/ae_metropole_commune.tsv || exit 1
$PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/ADE_1-1_SHP_LAMB93_FR/EPCI.shp $GEO_TEMP_DIR/ae_metropole_epci.tsv || exit 1
$PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/IAU-IDF/Intercommunalites_de_la_Region_IledeFrance_au_1er_janvier_2018.shp $GEO_TEMP_DIR/iau_ept.tsv || exit 1
$PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/ADE_1-1_SHP_LAMB93_FR/REGION.shp $GEO_TEMP_DIR/ae_metropole_region.tsv
$PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/ADE_1-1_SHP_LAMB93_FR/DEPARTEMENT.shp $GEO_TEMP_DIR/ae_metropole_departement.tsv
$PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/ADE_1-1_SHP_LAMB93_FR/COMMUNE.shp $GEO_TEMP_DIR/ae_metropole_commune.tsv
$PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/ADE_1-1_SHP_LAMB93_FR/EPCI.shp $GEO_TEMP_DIR/ae_metropole_epci.tsv
$PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/IAU-IDF/Intercommunalites_de_la_Region_IledeFrance_au_1er_janvier_2018.shp $GEO_TEMP_DIR/iau_ept.tsv
# Imports Geo CSVs into GeoRefDB
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/ae_metropole_region.tsv $SQLITE_GEOREF_DB || exit 1
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/ae_metropole_departement.tsv $SQLITE_GEOREF_DB || exit 1
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/ae_metropole_commune.tsv $SQLITE_GEOREF_DB || exit 1
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/ae_metropole_epci.tsv $SQLITE_GEOREF_DB || exit 1
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/iau_ept.tsv $SQLITE_GEOREF_DB || exit 1
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/ae_metropole_region.tsv $SQLITE_GEOREF_DB
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/ae_metropole_departement.tsv $SQLITE_GEOREF_DB
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/ae_metropole_commune.tsv $SQLITE_GEOREF_DB
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/ae_metropole_epci.tsv $SQLITE_GEOREF_DB
$PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/iau_ept.tsv $SQLITE_GEOREF_DB
# Clean
rm -fR $GEO_TEMP_DIR
......
......@@ -2,6 +2,7 @@
#
# Common initialization
#
set -euo pipefail
if [ "$0" == "common.sh" ]; then
echo "This script is not intended to be called directly"
......
......@@ -7,6 +7,7 @@
# - markdown file
# - sqlite db
# - csv files
set -euo pipefail
# Common initializations
source lib/common.sh
......@@ -51,7 +52,7 @@ generate_datasets_datagouv_csv() {
# compute matching table
ORG_ID_SIREN_CSV_FILE=$DG_ODF_TEMP_DIR/org_id_siren.csv
$CSV_CUT -c "id-datagouv,siren" $ODF_ORGA_FILE | $GREP -v '^,' > $ORG_ID_SIREN_CSV_FILE || exit 1
$CSV_CUT -c "id-datagouv,siren" $ODF_ORGA_FILE | $GREP -v '^,' > $ORG_ID_SIREN_CSV_FILE
# Normalize dataset csv
NORM_DS_CSV_FILE=$DG_ODF_TEMP_DIR/datasets.csv
......@@ -68,20 +69,20 @@ generate_datasets_datagouv_csv() {
generate_legacy_data_from_dbs() {
# Computes metrics
$PYTHON $LIB_DIR/legacy/fill_orga_metrics.py $SQLITE_DB || exit 1
$PYTHON $LIB_DIR/legacy/fill_orga_metrics.py $SQLITE_DB
# Generates markdown file
MD_DIR=$BUILD_DIR/markdown
mkdir -p $MD_DIR
$PYTHON $LIB_DIR/legacy/orga_metrics2md.py $SQLITE_DB $MD_DIR/organisations.md || exit 1
$PYTHON $LIB_DIR/legacy/orga_metrics2md.py $SQLITE_DB $MD_DIR/organisations.md
# Populates orga_metrics with geometry information
$PYTHON $LIB_DIR/legacy/populate_geo_data.py $SQLITE_DB $SQLITE_GEOREF_DB || exit 1
$PYTHON $LIB_DIR/legacy/populate_geo_data.py $SQLITE_DB $SQLITE_GEOREF_DB
# Generates GeoJSON
GEOJSON_DIR=$BUILD_DIR/geojson
mkdir -p $GEOJSON_DIR
$PYTHON $LIB_DIR/legacy/generate_geo_jsons.py $SQLITE_DB $GEOJSON_DIR || exit 1
$PYTHON $LIB_DIR/legacy/generate_geo_jsons.py $SQLITE_DB $GEOJSON_DIR
# Prepare sqlite_db for superset
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment