Project: Observatoire / observatoire-scripts

Commit 7ab0d58c, authored Dec 01, 2021 by Pierre Dittgen
Parent: e6b4d6ca
Changes: 3 files

Use bash script options to simplify error handling
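This commit drops the per-command "|| exit 1" guards from the pipeline scripts and relies on bash's own error-handling options instead. As a minimal sketch of what the enabled options do (illustrative only, not code taken from the repository):

    set -e            # stop the script as soon as any command exits with a non-zero status
    set -u            # treat expansion of an unset variable as an error
    set -o pipefail   # a pipeline fails if any of its stages fails, not only the last one

    # Old pattern, repeated after every command:
    #     some_command arg || exit 1
    # New pattern, with the options above in effect the bare call is enough:
    #     some_command arg
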
download_and_prepare_data
@@ -8,6 +8,7 @@
 #
 # Prepare data:
 # - cog : cog_depts.tsv
+set -euo pipefail

 # Common initializations
 source lib/common.sh
@@ -43,13 +44,13 @@ download_and_filter_datagouv_data() {
     # Extract id_datagouv column content from OpenDataFrance organisations file
     DG_ORG_ID_FILE=$DG_TEMP_DIR/org_ids.txt
-    $CSV_CUT -c "id-datagouv" $ODF_ORGA_FILE | $SED -e '1d' | $GREP -v '""' > $DG_ORG_ID_FILE || exit 1
+    $CSV_CUT -c "id-datagouv" $ODF_ORGA_FILE | $SED -e '1d' | $GREP -v '""' > $DG_ORG_ID_FILE

     # Filter datagouv organizations, datasets and resources on OpenDataFrance organizations
     mkdir -p $DG_DUMP_DIR
-    $PYTHON $LIB_DIR/download/filter_dg_csv.py $DG_ORG_ID_FILE id $DG_TEMP_DIR/organizations.csv $DG_DUMP_DIR/organizations.csv || exit 1
-    $PYTHON $LIB_DIR/download/filter_dg_csv.py $DG_ORG_ID_FILE organization_id $DG_TEMP_DIR/datasets.csv $DG_DUMP_DIR/datasets.csv || exit 1
-    $PYTHON $LIB_DIR/download/filter_dg_csv.py $DG_ORG_ID_FILE dataset.organization_id $DG_TEMP_DIR/resources.csv $DG_DUMP_DIR/resources.csv || exit 1
+    $PYTHON $LIB_DIR/download/filter_dg_csv.py $DG_ORG_ID_FILE id $DG_TEMP_DIR/organizations.csv $DG_DUMP_DIR/organizations.csv
+    $PYTHON $LIB_DIR/download/filter_dg_csv.py $DG_ORG_ID_FILE organization_id $DG_TEMP_DIR/datasets.csv $DG_DUMP_DIR/datasets.csv
+    $PYTHON $LIB_DIR/download/filter_dg_csv.py $DG_ORG_ID_FILE dataset.organization_id $DG_TEMP_DIR/resources.csv $DG_DUMP_DIR/resources.csv

     # Clean
     rm -fR $DG_TEMP_DIR
@@ -62,8 +63,8 @@ download_and_merge_opendatasoft_data() {
     echo "Downloading OpenDataSoft catalog and monitoring..."
     ODS_INFO_FILE=$ODS_TEMP_DIR/ods_info.csv
     mkdir -p $ODS_TEMP_DIR
-    $CSV_CUT -c "siren,nom,url-ptf,id-ods" $ODF_ORGA_FILE > $ODS_INFO_FILE || exit 1
-    $PYTHON $LIB_DIR/download/ods_download.py $ODS_INFO_FILE $ODS_TEMP_DIR || exit 1
+    $CSV_CUT -c "siren,nom,url-ptf,id-ods" $ODF_ORGA_FILE > $ODS_INFO_FILE
+    $PYTHON $LIB_DIR/download/ods_download.py $ODS_INFO_FILE $ODS_TEMP_DIR

     # Merge
     mkdir -p $ODS_DUMP_DIR
@@ -80,7 +81,7 @@ download_siren_info() {
     ODF_SIREN_FILE=$SIREN_TEMP_DIR/odf_siren.txt

     echo "Downloading SIREN info..."
-    $CSV_CUT -c "siren" $ODF_ORGA_FILE | $SED -e '1d' | sort -u > $ODF_SIREN_FILE || exit 1
+    $CSV_CUT -c "siren" $ODF_ORGA_FILE | $SED -e '1d' | sort -u > $ODF_SIREN_FILE
     $PYTHON $LIB_DIR/download/siren_info.py $ODF_SIREN_FILE --o $CACHE_DIR/siren_info.csv

     # Clean
@@ -93,8 +94,8 @@ prepare_cog_info() {
     COG_REG_TXT_FILE=$RSC_DIR/INSEE-COG/region2019.csv

     echo "Preparing COG info..."
-    (echo "depcode,depnom" && $CSV_CUT -c "dep,libelle" $COG_DEPT_TXT_FILE | $SED "1d") > $CACHE_DIR/cog_departement.csv || exit 1
-    (echo "regcode,regnom" && $CSV_CUT -c "reg,libelle" $COG_REG_TXT_FILE | $SED "1d") > $CACHE_DIR/cog_region.csv || exit 1
+    (echo "depcode,depnom" && $CSV_CUT -c "dep,libelle" $COG_DEPT_TXT_FILE | $SED "1d") > $CACHE_DIR/cog_departement.csv
+    (echo "regcode,regnom" && $CSV_CUT -c "reg,libelle" $COG_REG_TXT_FILE | $SED "1d") > $CACHE_DIR/cog_region.csv
 }

 # SQLiteDB
@@ -105,27 +106,27 @@ prepare_db_info() {
     fi

     # Imports departements CSV into DB
-    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name=depts $CACHE_DIR/cog_departement.csv $SQLITE_DB || exit 1
+    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name=depts $CACHE_DIR/cog_departement.csv $SQLITE_DB

     # Imports GoogleSheet CSV into DB
-    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name data_orga --integer_cols=5,7 $ODF_ORGA_FILE $SQLITE_DB || exit 1
-    $SQLITE3 $SQLITE_DB < $LIB_DIR/db/add_reg_nom.sql || exit 1
+    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name data_orga --integer_cols=5,7 $ODF_ORGA_FILE $SQLITE_DB
+    $SQLITE3 $SQLITE_DB < $LIB_DIR/db/add_reg_nom.sql

     # Imports Data gouv CSVs into DB
-    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name data_gouv_organizations --csv_delimiter=semicolon --integer_cols=9,10,11,12,13 $DG_DUMP_DIR/organizations.csv $SQLITE_DB || exit 1
-    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name data_gouv_datasets --csv_delimiter=semicolon --integer_cols=18,19,20,22 $DG_DUMP_DIR/datasets.csv $SQLITE_DB || exit 1
-    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name data_gouv_resources --csv_delimiter=semicolon --integer_cols=20 $DG_DUMP_DIR/resources.csv $SQLITE_DB || exit 1
+    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name data_gouv_organizations --csv_delimiter=semicolon --integer_cols=9,10,11,12,13 $DG_DUMP_DIR/organizations.csv $SQLITE_DB
+    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name data_gouv_datasets --csv_delimiter=semicolon --integer_cols=18,19,20,22 $DG_DUMP_DIR/datasets.csv $SQLITE_DB
+    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --table_name data_gouv_resources --csv_delimiter=semicolon --integer_cols=20 $DG_DUMP_DIR/resources.csv $SQLITE_DB

     # And add _siren et _nom columns
-    $SQLITE3 $SQLITE_DB < $LIB_DIR/db/add_siren_nom_to_datagouv_tables.sql || exit 1
+    $SQLITE3 $SQLITE_DB < $LIB_DIR/db/add_siren_nom_to_datagouv_tables.sql

     # Imports OpenDataSoft CSV into DB
-    $PYTHON $LIB_DIR/db/csv_sqlite_import.py $ODS_DUMP_DIR/ods_catalog.csv $SQLITE_DB || exit 1
-    $PYTHON $LIB_DIR/db/csv_sqlite_import.py $ODS_DUMP_DIR/ods_monitoring.csv $SQLITE_DB || exit 1
+    $PYTHON $LIB_DIR/db/csv_sqlite_import.py $ODS_DUMP_DIR/ods_catalog.csv $SQLITE_DB
+    $PYTHON $LIB_DIR/db/csv_sqlite_import.py $ODS_DUMP_DIR/ods_monitoring.csv $SQLITE_DB

     # En a-t-on besoin ?
     # Creates and fill ref_org table from data_orga
-    $SQLITE3 $SQLITE_DB < $LIB_DIR/db/create_ref_org_table.sql || exit 1
+    $SQLITE3 $SQLITE_DB < $LIB_DIR/db/create_ref_org_table.sql
 }

 prepare_georef_db() {
@@ -140,19 +141,19 @@ prepare_georef_db() {
     mkdir -p $GEO_TEMP_DIR

     # Converts SHP to tsv
-    $PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/ADE_1-1_SHP_LAMB93_FR/REGION.shp $GEO_TEMP_DIR/ae_metropole_region.tsv || exit 1
-    $PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/ADE_1-1_SHP_LAMB93_FR/DEPARTEMENT.shp $GEO_TEMP_DIR/ae_metropole_departement.tsv || exit 1
-    $PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/ADE_1-1_SHP_LAMB93_FR/COMMUNE.shp $GEO_TEMP_DIR/ae_metropole_commune.tsv || exit 1
-    $PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/ADE_1-1_SHP_LAMB93_FR/EPCI.shp $GEO_TEMP_DIR/ae_metropole_epci.tsv || exit 1
-    $PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/IAU-IDF/Intercommunalites_de_la_Region_IledeFrance_au_1er_janvier_2018.shp $GEO_TEMP_DIR/iau_ept.tsv || exit 1
+    $PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/ADE_1-1_SHP_LAMB93_FR/REGION.shp $GEO_TEMP_DIR/ae_metropole_region.tsv
+    $PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/ADE_1-1_SHP_LAMB93_FR/DEPARTEMENT.shp $GEO_TEMP_DIR/ae_metropole_departement.tsv
+    $PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/ADE_1-1_SHP_LAMB93_FR/COMMUNE.shp $GEO_TEMP_DIR/ae_metropole_commune.tsv
+    $PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/ADE_1-1_SHP_LAMB93_FR/EPCI.shp $GEO_TEMP_DIR/ae_metropole_epci.tsv
+    $PYTHON $LIB_DIR/db/shp2csv.py --csv_delimiter tab $RSC_DIR/IAU-IDF/Intercommunalites_de_la_Region_IledeFrance_au_1er_janvier_2018.shp $GEO_TEMP_DIR/iau_ept.tsv

     # Imports Geo CSVs into GeoRefDB
-    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/ae_metropole_region.tsv $SQLITE_GEOREF_DB || exit 1
-    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/ae_metropole_departement.tsv $SQLITE_GEOREF_DB || exit 1
-    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/ae_metropole_commune.tsv $SQLITE_GEOREF_DB || exit 1
-    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/ae_metropole_epci.tsv $SQLITE_GEOREF_DB || exit 1
-    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/iau_ept.tsv $SQLITE_GEOREF_DB || exit 1
+    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/ae_metropole_region.tsv $SQLITE_GEOREF_DB
+    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/ae_metropole_departement.tsv $SQLITE_GEOREF_DB
+    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/ae_metropole_commune.tsv $SQLITE_GEOREF_DB
+    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/ae_metropole_epci.tsv $SQLITE_GEOREF_DB
+    $PYTHON $LIB_DIR/db/csv_sqlite_import.py --csv_delimiter tab $GEO_TEMP_DIR/iau_ept.tsv $SQLITE_GEOREF_DB

     # Clean
     rm -fR $GEO_TEMP_DIR
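The pipelines in this file, such as the $CSV_CUT | $SED | $GREP extraction near the top, are where the pipefail option matters most: without it a pipeline's exit status is that of its last command only, so a failing earlier stage goes unnoticed. A small stand-alone illustration of the difference (hypothetical snippet, not part of the repository):

    #!/usr/bin/env bash
    set -e
    false | cat          # without pipefail the pipeline takes cat's status (0) and the script continues
    set -o pipefail
    false | cat          # the failure of 'false' is now the pipeline status, so set -e aborts here
    echo "never reached"
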
lib/common.sh
@@ -2,6 +2,7 @@
 #
 # Common initialization
 #
+set -euo pipefail

 if [ "$0" == "common.sh" ]; then
     echo "This script is not intended to be called directly"
process_and_generate
@@ -7,6 +7,7 @@
 # - markdown file
 # - sqlite db
 # - csv files
+set -euo pipefail

 # Common initializations
 source lib/common.sh
@@ -51,7 +52,7 @@ generate_datasets_datagouv_csv() {
     # compute matching table
     ORG_ID_SIREN_CSV_FILE=$DG_ODF_TEMP_DIR/org_id_siren.csv
-    $CSV_CUT -c "id-datagouv,siren" $ODF_ORGA_FILE | $GREP -v '^,' > $ORG_ID_SIREN_CSV_FILE || exit 1
+    $CSV_CUT -c "id-datagouv,siren" $ODF_ORGA_FILE | $GREP -v '^,' > $ORG_ID_SIREN_CSV_FILE

     # Normalize dataset csv
     NORM_DS_CSV_FILE=$DG_ODF_TEMP_DIR/datasets.csv
@@ -68,20 +69,20 @@ generate_datasets_datagouv_csv() {
 generate_legacy_data_from_dbs() {
     # Computes metrics
-    $PYTHON $LIB_DIR/legacy/fill_orga_metrics.py $SQLITE_DB || exit 1
+    $PYTHON $LIB_DIR/legacy/fill_orga_metrics.py $SQLITE_DB

     # Generates markdown file
     MD_DIR=$BUILD_DIR/markdown
     mkdir -p $MD_DIR
-    $PYTHON $LIB_DIR/legacy/orga_metrics2md.py $SQLITE_DB $MD_DIR/organisations.md || exit 1
+    $PYTHON $LIB_DIR/legacy/orga_metrics2md.py $SQLITE_DB $MD_DIR/organisations.md

     # Populates orga_metrics with geometry information
-    $PYTHON $LIB_DIR/legacy/populate_geo_data.py $SQLITE_DB $SQLITE_GEOREF_DB || exit 1
+    $PYTHON $LIB_DIR/legacy/populate_geo_data.py $SQLITE_DB $SQLITE_GEOREF_DB

     # Generates GeoJSON
     GEOJSON_DIR=$BUILD_DIR/geojson
     mkdir -p $GEOJSON_DIR
-    $PYTHON $LIB_DIR/legacy/generate_geo_jsons.py $SQLITE_DB $GEOJSON_DIR || exit 1
+    $PYTHON $LIB_DIR/legacy/generate_geo_jsons.py $SQLITE_DB $GEOJSON_DIR

     # Prepare sqlite_db for superset