Commit 1ef02ce2 authored by Pierre Dittgen's avatar Pierre Dittgen
Browse files

simplify code using geopandas

parent b24dcb53
Pipeline #1796 passed with stages
in 18 minutes and 26 seconds
......@@ -5,125 +5,107 @@ import csv
from pathlib import Path
from timeit import default_timer as timer
import pyproj
import geopandas as gpd
import ujson as json
from fiona import collection
from shapely.geometry import mapping
"""
Extracts content from an ESRI Shapefile to a CSV file converting coordinates to WGS84
"""
DELIMITER_NAME_TO_CHAR = collections.OrderedDict([
('comma', ','),
('semicolon', ';'),
('tab', '\t'),
])
DELIMITER_NAME_TO_CHAR = collections.OrderedDict(
[("comma", ","), ("semicolon", ";"), ("tab", "\t"),]
)
WGS84_EPSG = 'epsg:4326'
WGS84_EPSG = "EPSG:4326"
class ProjConverter:
""" Converts geometry from given epsg code to 4326 (WGS84) """
def __init__(self, from_proj_dict, coords_decimal_number):
    """Build the source projection, the WGS84 projection and the float format.

    from_proj_dict: mapping expanded as keyword arguments to ``pyproj.Proj``
        to describe the source projection.
    coords_decimal_number: number of decimals used in the format string
        stored in ``self.fmt``.
    """
    source_proj = pyproj.Proj(**from_proj_dict, preserve_units=False)
    self.proj1 = source_proj
    target_proj = pyproj.Proj(WGS84_EPSG, preserve_units=False)
    self.proj2 = target_proj
    self.fmt = '{0:.%df}' % coords_decimal_number
def convert_coords(self, coords):
    """Convert a single coordinate pair from the source projection to WGS84.

    coords: indexable pair; only ``coords[0]`` and ``coords[1]`` are read.
    Returns a 2-tuple of floats, each formatted through ``self.fmt`` and
    parsed back to float so it carries the configured number of decimals.
    """
    # always_xy=True matches convert_coords_list; without it pyproj follows
    # the axis order declared by each CRS, so both methods could disagree.
    x, y = pyproj.transform(
        self.proj1, self.proj2, coords[0], coords[1], always_xy=True
    )
    return (float(self.fmt.format(x)), float(self.fmt.format(y)))
def convert_coords_list(self, coord_list):
    """Convert a sequence of points to WGS84 and return them as a list.

    coord_list: iterable of points handed to ``pyproj.itransform``.
    """
    converted_points = pyproj.itransform(
        self.proj1, self.proj2, coord_list, always_xy=True
    )
    return list(converted_points)
def convert(shp_file: Path, csv_file: Path, csv_delimiter, nb_decimals):
""" Generates CSV file from Shapefile """
def convert_geometry(self, geom):
    """Convert the coordinates of a Polygon or MultiPolygon geometry.

    geom: GeoJSON-like mapping with 'type' and 'coordinates' keys.
    Returns the converted coordinates with the same nesting as the input:
    a list of rings for a Polygon, a list of polygons (each a list of
    rings) for a MultiPolygon, so the result can replace
    ``geom['coordinates']`` without contradicting the geometry type.
    Any other geometry type writes a message to stderr and exits with
    status 1.
    """
    geom_type = geom['type']
    if geom_type == 'Polygon':
        return [self.convert_coords_list(ring) for ring in geom['coordinates']]
    if geom_type == 'MultiPolygon':
        # Keep one list per polygon: flattening all rings into a single list
        # would give Polygon-shaped coordinates under a MultiPolygon type.
        return [
            [self.convert_coords_list(ring) for ring in polygon]
            for polygon in geom['coordinates']
        ]
    import sys
    sys.stderr.write('WTF geomtype [{}]?\n'.format(geom_type))
    sys.exit(1)
# Thanks to https://gis.stackexchange.com/questions/188622/rounding-all-coordinates-in-shapely?noredirect=1
def set_precision(coords, precision):
    """Round every number in a possibly nested coordinate structure.

    coords: a number, or an iterable (nested to any depth) of numbers.
    precision: number of decimals; converted with ``int()`` before rounding.
    Returns the rounded number, or a list mirroring the nesting of ``coords``.
    """
    try:
        return round(coords, int(precision))
    except TypeError:
        # Not a number: treat it as a container and round each item.
        return [set_precision(item, precision) for item in coords]
# Loads shape file
gdf = gpd.read_file(shp_file)
def glue(values, sep):
    """Join the string form of each value with the given separator.

    Example: glue(['foo', 4.5, 8, 'baz'], '-') returns 'foo-4.5-8-baz'.
    """
    as_text = (str(value) for value in values)
    return sep.join(as_text)
# Converts projection to WGS84
gdf = gdf.to_crs(WGS84_EPSG)
# Headers
headers = [
colname for colname in gdf.columns.to_list() if colname != "geometry"
] + ["GEOMETRY"]
column_count = len(headers)
def convert(shp_file: Path, csv_file: Path, csv_delimiter, with_geometry, nb_decimals):
""" Generates CSV file from Shapefile """
with csv_file.open("wt", encoding="utf-8") as fd:
proj_converter = None
with collection(str(shp_file), 'r') as input:
writer = csv.writer(
fd, delimiter=csv_delimiter, quoting=csv.QUOTE_NONE, quotechar=""
)
writer.writerow(headers)
if with_geometry:
proj_converter = ProjConverter(input.crs, nb_decimals)
for row in gdf.itertuples():
with csv_file.open(mode='wt', encoding='utf-8') as csv_fd:
# Properties value
csv_row = [row[i] for i in range(1, column_count)]
header = False
features_nb = 0
for feature in input:
props = feature['properties']
# geometry converted to GeoJSON
geojson = mapping(row[column_count])
# Then round the coordinates to nb_decimals decimal places
geojson["coordinates"] = set_precision(geojson["coordinates"], nb_decimals)
# str() of the shapely mapping uses apostrophes; replace them with double quotes
geojson = str(geojson).replace("'", '"')
csv_row.append(geojson)
# Header row
if not header:
header_cols = [k.upper() for k in props]
if with_geometry:
header_cols.append('GEOMETRY')
csv_fd.write('{}\n'.format(csv_delimiter.join(header_cols)))
header = True
writer.writerow(csv_row)
# feature row
row_values = list(map(str, props.values()))
if with_geometry:
geom = feature['geometry']
geom['coordinates'] = proj_converter.convert_geometry(feature['geometry'].copy())
row_values.append(json.dumps(geom))
csv_fd.write('{}\n'.format(csv_delimiter.join(row_values)))
features_nb += 1
return features_nb
return len(gdf.index)
def main():
""" Dump SHP file content """
parser = argparse.ArgumentParser(description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('shp_file', type=Path, help='ESRI Shapefile to read')
parser.add_argument('csv_file', type=Path, help='CSV file to write')
parser.add_argument('--csv_delimiter', help="CSV delimiter", choices=DELIMITER_NAME_TO_CHAR.keys(), default='comma')
parser.add_argument('--without_geometry', help="don't export geometry information", action='store_true')
parser.add_argument('--coords_decimals_nb', type=int, help='decimals nb for geo coordinates', default=4)
parser = argparse.ArgumentParser(
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument("shp_file", type=Path, help="ESRI Shapefile to read")
parser.add_argument("csv_file", type=Path, help="CSV file to write")
parser.add_argument(
"--csv_delimiter",
help="CSV delimiter",
choices=DELIMITER_NAME_TO_CHAR.keys(),
default="comma",
)
parser.add_argument(
"--coords_decimals_nb",
type=int,
help="decimals nb for geo coordinates",
default=3,
)
args = parser.parse_args()
assert args.shp_file.is_file()
print('Converting {} into {}'.format(str(args.shp_file), str(args.csv_file)))
print("Converting {} into {}".format(str(args.shp_file), str(args.csv_file)))
start = timer()
features_nb = convert(args.shp_file, args.csv_file,
DELIMITER_NAME_TO_CHAR[args.csv_delimiter], not args.without_geometry,
args.coords_decimals_nb)
features_nb = convert(
args.shp_file,
args.csv_file,
DELIMITER_NAME_TO_CHAR[args.csv_delimiter],
args.coords_decimals_nb,
)
end = timer()
print('Done ({} features exported in {:.2f}s).'.format(features_nb, end - start))
print("Done ({} features exported in {:.2f}s).".format(features_nb, end - start))
if __name__ == '__main__':
if __name__ == "__main__":
main()
pyproj==2.4.0
toolz==0.9.0
ujson==1.35
requests==2.22.0
geopandas==0.7.0
python-slugify==1.2.6
python-stdnum==1.11
Fiona==1.8.8
requests==2.22.0
shapely==1.7.0
ujson==1.35
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment