# process_shapefile.py
# Export the WGS84 centroid coordinates of a shapefile's geometries to CSV.
import geopandas as gpd
import pandas as pd
def process_shapefile(input_shp, output_csv):
    """Write the WGS84 centroid of every usable geometry in a shapefile to CSV.

    The file is read with geopandas; rows whose geometry is missing, empty or
    invalid are discarded. The centroid of each remaining geometry is computed
    in the file's own CRS and then reprojected to EPSG:4326.

    The CSV keeps the attribute columns of the input (the original
    ``geometry`` column is removed) and adds three columns: ``centroid`` (the
    reprojected point geometry, written in its text form), ``longitude`` and
    ``latitude``. The row index is not written.

    Args:
        input_shp: Path of the shapefile to read.
        output_csv: Path of the CSV file to write.

    Raises:
        ValueError: If the shapefile has no CRS, because its centroids cannot
            be reprojected to EPSG:4326.
    """
    # Read the shapefile
    gdf = gpd.read_file(input_shp)

    # Get the original CRS. Without one, to_crs() below would fail with a
    # less helpful message after all the work is done, so stop here instead.
    original_crs = gdf.crs
    if original_crs is None:
        raise ValueError(
            f"{input_shp} has no CRS defined; "
            "cannot reproject centroids to EPSG:4326"
        )

    # Keep only rows with a present, non-empty, valid geometry. The vectorized
    # GeoSeries predicates replace a per-row lambda and make the three
    # conditions explicit.
    original_count = len(gdf)
    geoms = gdf.geometry
    usable = geoms.notna() & ~geoms.is_empty & geoms.is_valid
    gdf = gdf[usable].copy()  # copy so the column assignment below is safe
    print(f"Dropped {original_count - len(gdf)} invalid geometries")

    # Centroids are computed in the original CRS, before reprojection
    gdf['centroid'] = gdf.geometry.centroid

    # Create a new GeoDataFrame with the centroids as the active geometry
    centroid_gdf = gpd.GeoDataFrame(
        gdf.drop(columns=['geometry']),
        geometry='centroid',
        crs=original_crs,
    )

    # Reproject to WGS84 (EPSG:4326)
    centroid_gdf = centroid_gdf.to_crs('EPSG:4326')

    # Extract latitude and longitude from centroids
    centroid_gdf['longitude'] = centroid_gdf.geometry.x
    centroid_gdf['latitude'] = centroid_gdf.geometry.y

    # Save to CSV. The 'centroid' geometry column is not dropped, so it is
    # written alongside the longitude/latitude columns.
    centroid_gdf.to_csv(output_csv, index=False)
    print(f"Processed centroids saved to {output_csv}")
if __name__ == "__main__":
    # Example run: read "out.shp" from the working directory and write the
    # centroid table to "output.csv".
    input_shapefile, output_csv = "out.shp", "output.csv"
    process_shapefile(input_shp=input_shapefile, output_csv=output_csv)