From e9cccbfe750c4371c6dedd9158260ab83ab27bd8 Mon Sep 17 00:00:00 2001
From: Tom Russell
Date: Fri, 2 Feb 2024 17:28:03 +0000
Subject: [PATCH 1/6] Reorganise scripts and rules

- workflow rules grouped by high-level concept and workflow stage
- scripts sit next to the rules that use them

Aim to reduce cognitive overhead and the sense that different
sub-workflows were muddled together in the previous grouping by general
stage. Aim for a clear set of workflow/ directories between which we can
draw a simple high-level DAG, with the rules within each stage forming a
roughly linear sequence.
---
 .../damage_curves/storm}/high_fragility.csv | 0
 .../storm}/lowmedium_fragility.csv | 0
 workflow/Snakefile | 95 +++++++++----------
 .../download => context}/coastlines.smk | 0
 .../{rules/preprocess => context}/gadm.smk | 26 +++++
 .../download => context}/natural-earth.smk | 0
 .../hazards.smk => flood/aqueduct.smk} | 0
 .../preprocess => flood}/trim_hazard_data.smk | 0
 .../land-cover.smk} | 0
 .../dryad-gdp.smk | 0
 .../ghsl-pop.smk | 15 +--
 .../aggregate_grid_disruption.py | 0
 .../aggregate_grid_exposure.py | 0
 .../analyse/aggregate_levels.smk | 2 +-
 .../analyse/common_functions.py | 0
 .../analyse/country_pair_matrix.smk | 8 +-
 .../analyse/empirical_distribution.smk | 2 +-
 .../analyse/extract_percentile.smk | 2 +-
 .../analyse}/network_components.py | 0
 .../analyse/network_components.smk | 2 +-
 .../analyse/power_analyse_all.smk | 0
 .../analyse/select_percentile.py | 0
 .../analyse/storm_aggregate_levels.py | 0
 .../analyse/storm_distribution_empirical.py | 0
 ...m_distribution_empirical_country_matrix.py | 0
 .../storm_distribution_empirical_geo.py | 0
 .../analyse/target_analysis.smk | 4 +-
 .../analyse/transmission_aggregate.py | 0
 .../analyse/transmission_aggregate.smk | 2 +-
 .../target => power-tc}/cyclone-grid.smk | 0
 .../disruption.smk | 4 +-
 .../exposure.smk | 8 +-
 .../intersect => power-tc}/grid_disruption.py | 0
 .../grid_disruption_by_admin_region.py | 0
 .../grid_exposure_by_admin_region.py | 0
 .../intersection.smk | 2 +-
 .../analyse => power-tc}/map/outages.smk | 0
 .../analyse => power-tc}/map/storm_tracks.smk | 0
 .../analyse => power-tc}/map/wind_fields.smk | 0
 .../power-tc/network_raster_intersection.smk | 23 +++++
 .../plot}/EAD_EACA-finder.py | 0
 .../plot}/cross_correlation.py | 0
 .../plot/customers_affected_by_storm.smk | 0
 .../figures => power-tc/plot}/diff_agg.py | 0
 .../plot}/diff_cross_correlation.py | 0
 .../analyse => power-tc}/plot/fig_CDFs.smk | 8 +-
 .../analyse => power-tc}/plot/fig_EACA.smk | 8 +-
 .../analyse => power-tc}/plot/fig_EAD.smk | 20 ++--
 .../plot/fig_EAD_EACA.smk | 4 +-
 .../plot/fig_cross_correlation.smk | 14 +--
 .../figures => power-tc/plot}/fig_initial.py | 0
 .../analyse => power-tc}/plot/fig_initial.smk | 4 +-
 .../analyse => power-tc}/plot/fig_master.smk | 0
 .../figures => power-tc/plot}/mean_agg.py | 0
 .../plot}/plot_together.py | 0
 .../figures => power-tc/plot}/plotter.py | 0
 .../plot/target_disruption.smk | 0
 .../plot_exposure_distributions.py | 0
 .../preprocess => power}/annotate_targets.py | 0
 .../create_electricity_network.py | 0
 .../preprocess => power}/create_network.smk | 29 +----
 .../gridfinder-targets.smk} | 2 +-
 .../{rules/download => power}/gridfinder.smk | 0
 .../slice_network_assets.py | 0
 .../wri-powerplants.smk} | 25 +++++
 workflow/rules/download/IBTrACS.smk | 13 ---
 workflow/rules/download/IRIS.smk | 32 -------
 workflow/rules/download/gadm.smk | 25 -----
 workflow/rules/download/osm.smk | 20 ----
 workflow/rules/download/wri-powerplants.smk
| 24 ----- workflow/rules/preprocess/IRIS.smk | 29 ------ workflow/rules/preprocess/STORM.smk | 29 ------ workflow/rules/risk/.gitkeep | 0 workflow/scripts/download/scrape_url.py | 31 ------ .../aggregate_to_admin_area.py | 0 .../aggregate_to_admin_area.smk | 4 +- .../concat_and_sum_slices.py | 0 .../direct_damages.py | 0 .../flood_damages.smk | 4 +- .../network_raster_intersection.smk | 23 +---- .../plot_damage_distributions.py | 0 .../create_bbox_extracts.smk | 2 +- workflow/transport/create_network.smk | 27 ++++++ .../create_overall_bbox.py | 0 .../create_overall_bbox.smk | 2 +- .../transport/create_rail_network.py | 0 .../transport/create_road_network.py | 0 workflow/{scripts => transport}/join_data.py | 0 .../exposure => transport}/join_data.smk | 31 +++++- .../{scripts => transport}/join_network.py | 0 .../preprocess => transport}/join_network.smk | 2 +- .../openstreetmap.smk} | 22 +++++ .../osm_to_geoparquet.smk | 2 +- workflow/{scripts => transport}/osm_to_pq.py | 0 .../prepare-extracts.py | 0 .../{rules/preprocess => transport}/slice.smk | 0 workflow/{scripts => }/transport/utils.py | 0 .../IBTrACS.smk | 18 +++- workflow/tropical-cyclone/IRIS.smk | 62 ++++++++++++ .../download => tropical-cyclone}/STORM.smk | 30 ++++++ .../join_tracks.smk} | 30 +----- .../parse_IBTrACS.py | 0 .../parse_IRIS.py | 0 .../parse_STORM.py | 0 .../slice_storm_tracks.py | 0 .../tc_workflow_global_variables.smk} | 0 .../wind_fields.smk | 8 +- .../wind_fields}/concat_wind_over_sample.py | 0 .../wind_fields}/estimate_wind_fields.py | 0 .../wind_fields}/surface_roughness.py | 0 .../wind_fields}/wind_downscaling_factors.py | 0 111 files changed, 365 insertions(+), 414 deletions(-) rename {workflow/scripts => bundled_data/damage_curves/storm}/high_fragility.csv (100%) rename {workflow/scripts => bundled_data/damage_curves/storm}/lowmedium_fragility.csv (100%) rename workflow/{rules/download => context}/coastlines.smk (100%) rename workflow/{rules/preprocess => context}/gadm.smk (54%) rename workflow/{rules/download => context}/natural-earth.smk (100%) rename workflow/{rules/download/hazards.smk => flood/aqueduct.smk} (100%) rename workflow/{rules/preprocess => flood}/trim_hazard_data.smk (100%) rename workflow/{rules/download/land_cover.smk => nature-ecosystems/land-cover.smk} (100%) rename workflow/{rules/download => population-economy}/dryad-gdp.smk (100%) rename workflow/{rules/download => population-economy}/ghsl-pop.smk (66%) rename workflow/{scripts/exposure => power-tc}/aggregate_grid_disruption.py (100%) rename workflow/{scripts/exposure => power-tc}/aggregate_grid_exposure.py (100%) rename workflow/{rules => power-tc}/analyse/aggregate_levels.smk (91%) rename workflow/{scripts => power-tc}/analyse/common_functions.py (100%) rename workflow/{rules => power-tc}/analyse/country_pair_matrix.smk (80%) rename workflow/{rules => power-tc}/analyse/empirical_distribution.smk (89%) rename workflow/{rules => power-tc}/analyse/extract_percentile.smk (88%) rename workflow/{scripts => power-tc/analyse}/network_components.py (100%) rename workflow/{rules => power-tc}/analyse/network_components.smk (92%) rename workflow/{rules => power-tc}/analyse/power_analyse_all.smk (100%) rename workflow/{scripts => power-tc}/analyse/select_percentile.py (100%) rename workflow/{scripts => power-tc}/analyse/storm_aggregate_levels.py (100%) rename workflow/{scripts => power-tc}/analyse/storm_distribution_empirical.py (100%) rename workflow/{scripts => power-tc}/analyse/storm_distribution_empirical_country_matrix.py (100%) rename 
workflow/{scripts => power-tc}/analyse/storm_distribution_empirical_geo.py (100%) rename workflow/{rules => power-tc}/analyse/target_analysis.smk (87%) rename workflow/{scripts => power-tc}/analyse/transmission_aggregate.py (100%) rename workflow/{rules => power-tc}/analyse/transmission_aggregate.smk (91%) rename workflow/{rules/target => power-tc}/cyclone-grid.smk (100%) rename workflow/{rules/exposure/electricity_grid => power-tc}/disruption.smk (99%) rename workflow/{rules/exposure/electricity_grid => power-tc}/exposure.smk (97%) rename workflow/{scripts/intersect => power-tc}/grid_disruption.py (100%) rename workflow/{scripts/exposure => power-tc}/grid_disruption_by_admin_region.py (100%) rename workflow/{scripts/exposure => power-tc}/grid_exposure_by_admin_region.py (100%) rename workflow/{rules/exposure/electricity_grid => power-tc}/intersection.smk (99%) rename workflow/{rules/analyse => power-tc}/map/outages.smk (100%) rename workflow/{rules/analyse => power-tc}/map/storm_tracks.smk (100%) rename workflow/{rules/analyse => power-tc}/map/wind_fields.smk (100%) create mode 100644 workflow/power-tc/network_raster_intersection.smk rename workflow/{scripts/analyse/figures => power-tc/plot}/EAD_EACA-finder.py (100%) rename workflow/{scripts/analyse/figures => power-tc/plot}/cross_correlation.py (100%) rename workflow/{rules/analyse => power-tc}/plot/customers_affected_by_storm.smk (100%) rename workflow/{scripts/analyse/figures => power-tc/plot}/diff_agg.py (100%) rename workflow/{scripts/analyse/figures => power-tc/plot}/diff_cross_correlation.py (100%) rename workflow/{rules/analyse => power-tc}/plot/fig_CDFs.smk (87%) rename workflow/{rules/analyse => power-tc}/plot/fig_EACA.smk (89%) rename workflow/{rules/analyse => power-tc}/plot/fig_EAD.smk (89%) rename workflow/{rules/analyse => power-tc}/plot/fig_EAD_EACA.smk (90%) rename workflow/{rules/analyse => power-tc}/plot/fig_cross_correlation.smk (81%) rename workflow/{scripts/analyse/figures => power-tc/plot}/fig_initial.py (100%) rename workflow/{rules/analyse => power-tc}/plot/fig_initial.smk (89%) rename workflow/{rules/analyse => power-tc}/plot/fig_master.smk (100%) rename workflow/{scripts/analyse/figures => power-tc/plot}/mean_agg.py (100%) rename workflow/{scripts/analyse/figures => power-tc/plot}/plot_together.py (100%) rename workflow/{scripts/analyse/figures => power-tc/plot}/plotter.py (100%) rename workflow/{rules/analyse => power-tc}/plot/target_disruption.smk (100%) rename workflow/{scripts/exposure => power-tc}/plot_exposure_distributions.py (100%) rename workflow/{scripts/preprocess => power}/annotate_targets.py (100%) rename workflow/{scripts/preprocess => power}/create_electricity_network.py (100%) rename workflow/{rules/preprocess => power}/create_network.smk (90%) rename workflow/{rules/preprocess/targets.smk => power/gridfinder-targets.smk} (98%) rename workflow/{rules/download => power}/gridfinder.smk (100%) rename workflow/{scripts/preprocess => power}/slice_network_assets.py (100%) rename workflow/{rules/preprocess/powerplants.smk => power/wri-powerplants.smk} (68%) delete mode 100644 workflow/rules/download/IBTrACS.smk delete mode 100644 workflow/rules/download/IRIS.smk delete mode 100644 workflow/rules/download/gadm.smk delete mode 100644 workflow/rules/download/osm.smk delete mode 100644 workflow/rules/download/wri-powerplants.smk delete mode 100644 workflow/rules/preprocess/IRIS.smk delete mode 100644 workflow/rules/preprocess/STORM.smk delete mode 100644 workflow/rules/risk/.gitkeep delete mode 100644 
workflow/scripts/download/scrape_url.py rename workflow/{scripts => transport-flood}/aggregate_to_admin_area.py (100%) rename workflow/{rules/exposure => transport-flood}/aggregate_to_admin_area.smk (94%) rename workflow/{scripts => transport-flood}/concat_and_sum_slices.py (100%) rename workflow/{scripts => transport-flood}/direct_damages.py (100%) rename workflow/{rules/exposure => transport-flood}/flood_damages.smk (94%) rename workflow/{rules/exposure => transport-flood}/network_raster_intersection.smk (57%) rename workflow/{scripts => transport-flood}/plot_damage_distributions.py (100%) rename workflow/{rules/preprocess => transport}/create_bbox_extracts.smk (94%) create mode 100644 workflow/transport/create_network.smk rename workflow/{scripts => transport}/create_overall_bbox.py (100%) rename workflow/{rules/preprocess => transport}/create_overall_bbox.smk (87%) rename workflow/{scripts => }/transport/create_rail_network.py (100%) rename workflow/{scripts => }/transport/create_road_network.py (100%) rename workflow/{scripts => transport}/join_data.py (100%) rename workflow/{rules/exposure => transport}/join_data.smk (64%) rename workflow/{scripts => transport}/join_network.py (100%) rename workflow/{rules/preprocess => transport}/join_network.smk (96%) rename workflow/{rules/preprocess/filter_osm_data.smk => transport/openstreetmap.smk} (56%) rename workflow/{rules/preprocess => transport}/osm_to_geoparquet.smk (94%) rename workflow/{scripts => transport}/osm_to_pq.py (100%) rename workflow/{scripts => transport}/prepare-extracts.py (100%) rename workflow/{rules/preprocess => transport}/slice.smk (100%) rename workflow/{scripts => }/transport/utils.py (100%) rename workflow/{rules/preprocess => tropical-cyclone}/IBTrACS.smk (61%) create mode 100644 workflow/tropical-cyclone/IRIS.smk rename workflow/{rules/download => tropical-cyclone}/STORM.smk (89%) rename workflow/{rules/preprocess/join_data.smk => tropical-cyclone/join_tracks.smk} (60%) rename workflow/{scripts/preprocess => tropical-cyclone}/parse_IBTrACS.py (100%) rename workflow/{scripts/preprocess => tropical-cyclone}/parse_IRIS.py (100%) rename workflow/{scripts/preprocess => tropical-cyclone}/parse_STORM.py (100%) rename workflow/{scripts/preprocess => tropical-cyclone}/slice_storm_tracks.py (100%) rename workflow/{rules/storm_workflow_global_variables.smk => tropical-cyclone/tc_workflow_global_variables.smk} (100%) rename workflow/{rules/exposure => tropical-cyclone}/wind_fields.smk (98%) rename workflow/{scripts/exposure => tropical-cyclone/wind_fields}/concat_wind_over_sample.py (100%) rename workflow/{scripts/intersect => tropical-cyclone/wind_fields}/estimate_wind_fields.py (100%) rename workflow/{scripts/intersect => tropical-cyclone/wind_fields}/surface_roughness.py (100%) rename workflow/{scripts/intersect => tropical-cyclone/wind_fields}/wind_downscaling_factors.py (100%) diff --git a/workflow/scripts/high_fragility.csv b/bundled_data/damage_curves/storm/high_fragility.csv similarity index 100% rename from workflow/scripts/high_fragility.csv rename to bundled_data/damage_curves/storm/high_fragility.csv diff --git a/workflow/scripts/lowmedium_fragility.csv b/bundled_data/damage_curves/storm/lowmedium_fragility.csv similarity index 100% rename from workflow/scripts/lowmedium_fragility.csv rename to bundled_data/damage_curves/storm/lowmedium_fragility.csv diff --git a/workflow/Snakefile b/workflow/Snakefile index 5c6c5b88..1b83b4e7 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -96,53 +96,52 @@ 
wildcard_constraints:
    TIFF_FILE="[^\/\.\s]+\.[tT][iI][fF][fF]?",
# generate values for global variables used across rules
-include: "rules/storm_workflow_global_variables.smk"
+include: "tropical-cyclone/tc_workflow_global_variables.smk"
##### load rules #####
-include: "rules/download/coastlines.smk"
-include: "rules/download/natural-earth.smk"
-include: "rules/download/STORM.smk"
-include: "rules/download/IRIS.smk"
-include: "rules/download/IBTrACS.smk"
-include: "rules/download/gadm.smk"
-include: "rules/download/gridfinder.smk"
-include: "rules/download/ghsl-pop.smk"
-include: "rules/download/hazards.smk"
-include: "rules/download/dryad-gdp.smk"
-include: "rules/download/wri-powerplants.smk"
-include: "rules/download/osm.smk"
-include: "rules/download/land_cover.smk"
-
-include: "rules/preprocess/gadm.smk"
-include: "rules/preprocess/filter_osm_data.smk"
-include: "rules/preprocess/trim_hazard_data.smk"
-include: "rules/preprocess/create_bbox_extracts.smk"
-include: "rules/preprocess/slice.smk"
-include: "rules/preprocess/join_network.smk"
-include: "rules/preprocess/targets.smk"
-include: "rules/preprocess/create_network.smk"
-include: "rules/preprocess/join_data.smk"
-include: "rules/preprocess/osm_to_geoparquet.smk"
-include: "rules/preprocess/create_overall_bbox.smk"
-include: "rules/preprocess/powerplants.smk"
-include: "rules/preprocess/IBTrACS.smk"
-include: "rules/preprocess/STORM.smk"
-include: "rules/preprocess/IRIS.smk"
-
-include: "rules/exposure/join_data.smk"
-include: "rules/exposure/network_raster_intersection.smk"
-include: "rules/exposure/wind_fields.smk"
-include: "rules/exposure/flood_damages.smk"
-include: "rules/exposure/electricity_grid/intersection.smk"
-include: "rules/exposure/electricity_grid/exposure.smk"
-include: "rules/exposure/electricity_grid/disruption.smk"
-include: "rules/exposure/aggregate_to_admin_area.smk"
-
-include: "rules/analyse/network_components.smk"
-include: "rules/analyse/map/storm_tracks.smk"
-include: "rules/analyse/map/outages.smk"
-include: "rules/analyse/map/wind_fields.smk"
-include: "rules/analyse/plot/target_disruption.smk"
-include: "rules/analyse/plot/customers_affected_by_storm.smk"
-
-include: "rules/target/cyclone-grid.smk"
+include: "context/coastlines.smk"
+include: "context/gadm.smk"
+include: "context/natural-earth.smk"
+
+include: "nature-ecosystems/land-cover.smk"
+include: "population-economy/dryad-gdp.smk"
+include: "population-economy/ghsl-pop.smk"
+
+include: "power/gridfinder.smk"
+include: "power/wri-powerplants.smk"
+include: "power/gridfinder-targets.smk"
+include: "power/create_network.smk"
+
+include: "transport/openstreetmap.smk"
+include: "transport/create_bbox_extracts.smk"
+include: "transport/slice.smk"
+include: "transport/join_network.smk"
+include: "transport/create_network.smk"
+include: "transport/osm_to_geoparquet.smk"
+include: "transport/create_overall_bbox.smk"
+include: "transport/join_data.smk"
+
+include: "flood/aqueduct.smk"
+include: "flood/trim_hazard_data.smk"
+
+include: "tropical-cyclone/IBTrACS.smk"
+include: "tropical-cyclone/IRIS.smk"
+include: "tropical-cyclone/STORM.smk"
+include: "tropical-cyclone/join_tracks.smk"
+include: "tropical-cyclone/wind_fields.smk"
+
+include: "transport-flood/network_raster_intersection.smk"
+include: "transport-flood/flood_damages.smk"
+include: "transport-flood/aggregate_to_admin_area.smk"
+
+include: "power-tc/network_raster_intersection.smk"
+include: "power-tc/intersection.smk"
+include: "power-tc/exposure.smk"
+include: "power-tc/disruption.smk"
+include: "power-tc/analyse/network_components.smk" +include: "power-tc/map/storm_tracks.smk" +include: "power-tc/map/outages.smk" +include: "power-tc/map/wind_fields.smk" +include: "power-tc/plot/target_disruption.smk" +include: "power-tc/plot/customers_affected_by_storm.smk" +include: "power-tc/cyclone-grid.smk" diff --git a/workflow/rules/download/coastlines.smk b/workflow/context/coastlines.smk similarity index 100% rename from workflow/rules/download/coastlines.smk rename to workflow/context/coastlines.smk diff --git a/workflow/rules/preprocess/gadm.smk b/workflow/context/gadm.smk similarity index 54% rename from workflow/rules/preprocess/gadm.smk rename to workflow/context/gadm.smk index c950b9af..a06c6b97 100644 --- a/workflow/rules/preprocess/gadm.smk +++ b/workflow/context/gadm.smk @@ -1,3 +1,29 @@ +""" +Download GADM boundaries + +Reference +--------- +https://gadm.org/data.html +""" + + +rule download_gadm_levels: + output: + gpkg = "{OUTPUT_DIR}/input/admin-boundaries/gadm36_levels.gpkg" + shell: + """ + wget https://geodata.ucdavis.edu/gadm/gadm3.6/gadm36_levels_gpkg.zip \ + --output-document={wildcards.OUTPUT_DIR}/input/admin-boundaries/gadm36_levels_gpkg.zip + unzip -o {wildcards.OUTPUT_DIR}/input/admin-boundaries/gadm36_levels_gpkg.zip \ + -d {wildcards.OUTPUT_DIR}/input/admin-boundaries + rm {wildcards.OUTPUT_DIR}/input/admin-boundaries/gadm36_levels_gpkg.zip + """ + +""" +Test with: +snakemake -c1 -- results/input/admin-boundaries/gadm36_levels.gpkg +""" + rule simplify_admin_bounds: input: all_admin_bounds = rules.download_gadm_levels.output.gpkg diff --git a/workflow/rules/download/natural-earth.smk b/workflow/context/natural-earth.smk similarity index 100% rename from workflow/rules/download/natural-earth.smk rename to workflow/context/natural-earth.smk diff --git a/workflow/rules/download/hazards.smk b/workflow/flood/aqueduct.smk similarity index 100% rename from workflow/rules/download/hazards.smk rename to workflow/flood/aqueduct.smk diff --git a/workflow/rules/preprocess/trim_hazard_data.smk b/workflow/flood/trim_hazard_data.smk similarity index 100% rename from workflow/rules/preprocess/trim_hazard_data.smk rename to workflow/flood/trim_hazard_data.smk diff --git a/workflow/rules/download/land_cover.smk b/workflow/nature-ecosystems/land-cover.smk similarity index 100% rename from workflow/rules/download/land_cover.smk rename to workflow/nature-ecosystems/land-cover.smk diff --git a/workflow/rules/download/dryad-gdp.smk b/workflow/population-economy/dryad-gdp.smk similarity index 100% rename from workflow/rules/download/dryad-gdp.smk rename to workflow/population-economy/dryad-gdp.smk diff --git a/workflow/rules/download/ghsl-pop.smk b/workflow/population-economy/ghsl-pop.smk similarity index 66% rename from workflow/rules/download/ghsl-pop.smk rename to workflow/population-economy/ghsl-pop.smk index 09ed6f36..60e6537a 100644 --- a/workflow/rules/download/ghsl-pop.smk +++ b/workflow/population-economy/ghsl-pop.smk @@ -23,20 +23,20 @@ Concept & Methodology: rule download_ghsl: output: - "{OUTPUT_DIR}/input/ghsl/GHS_POP_E{YEAR}_GLOBE_R2022A_54009_{RESOLUTION}_V1_0.tif" + "{OUTPUT_DIR}/input/ghsl/GHS_POP_E{YEAR}_GLOBE_{RELEASE}_54009_{RESOLUTION}_V1_0.tif" wildcard_constraints: - YEAR="1975|1980|1985|1990|1995|2000|2005|2010|2015|2020|2025|2030", - RESOLUTION="1000?" 
+ YEAR=range(1975, 2031, 5), + RESOLUTION="100|1000" shell: """ output_dir=$(dirname {output}) mkdir -p $output_dir - wget -nc https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/GHSL/GHS_POP_GLOBE_R2022A/GHS_POP_E{wildcards.YEAR}_GLOBE_R2022A_54009_{wildcards.RESOLUTION}/V1-0/GHS_POP_E{wildcards.YEAR}_GLOBE_R2022A_54009_{wildcards.RESOLUTION}_V1_0.zip \ + wget -nc https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/GHSL/GHS_POP_GLOBE_{wildcards.RELEASE}/GHS_POP_E{wildcards.YEAR}_GLOBE_{wildcards.RELEASE}_54009_{wildcards.RESOLUTION}/V1-0/GHS_POP_E{wildcards.YEAR}_GLOBE_{wildcards.RELEASE}_54009_{wildcards.RESOLUTION}_V1_0.zip \ --directory-prefix=$output_dir - unzip -o $output_dir/GHS_POP_E{wildcards.YEAR}_GLOBE_R2022A_54009_{wildcards.RESOLUTION}_V1_0.zip \ + unzip -o $output_dir/GHS_POP_E{wildcards.YEAR}_GLOBE_{wildcards.RELEASE}_54009_{wildcards.RESOLUTION}_V1_0.zip \ -d $output_dir """ @@ -47,8 +47,9 @@ rule download_ghsl_all: "{{OUTPUT_DIR}}", "input", "ghsl", - "GHS_POP_E{year}_GLOBE_R2022A_54009_{resolution}_V1_0.tif", + "GHS_POP_E{year}_GLOBE_{release}_54009_{resolution}_V1_0.tif", ), resolution=(100, 1000), - year=(2020, ) + year=(2020, ), + release="R2022A" # TODO bump to R2023A ) diff --git a/workflow/scripts/exposure/aggregate_grid_disruption.py b/workflow/power-tc/aggregate_grid_disruption.py similarity index 100% rename from workflow/scripts/exposure/aggregate_grid_disruption.py rename to workflow/power-tc/aggregate_grid_disruption.py diff --git a/workflow/scripts/exposure/aggregate_grid_exposure.py b/workflow/power-tc/aggregate_grid_exposure.py similarity index 100% rename from workflow/scripts/exposure/aggregate_grid_exposure.py rename to workflow/power-tc/aggregate_grid_exposure.py diff --git a/workflow/rules/analyse/aggregate_levels.smk b/workflow/power-tc/analyse/aggregate_levels.smk similarity index 91% rename from workflow/rules/analyse/aggregate_levels.smk rename to workflow/power-tc/analyse/aggregate_levels.smk index 50b489a4..d44896a1 100644 --- a/workflow/rules/analyse/aggregate_levels.smk +++ b/workflow/power-tc/analyse/aggregate_levels.smk @@ -30,4 +30,4 @@ rule analyse_aggregate_levels: output: AGGREGATE_LEVELS_OUT, script: - os.path.join("..", "..", "scripts", "analyse", "storm_aggregate_levels.py") + "./storm_aggregate_levels.py" diff --git a/workflow/scripts/analyse/common_functions.py b/workflow/power-tc/analyse/common_functions.py similarity index 100% rename from workflow/scripts/analyse/common_functions.py rename to workflow/power-tc/analyse/common_functions.py diff --git a/workflow/rules/analyse/country_pair_matrix.smk b/workflow/power-tc/analyse/country_pair_matrix.smk similarity index 80% rename from workflow/rules/analyse/country_pair_matrix.smk rename to workflow/power-tc/analyse/country_pair_matrix.smk index a280ffec..e83f9261 100644 --- a/workflow/rules/analyse/country_pair_matrix.smk +++ b/workflow/power-tc/analyse/country_pair_matrix.smk @@ -22,10 +22,4 @@ rule analyse_country_matrix: output: COUNTRY_MATRIX_OUTPUT, script: - os.path.join( - "..", - "..", - "scripts", - "analyse", - "storm_distribution_empirical_country_matrix.py", - ) + "./storm_distribution_empirical_country_matrix.py" diff --git a/workflow/rules/analyse/empirical_distribution.smk b/workflow/power-tc/analyse/empirical_distribution.smk similarity index 89% rename from workflow/rules/analyse/empirical_distribution.smk rename to workflow/power-tc/analyse/empirical_distribution.smk index 226e46a3..7c958492 100644 --- a/workflow/rules/analyse/empirical_distribution.smk +++ 
b/workflow/power-tc/analyse/empirical_distribution.smk @@ -23,4 +23,4 @@ rule analyse_empirical_distribution: output: EMPIRICAL_DISTRIBUTION_OUT script: - os.path.join('..', '..', 'scripts', 'analyse' , 'storm_distribution_empirical.py') + './storm_distribution_empirical.py' diff --git a/workflow/rules/analyse/extract_percentile.smk b/workflow/power-tc/analyse/extract_percentile.smk similarity index 88% rename from workflow/rules/analyse/extract_percentile.smk rename to workflow/power-tc/analyse/extract_percentile.smk index 9dfc5b7e..faeb6777 100644 --- a/workflow/rules/analyse/extract_percentile.smk +++ b/workflow/power-tc/analyse/extract_percentile.smk @@ -24,4 +24,4 @@ rule analyse_percentile: output: PERCENTILE_OUT, script: - os.path.join("..", "..", "scripts", "analyse", "select_percentile.py") + "./select_percentile.py" diff --git a/workflow/scripts/network_components.py b/workflow/power-tc/analyse/network_components.py similarity index 100% rename from workflow/scripts/network_components.py rename to workflow/power-tc/analyse/network_components.py diff --git a/workflow/rules/analyse/network_components.smk b/workflow/power-tc/analyse/network_components.smk similarity index 92% rename from workflow/rules/analyse/network_components.smk rename to workflow/power-tc/analyse/network_components.smk index b738ce17..f5124f6a 100644 --- a/workflow/rules/analyse/network_components.smk +++ b/workflow/power-tc/analyse/network_components.smk @@ -7,7 +7,7 @@ rule network_components: component_map="{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}/network_map_by_component.png", component_data="{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}/components.parquet" script: - "../../scripts/network_components.py" + "./network_components.py" """ Test with: diff --git a/workflow/rules/analyse/power_analyse_all.smk b/workflow/power-tc/analyse/power_analyse_all.smk similarity index 100% rename from workflow/rules/analyse/power_analyse_all.smk rename to workflow/power-tc/analyse/power_analyse_all.smk diff --git a/workflow/scripts/analyse/select_percentile.py b/workflow/power-tc/analyse/select_percentile.py similarity index 100% rename from workflow/scripts/analyse/select_percentile.py rename to workflow/power-tc/analyse/select_percentile.py diff --git a/workflow/scripts/analyse/storm_aggregate_levels.py b/workflow/power-tc/analyse/storm_aggregate_levels.py similarity index 100% rename from workflow/scripts/analyse/storm_aggregate_levels.py rename to workflow/power-tc/analyse/storm_aggregate_levels.py diff --git a/workflow/scripts/analyse/storm_distribution_empirical.py b/workflow/power-tc/analyse/storm_distribution_empirical.py similarity index 100% rename from workflow/scripts/analyse/storm_distribution_empirical.py rename to workflow/power-tc/analyse/storm_distribution_empirical.py diff --git a/workflow/scripts/analyse/storm_distribution_empirical_country_matrix.py b/workflow/power-tc/analyse/storm_distribution_empirical_country_matrix.py similarity index 100% rename from workflow/scripts/analyse/storm_distribution_empirical_country_matrix.py rename to workflow/power-tc/analyse/storm_distribution_empirical_country_matrix.py diff --git a/workflow/scripts/analyse/storm_distribution_empirical_geo.py b/workflow/power-tc/analyse/storm_distribution_empirical_geo.py similarity index 100% rename from workflow/scripts/analyse/storm_distribution_empirical_geo.py rename to workflow/power-tc/analyse/storm_distribution_empirical_geo.py diff --git a/workflow/rules/analyse/target_analysis.smk b/workflow/power-tc/analyse/target_analysis.smk 
similarity index 87% rename from workflow/rules/analyse/target_analysis.smk rename to workflow/power-tc/analyse/target_analysis.smk index 6bbb6c98..c35972a3 100644 --- a/workflow/rules/analyse/target_analysis.smk +++ b/workflow/power-tc/analyse/target_analysis.smk @@ -26,6 +26,4 @@ rule analyse_targets: f"targets_geo_top{config['top_select']}{SORT_BY_INCREASING_SEVERITY}percent.gpkg", ), script: - os.path.join( - "..", "..", "scripts", "analyse", "storm_distribution_empirical_geo.py" - ) + "./storm_distribution_empirical_geo.py" diff --git a/workflow/scripts/analyse/transmission_aggregate.py b/workflow/power-tc/analyse/transmission_aggregate.py similarity index 100% rename from workflow/scripts/analyse/transmission_aggregate.py rename to workflow/power-tc/analyse/transmission_aggregate.py diff --git a/workflow/rules/analyse/transmission_aggregate.smk b/workflow/power-tc/analyse/transmission_aggregate.smk similarity index 91% rename from workflow/rules/analyse/transmission_aggregate.smk rename to workflow/power-tc/analyse/transmission_aggregate.smk index fecfad9d..474e5228 100644 --- a/workflow/rules/analyse/transmission_aggregate.smk +++ b/workflow/power-tc/analyse/transmission_aggregate.smk @@ -37,4 +37,4 @@ rule analyse_transmission: output: TRANSMISSION_OUT, script: - os.path.join("..", "..", "scripts", "analyse", "transmission_aggregate.py") + "./transmission_aggregate.py" diff --git a/workflow/rules/target/cyclone-grid.smk b/workflow/power-tc/cyclone-grid.smk similarity index 100% rename from workflow/rules/target/cyclone-grid.smk rename to workflow/power-tc/cyclone-grid.smk diff --git a/workflow/rules/exposure/electricity_grid/disruption.smk b/workflow/power-tc/disruption.smk similarity index 99% rename from workflow/rules/exposure/electricity_grid/disruption.smk rename to workflow/power-tc/disruption.smk index 196dfe2a..cdee616b 100644 --- a/workflow/rules/exposure/electricity_grid/disruption.smk +++ b/workflow/power-tc/disruption.smk @@ -73,7 +73,7 @@ rule aggregate_disruption_within_sample: by_event = temp(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/disruption/{STORM_SET}/{SAMPLE}_pop_affected_by_event.pq")), by_target = temp(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/disruption/{STORM_SET}/{SAMPLE}_pop_affected_by_target.pq")), script: - "../../../scripts/exposure/aggregate_grid_disruption.py" + "./aggregate_grid_disruption.py" """ Test with: @@ -423,7 +423,7 @@ rule disruption_by_admin_region: output: expected_annual_disruption = "{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/disruption/{STORM_SET}/EAPA_{ADMIN_SLUG}.gpq", script: - "../../../scripts/exposure/grid_disruption_by_admin_region.py" + "./grid_disruption_by_admin_region.py" """ Test with: diff --git a/workflow/rules/exposure/electricity_grid/exposure.smk b/workflow/power-tc/exposure.smk similarity index 97% rename from workflow/rules/exposure/electricity_grid/exposure.smk rename to workflow/power-tc/exposure.smk index 08c22e54..e41b30fb 100644 --- a/workflow/rules/exposure/electricity_grid/exposure.smk +++ b/workflow/power-tc/exposure.smk @@ -20,7 +20,7 @@ rule aggregate_exposure_within_sample: by_event = temp(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/{SAMPLE}_length_m_by_event.pq")), by_edge = temp(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/{SAMPLE}_length_m_by_edge.pq")), script: - "../../../scripts/exposure/aggregate_grid_exposure.py" + "./aggregate_grid_exposure.py" """ Test with: @@ -103,7 +103,7 @@ rule 
plot_event_exposure_distributions_for_country: output: country_event_distributions = directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/length_m_event_dist/") script: - "../../../scripts/exposure/plot_exposure_distributions.py" + "./plot_exposure_distributions.py" """ Test with: @@ -124,7 +124,7 @@ rule exposure_by_admin_region: output: expected_annual_exposure = "{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/EAE_{ADMIN_SLUG}.gpq", script: - "../../../scripts/exposure/grid_exposure_by_admin_region.py" + "./grid_exposure_by_admin_region.py" """ Test with: @@ -233,4 +233,4 @@ rule merge_exposure_admin_levels: merged = merge_gadm_admin_levels(merged, other) merged.reset_index(drop=True).sort_index(axis=1).to_parquet(output.merged_admin_levels) - logging.info("Done") \ No newline at end of file + logging.info("Done") diff --git a/workflow/scripts/intersect/grid_disruption.py b/workflow/power-tc/grid_disruption.py similarity index 100% rename from workflow/scripts/intersect/grid_disruption.py rename to workflow/power-tc/grid_disruption.py diff --git a/workflow/scripts/exposure/grid_disruption_by_admin_region.py b/workflow/power-tc/grid_disruption_by_admin_region.py similarity index 100% rename from workflow/scripts/exposure/grid_disruption_by_admin_region.py rename to workflow/power-tc/grid_disruption_by_admin_region.py diff --git a/workflow/scripts/exposure/grid_exposure_by_admin_region.py b/workflow/power-tc/grid_exposure_by_admin_region.py similarity index 100% rename from workflow/scripts/exposure/grid_exposure_by_admin_region.py rename to workflow/power-tc/grid_exposure_by_admin_region.py diff --git a/workflow/rules/exposure/electricity_grid/intersection.smk b/workflow/power-tc/intersection.smk similarity index 99% rename from workflow/rules/exposure/electricity_grid/intersection.smk rename to workflow/power-tc/intersection.smk index b91e70b9..dba12462 100644 --- a/workflow/rules/exposure/electricity_grid/intersection.smk +++ b/workflow/power-tc/intersection.smk @@ -145,7 +145,7 @@ rule electricity_grid_damages: exposure = protected(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/{SAMPLE}/")), disruption = protected(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/disruption/{STORM_SET}/{SAMPLE}/")), script: - "../../../scripts/intersect/grid_disruption.py" + "./grid_disruption.py" """ Test with: diff --git a/workflow/rules/analyse/map/outages.smk b/workflow/power-tc/map/outages.smk similarity index 100% rename from workflow/rules/analyse/map/outages.smk rename to workflow/power-tc/map/outages.smk diff --git a/workflow/rules/analyse/map/storm_tracks.smk b/workflow/power-tc/map/storm_tracks.smk similarity index 100% rename from workflow/rules/analyse/map/storm_tracks.smk rename to workflow/power-tc/map/storm_tracks.smk diff --git a/workflow/rules/analyse/map/wind_fields.smk b/workflow/power-tc/map/wind_fields.smk similarity index 100% rename from workflow/rules/analyse/map/wind_fields.smk rename to workflow/power-tc/map/wind_fields.smk diff --git a/workflow/power-tc/network_raster_intersection.smk b/workflow/power-tc/network_raster_intersection.smk new file mode 100644 index 00000000..574de01a --- /dev/null +++ b/workflow/power-tc/network_raster_intersection.smk @@ -0,0 +1,23 @@ +""" +Intersect a network representation with hazard rasters +""" + +rule rasterise_electricity_grid: + """ + Split electricity network edges on raster grid + Assign raster indicies to edges + """ + input: + 
network="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/network/edges.geoparquet", + tif_paths=["{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/wind_grid.tiff"], + params: + copy_raster_values=False, + output: + geoparquet="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/edges_split.geoparquet", + script: + "../scripts/intersection.py" + +""" +Test with: +snakemake --cores 1 results/power/by_country/PRI/exposure/edges_split.geoparquet +""" diff --git a/workflow/scripts/analyse/figures/EAD_EACA-finder.py b/workflow/power-tc/plot/EAD_EACA-finder.py similarity index 100% rename from workflow/scripts/analyse/figures/EAD_EACA-finder.py rename to workflow/power-tc/plot/EAD_EACA-finder.py diff --git a/workflow/scripts/analyse/figures/cross_correlation.py b/workflow/power-tc/plot/cross_correlation.py similarity index 100% rename from workflow/scripts/analyse/figures/cross_correlation.py rename to workflow/power-tc/plot/cross_correlation.py diff --git a/workflow/rules/analyse/plot/customers_affected_by_storm.smk b/workflow/power-tc/plot/customers_affected_by_storm.smk similarity index 100% rename from workflow/rules/analyse/plot/customers_affected_by_storm.smk rename to workflow/power-tc/plot/customers_affected_by_storm.smk diff --git a/workflow/scripts/analyse/figures/diff_agg.py b/workflow/power-tc/plot/diff_agg.py similarity index 100% rename from workflow/scripts/analyse/figures/diff_agg.py rename to workflow/power-tc/plot/diff_agg.py diff --git a/workflow/scripts/analyse/figures/diff_cross_correlation.py b/workflow/power-tc/plot/diff_cross_correlation.py similarity index 100% rename from workflow/scripts/analyse/figures/diff_cross_correlation.py rename to workflow/power-tc/plot/diff_cross_correlation.py diff --git a/workflow/rules/analyse/plot/fig_CDFs.smk b/workflow/power-tc/plot/fig_CDFs.smk similarity index 87% rename from workflow/rules/analyse/plot/fig_CDFs.smk rename to workflow/power-tc/plot/fig_CDFs.smk index 3ccf5f3b..487a7ff5 100644 --- a/workflow/rules/analyse/plot/fig_CDFs.smk +++ b/workflow/power-tc/plot/fig_CDFs.smk @@ -47,9 +47,7 @@ rule fig_cdfs_EAD: output: out_cdf_EAD, script: - os.path.join( - "..", "..", "..", "scripts", "analyse", "figures", "plot_together.py" - ) + "./plot_together.py" rule fig_cdfs_EACA: @@ -64,6 +62,4 @@ rule fig_cdfs_EACA: output: out_cdf_EACA, script: - os.path.join( - "..", "..", "..", "scripts", "analyse", "figures", "plot_together.py" - ) + "./plot_together.py" diff --git a/workflow/rules/analyse/plot/fig_EACA.smk b/workflow/power-tc/plot/fig_EACA.smk similarity index 89% rename from workflow/rules/analyse/plot/fig_EACA.smk rename to workflow/power-tc/plot/fig_EACA.smk index 158e06e4..c0d03d0d 100644 --- a/workflow/rules/analyse/plot/fig_EACA.smk +++ b/workflow/power-tc/plot/fig_EACA.smk @@ -50,7 +50,7 @@ rule fig_aggregate_EACA: output: out_agg_EACA=out_agg_EACA_file, # script: - os.path.join("..", "..", "..", "scripts", "analyse", "figures", "mean_agg.py") + "./mean_agg.py" rule fig_diff_EACA: @@ -72,7 +72,7 @@ rule fig_diff_EACA: output: out_diff_EACA=out_diff_EACA_file, script: - os.path.join("..", "..", "..", "scripts", "analyse", "figures", "diff_agg.py") + "./diff_agg.py" rule fig_plot_current_EACA: @@ -90,7 +90,7 @@ rule fig_plot_current_EACA: output: out_agg_EACA_plot, script: - os.path.join("..", "..", "..", "scripts", "analyse", "figures", "plotter.py") + "./plotter.py" rule fig_plot_diff_EACA: @@ -108,4 +108,4 @@ rule fig_plot_diff_EACA: output: out_agg_EACA_plot_perc, script: - os.path.join("..", "..", "..", "scripts", 
"analyse", "figures", "plotter.py") + "./plotter.py" diff --git a/workflow/rules/analyse/plot/fig_EAD.smk b/workflow/power-tc/plot/fig_EAD.smk similarity index 89% rename from workflow/rules/analyse/plot/fig_EAD.smk rename to workflow/power-tc/plot/fig_EAD.smk index e819485c..235d404f 100644 --- a/workflow/rules/analyse/plot/fig_EAD.smk +++ b/workflow/power-tc/plot/fig_EAD.smk @@ -78,7 +78,7 @@ rule fig_aggregate_EAD_agg: f"mean_{merge_key_EAD_agg}_{metric_EAD_agg}.gpkg", ), script: - os.path.join("..", "..", "..", "scripts", "analyse", "figures", "mean_agg.py") + "./mean_agg.py" rule fig_diff_EAD_agg: @@ -99,7 +99,7 @@ rule fig_diff_EAD_agg: output: out_diff_EAD_agg, script: - os.path.join("..", "..", "..", "scripts", "analyse", "figures", "diff_agg.py") + "./diff_agg.py" # rule fig_diff_EAD_agg_plot: @@ -116,7 +116,7 @@ rule fig_diff_EAD_agg: # output: # out_diff_EAD_plot # script: -# os.path.join("..", "..", "..", 'scripts', 'analyse', 'figures', 'plotter.py') +# './plotter.py' ## aggregated but normalised ## @@ -144,7 +144,7 @@ rule fig_aggregate_EAD_agg_norm: f"mean_{merge_key_EAD_agg_norm}_{metric_EAD_agg_norm}.gpkg", ), script: - os.path.join("..", "..", "..", "scripts", "analyse", "figures", "mean_agg.py") + "./mean_agg.py" rule fig_diff_EAD_agg_norm: @@ -165,7 +165,7 @@ rule fig_diff_EAD_agg_norm: output: out_diff_EAD_agg_norm, script: - os.path.join("..", "..", "..", "scripts", "analyse", "figures", "diff_agg.py") + "./diff_agg.py" # rule fig_diff_EAD_agg_plot_norm: @@ -181,7 +181,7 @@ rule fig_diff_EAD_agg_norm: # output: # out_diff_EAD_plot_norm # script: -# os.path.join("..", "..", "..", 'scripts', 'analyse', 'figures', 'plotter.py') +# './plotter.py' # @@ -210,7 +210,7 @@ rule fig_aggregate_EAD_indiv: f"mean_{merge_key_EAD_indiv}_{metric_EAD_indiv}.gpkg", ), script: - os.path.join("..", "..", "..", "scripts", "analyse", "figures", "mean_agg.py") + "./mean_agg.py" rule fig_diff_EAD_indiv: @@ -231,7 +231,7 @@ rule fig_diff_EAD_indiv: output: out_diff_EAD_indiv, script: - os.path.join("..", "..", "..", "scripts", "analyse", "figures", "diff_agg.py") + "./diff_agg.py" rule fig_diff_EAD_indiv_plot: @@ -249,7 +249,7 @@ rule fig_diff_EAD_indiv_plot: output: out_diff_EAD_indiv_plot, script: - os.path.join("..", "..", "..", "scripts", "analyse", "figures", "plotter.py") + "./plotter.py" rule fig_current_EAD_indiv_plot: @@ -273,4 +273,4 @@ rule fig_current_EAD_indiv_plot: output: out_current_EAD_indiv_plot, script: - os.path.join("..", "..", "..", "scripts", "analyse", "figures", "plotter.py") + "./plotter.py" diff --git a/workflow/rules/analyse/plot/fig_EAD_EACA.smk b/workflow/power-tc/plot/fig_EAD_EACA.smk similarity index 90% rename from workflow/rules/analyse/plot/fig_EAD_EACA.smk rename to workflow/power-tc/plot/fig_EAD_EACA.smk index e27ffbdd..9f8c5527 100644 --- a/workflow/rules/analyse/plot/fig_EAD_EACA.smk +++ b/workflow/power-tc/plot/fig_EAD_EACA.smk @@ -40,6 +40,4 @@ rule fig_EAD_EACA: output: out_EAD_EACA, script: - os.path.join( - "..", "..", "..", "scripts", "analyse", "figures", "EAD_EACA-finder.py" - ) + "./EAD_EACA-finder.py" diff --git a/workflow/rules/analyse/plot/fig_cross_correlation.smk b/workflow/power-tc/plot/fig_cross_correlation.smk similarity index 81% rename from workflow/rules/analyse/plot/fig_cross_correlation.smk rename to workflow/power-tc/plot/fig_cross_correlation.smk index d800f509..570e4d6c 100644 --- a/workflow/rules/analyse/plot/fig_cross_correlation.smk +++ b/workflow/power-tc/plot/fig_cross_correlation.smk @@ -41,9 +41,7 @@ rule 
fig_cross_correlation: output: out_cc=out_cc_file, script: - os.path.join( - "..", "..", "..", "scripts", "analyse", "figures", "cross_correlation.py" - ) + "./cross_correlation.py" rule fig_cross_correlation_diff: @@ -57,12 +55,4 @@ rule fig_cross_correlation_diff: output: out_cc_file_diff, script: - os.path.join( - "..", - "..", - "..", - "scripts", - "analyse", - "figures", - "diff_cross_correlation.py", - ) + "./diff_cross_correlation.py" diff --git a/workflow/scripts/analyse/figures/fig_initial.py b/workflow/power-tc/plot/fig_initial.py similarity index 100% rename from workflow/scripts/analyse/figures/fig_initial.py rename to workflow/power-tc/plot/fig_initial.py diff --git a/workflow/rules/analyse/plot/fig_initial.smk b/workflow/power-tc/plot/fig_initial.smk similarity index 89% rename from workflow/rules/analyse/plot/fig_initial.smk rename to workflow/power-tc/plot/fig_initial.smk index e1b111cd..0a061929 100644 --- a/workflow/rules/analyse/plot/fig_initial.smk +++ b/workflow/power-tc/plot/fig_initial.smk @@ -30,6 +30,4 @@ rule fig_checks: output: initial_checks, script: - os.path.join( - "..", "..", "..", "scripts", "analyse", "figures", "fig_initial.py" - ) + "./fig_initial.py" diff --git a/workflow/rules/analyse/plot/fig_master.smk b/workflow/power-tc/plot/fig_master.smk similarity index 100% rename from workflow/rules/analyse/plot/fig_master.smk rename to workflow/power-tc/plot/fig_master.smk diff --git a/workflow/scripts/analyse/figures/mean_agg.py b/workflow/power-tc/plot/mean_agg.py similarity index 100% rename from workflow/scripts/analyse/figures/mean_agg.py rename to workflow/power-tc/plot/mean_agg.py diff --git a/workflow/scripts/analyse/figures/plot_together.py b/workflow/power-tc/plot/plot_together.py similarity index 100% rename from workflow/scripts/analyse/figures/plot_together.py rename to workflow/power-tc/plot/plot_together.py diff --git a/workflow/scripts/analyse/figures/plotter.py b/workflow/power-tc/plot/plotter.py similarity index 100% rename from workflow/scripts/analyse/figures/plotter.py rename to workflow/power-tc/plot/plotter.py diff --git a/workflow/rules/analyse/plot/target_disruption.smk b/workflow/power-tc/plot/target_disruption.smk similarity index 100% rename from workflow/rules/analyse/plot/target_disruption.smk rename to workflow/power-tc/plot/target_disruption.smk diff --git a/workflow/scripts/exposure/plot_exposure_distributions.py b/workflow/power-tc/plot_exposure_distributions.py similarity index 100% rename from workflow/scripts/exposure/plot_exposure_distributions.py rename to workflow/power-tc/plot_exposure_distributions.py diff --git a/workflow/scripts/preprocess/annotate_targets.py b/workflow/power/annotate_targets.py similarity index 100% rename from workflow/scripts/preprocess/annotate_targets.py rename to workflow/power/annotate_targets.py diff --git a/workflow/scripts/preprocess/create_electricity_network.py b/workflow/power/create_electricity_network.py similarity index 100% rename from workflow/scripts/preprocess/create_electricity_network.py rename to workflow/power/create_electricity_network.py diff --git a/workflow/rules/preprocess/create_network.smk b/workflow/power/create_network.smk similarity index 90% rename from workflow/rules/preprocess/create_network.smk rename to workflow/power/create_network.smk index 81d64147..36b3e359 100644 --- a/workflow/rules/preprocess/create_network.smk +++ b/workflow/power/create_network.smk @@ -287,7 +287,7 @@ rule create_power_network: 
nodes="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/network/nodes.geoparquet", grid_hull="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/network/convex_hull.json", script: - "../../scripts/preprocess/create_electricity_network.py" + "./create_electricity_network.py" """ Test with: @@ -366,30 +366,3 @@ rule map_network_components: Test with: snakemake -c1 results/power/by_country/HTI/edges.png """ - - -rule create_transport_network: - """ - Take .geoparquet OSM files and output files of cleaned network nodes and edges - """ - input: - nodes="{OUTPUT_DIR}/geoparquet/{DATASET}_{FILTER_SLUG}/raw/{SLICE_SLUG}_nodes.geoparquet", - edges="{OUTPUT_DIR}/geoparquet/{DATASET}_{FILTER_SLUG}/raw/{SLICE_SLUG}_edges.geoparquet", - admin="{OUTPUT_DIR}/input/admin-boundaries/gadm36_levels.gpkg", - output: - nodes="{OUTPUT_DIR}/geoparquet/{DATASET}_{FILTER_SLUG}/processed/{SLICE_SLUG}_nodes.geoparquet", - edges="{OUTPUT_DIR}/geoparquet/{DATASET}_{FILTER_SLUG}/processed/{SLICE_SLUG}_edges.geoparquet" - params: - # determine the network type from the filter, e.g. road, rail - network_type=lambda wildcards: wildcards.FILTER_SLUG.replace('filter-', ''), - # pass in the slice number so we can label edges and nodes with their slice - # edge and node IDs should be unique across all slices - slice_number=lambda wildcards: int(wildcards.SLICE_SLUG.replace('slice-', '')) - script: - # template the path string with a value from params (can't execute .replace in `script` context) - "../../scripts/transport/create_{params.network_type}_network.py" - -""" -Test with: -snakemake --cores all results/geoparquet/tanzania-mini_filter-road/processed/slice-0_edges.geoparquet -""" diff --git a/workflow/rules/preprocess/targets.smk b/workflow/power/gridfinder-targets.smk similarity index 98% rename from workflow/rules/preprocess/targets.smk rename to workflow/power/gridfinder-targets.smk index e664c23c..4ee2f644 100644 --- a/workflow/rules/preprocess/targets.smk +++ b/workflow/power/gridfinder-targets.smk @@ -90,7 +90,7 @@ rule annotate_targets: output: targets="{OUTPUT_DIR}/power/targets.geoparquet", script: - "../../scripts/preprocess/annotate_targets.py" + "./annotate_targets.py" """ Test with: diff --git a/workflow/rules/download/gridfinder.smk b/workflow/power/gridfinder.smk similarity index 100% rename from workflow/rules/download/gridfinder.smk rename to workflow/power/gridfinder.smk diff --git a/workflow/scripts/preprocess/slice_network_assets.py b/workflow/power/slice_network_assets.py similarity index 100% rename from workflow/scripts/preprocess/slice_network_assets.py rename to workflow/power/slice_network_assets.py diff --git a/workflow/rules/preprocess/powerplants.smk b/workflow/power/wri-powerplants.smk similarity index 68% rename from workflow/rules/preprocess/powerplants.smk rename to workflow/power/wri-powerplants.smk index 9f55ad81..9df4d257 100644 --- a/workflow/rules/preprocess/powerplants.smk +++ b/workflow/power/wri-powerplants.smk @@ -1,3 +1,28 @@ +""" +Download WRI powerplants database + +Reference +--------- +https://www.wri.org/research/global-database-power-plants +""" + + +rule download_powerplants: + output: + csv = "{OUTPUT_DIR}/input/powerplants/global_power_plant_database.csv" + shell: + """ + mkdir -p {wildcards.OUTPUT_DIR}/input/powerplants + cd {wildcards.OUTPUT_DIR}/input/powerplants + wget https://wri-dataportal-prod.s3.amazonaws.com/manual/global_power_plant_database_v_1_3.zip + unzip -o global_power_plant_database_v_1_3.zip + """ + +""" +Test with: +snakemake -c1 -- 
results/input/powerplants/global_power_plant_database.csv +""" + rule parse_powerplants: """ Parse powerplant data for world and save in convenient format diff --git a/workflow/rules/download/IBTrACS.smk b/workflow/rules/download/IBTrACS.smk deleted file mode 100644 index 26563c3b..00000000 --- a/workflow/rules/download/IBTrACS.smk +++ /dev/null @@ -1,13 +0,0 @@ -rule download_IBTrACS: - output: - "{OUTPUT_DIR}/input/IBTrACS/raw/v4.csv" - shell: - """ - wget --output-document {output} \ - https://www.ncei.noaa.gov/data/international-best-track-archive-for-climate-stewardship-ibtracs/v04r00/access/csv/ibtracs.ALL.list.v04r00.csv - """ - -""" -Test with: -snakemake -c1 results/input/IBTrACS/v4.csv -""" diff --git a/workflow/rules/download/IRIS.smk b/workflow/rules/download/IRIS.smk deleted file mode 100644 index 47332447..00000000 --- a/workflow/rules/download/IRIS.smk +++ /dev/null @@ -1,32 +0,0 @@ -""" -Download the Imperial tropical cyclone storm tracks -""" - -rule download_IRIS: - """ - As of 20230626, this data is not publically available. You will need - appropriate keys to access the files on the OUCE file store. - """ - output: - zip_file = "{OUTPUT_DIR}/input/IRIS/archive.zip" - shell: - """ - mkdir -p $(dirname {output.zip_file}) - scp /ouce-home/projects/mistral/iris/iris-data.zip {output.zip_file} - """ - -""" -Test with: -snakemake -n -c1 -- results/input/IRIS/archive.zip -""" - - -rule extract_IRIS: - input: - zip_file = rules.download_IRIS.output.zip_file - output: - unzipped_dir = directory("{OUTPUT_DIR}/input/IRIS/iris-data/") - shell: - """ - unzip {input.zip_file} -d $(dirname {output.unzipped_dir}) - """ diff --git a/workflow/rules/download/gadm.smk b/workflow/rules/download/gadm.smk deleted file mode 100644 index d8d58eb8..00000000 --- a/workflow/rules/download/gadm.smk +++ /dev/null @@ -1,25 +0,0 @@ -""" -Download GADM boundaries - -Reference ---------- -https://gadm.org/data.html -""" - - -rule download_gadm_levels: - output: - gpkg = "{OUTPUT_DIR}/input/admin-boundaries/gadm36_levels.gpkg" - shell: - """ - wget https://geodata.ucdavis.edu/gadm/gadm3.6/gadm36_levels_gpkg.zip \ - --output-document={wildcards.OUTPUT_DIR}/input/admin-boundaries/gadm36_levels_gpkg.zip - unzip -o {wildcards.OUTPUT_DIR}/input/admin-boundaries/gadm36_levels_gpkg.zip \ - -d {wildcards.OUTPUT_DIR}/input/admin-boundaries - rm {wildcards.OUTPUT_DIR}/input/admin-boundaries/gadm36_levels_gpkg.zip - """ - -""" -Test with: -snakemake -c1 -- results/input/admin-boundaries/gadm36_levels.gpkg -""" \ No newline at end of file diff --git a/workflow/rules/download/osm.smk b/workflow/rules/download/osm.smk deleted file mode 100644 index d7d46cd2..00000000 --- a/workflow/rules/download/osm.smk +++ /dev/null @@ -1,20 +0,0 @@ -# Download the file specified in the config -import os -import re - - -rule download_osm: - output: - "{OUTPUT_DIR}/input/OSM/{DATASET}.osm.pbf", - run: - input_file = config["infrastructure_datasets"][wildcards.DATASET] - if re.match("^https?://", input_file): - os.system(f"wget {input_file} --output-document={output}") - else: - os.system("mkdir -p dirname {output}") - os.system(f"cp {input_file} {output}") - - """ - Test with: - snakemake --cores all results/input/OSM/tanzania-mini.osm.pbf - """ diff --git a/workflow/rules/download/wri-powerplants.smk b/workflow/rules/download/wri-powerplants.smk deleted file mode 100644 index 01d4ec93..00000000 --- a/workflow/rules/download/wri-powerplants.smk +++ /dev/null @@ -1,24 +0,0 @@ -""" -Download WRI powerplants database - -Reference 
---------- -https://www.wri.org/research/global-database-power-plants -""" - - -rule download_powerplants: - output: - csv = "{OUTPUT_DIR}/input/powerplants/global_power_plant_database.csv" - shell: - """ - mkdir -p {wildcards.OUTPUT_DIR}/input/powerplants - cd {wildcards.OUTPUT_DIR}/input/powerplants - wget https://wri-dataportal-prod.s3.amazonaws.com/manual/global_power_plant_database_v_1_3.zip - unzip -o global_power_plant_database_v_1_3.zip - """ - -""" -Test with: -snakemake -c1 -- results/input/powerplants/global_power_plant_database.csv -""" \ No newline at end of file diff --git a/workflow/rules/preprocess/IRIS.smk b/workflow/rules/preprocess/IRIS.smk deleted file mode 100644 index 4b69c002..00000000 --- a/workflow/rules/preprocess/IRIS.smk +++ /dev/null @@ -1,29 +0,0 @@ -rule parse_IRIS: - input: - csv_dir="{OUTPUT_DIR}/input/IRIS/iris-data/event_sets/{IRIS_SCENARIO}/" - output: - parquet="{OUTPUT_DIR}/storm_tracks/IRIS-{IRIS_SCENARIO}/{SAMPLE}/tracks.geoparquet" - script: - "../../scripts/preprocess/parse_IRIS.py" - -""" -Test with: -snakemake -c1 results/storm_tracks/IRIS_SSP1-2050/0/tracks.geoparquet -""" - - -rule slice_IRIS: - input: - global_tracks=rules.parse_IRIS.output.parquet, - grid_hull="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/network/convex_hull.json" - output: - sliced_tracks="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/IRIS-{IRIS_SCENARIO}/{SAMPLE}/tracks.geoparquet", - resources: - mem_mb=10000 # the global tracks file is fairly chunky - script: - "../../scripts/preprocess/slice_storm_tracks.py" - -""" -To test: -snakemake -c1 results/power/by_country/PRI/storms/IRIS-PRESENT/0/tracks.geoparquet -""" diff --git a/workflow/rules/preprocess/STORM.smk b/workflow/rules/preprocess/STORM.smk deleted file mode 100644 index abfdc79f..00000000 --- a/workflow/rules/preprocess/STORM.smk +++ /dev/null @@ -1,29 +0,0 @@ -rule parse_storm: - input: - csv_dir="{OUTPUT_DIR}/input/STORM/events/{STORM_MODEL}/raw" - output: - parquet="{OUTPUT_DIR}/storm_tracks/STORM-{STORM_MODEL}/{SAMPLE}/tracks.geoparquet" - script: - "../../scripts/preprocess/parse_STORM.py" - -""" -Test with: -snakemake -c1 results/storm_tracks/STORM-constant/0/tracks.geoparquet -""" - - -rule slice_storm: - input: - global_tracks="{OUTPUT_DIR}/storm_tracks/STORM-{STORM_MODEL}/{SAMPLE}/tracks.geoparquet", - grid_hull="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/network/convex_hull.json" - output: - sliced_tracks="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/STORM-{STORM_MODEL}/{SAMPLE}/tracks.geoparquet", - resources: - mem_mb=10000 # the global tracks file is fairly chunky - script: - "../../scripts/preprocess/slice_storm_tracks.py" - -""" -To test: -snakemake -c1 results/power/by_country/PRI/storms/STORM-constant/0/tracks.geoparquet -""" \ No newline at end of file diff --git a/workflow/rules/risk/.gitkeep b/workflow/rules/risk/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/workflow/scripts/download/scrape_url.py b/workflow/scripts/download/scrape_url.py deleted file mode 100644 index a0327af6..00000000 --- a/workflow/scripts/download/scrape_url.py +++ /dev/null @@ -1,31 +0,0 @@ -import os -import sys - -import requests - -try: - country_ident = snakemake.params["code_country"] # type: ignore - output_dir = snakemake.params["output_dir"] # type: ignore -except: - country_ident = sys.argv[1] - output_dir = sys.argv[2] - - -country_url = ( - "https://www.worldpop.org/rest/data/pop/cic2020_UNadj_100m?iso3=" + country_ident -) -country_data = requests.get( - 
country_url, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"} -).json() -link = country_data["data"][0]["files"][0] - -fname = os.path.join( - output_dir, "input", "population", f"{country_ident}_ppp_2020_UNadj_constrained.tif" -) - -if not os.path.exists(fname): # check if file already exists - print(fname) - with open(fname, "wb") as fh: - data_request = requests.get(link, stream=True) - for chunk in data_request.iter_content(chunk_size=1024 * 1024 * 4): - fh.write(chunk) diff --git a/workflow/scripts/aggregate_to_admin_area.py b/workflow/transport-flood/aggregate_to_admin_area.py similarity index 100% rename from workflow/scripts/aggregate_to_admin_area.py rename to workflow/transport-flood/aggregate_to_admin_area.py diff --git a/workflow/rules/exposure/aggregate_to_admin_area.smk b/workflow/transport-flood/aggregate_to_admin_area.smk similarity index 94% rename from workflow/rules/exposure/aggregate_to_admin_area.smk rename to workflow/transport-flood/aggregate_to_admin_area.smk index bc403457..6106b58a 100644 --- a/workflow/rules/exposure/aggregate_to_admin_area.smk +++ b/workflow/transport-flood/aggregate_to_admin_area.smk @@ -8,7 +8,7 @@ rule aggregate_damages_to_admin_area: output: aggregated_data = "{OUTPUT_DIR}/direct_damages/{DATASET}_{FILTER_SLUG}/{HAZARD_SLUG}/{DIRECT_DAMAGE_TYPES}/{AGG_FUNC_SLUG}/{ADMIN_SLUG}/{SLICE_SLUG}.geoparquet", script: - "../../scripts/aggregate_to_admin_area.py" + "./aggregate_to_admin_area.py" """ Test with: @@ -37,7 +37,7 @@ rule concat_and_sum_aggregated_damages: output: joined = "{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}/{HAZARD_SLUG}/{DIRECT_DAMAGE_TYPES}/{AGG_FUNC_SLUG}/{ADMIN_SLUG}.geoparquet", script: - "../../scripts/concat_and_sum_slices.py" + "./concat_and_sum_slices.py" """ Test with: diff --git a/workflow/scripts/concat_and_sum_slices.py b/workflow/transport-flood/concat_and_sum_slices.py similarity index 100% rename from workflow/scripts/concat_and_sum_slices.py rename to workflow/transport-flood/concat_and_sum_slices.py diff --git a/workflow/scripts/direct_damages.py b/workflow/transport-flood/direct_damages.py similarity index 100% rename from workflow/scripts/direct_damages.py rename to workflow/transport-flood/direct_damages.py diff --git a/workflow/rules/exposure/flood_damages.smk b/workflow/transport-flood/flood_damages.smk similarity index 94% rename from workflow/rules/exposure/flood_damages.smk rename to workflow/transport-flood/flood_damages.smk index 8178dac0..c5aa4475 100644 --- a/workflow/rules/exposure/flood_damages.smk +++ b/workflow/transport-flood/flood_damages.smk @@ -19,7 +19,7 @@ rule direct_damages: # determine the hazard type from the hazard slug, e.g. 
flood, earthquake, storm hazard_type=lambda wildcards: config["hazard_types"][wildcards.HAZARD_SLUG.replace('hazard-', '')] script: - "../../scripts/direct_damages.py" + "./direct_damages.py" """ Test with: @@ -33,7 +33,7 @@ rule plot_damage_distributions: output: plots = directory("{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}/{HAZARD_SLUG}/damage_fraction_plots") script: - "../../scripts/plot_damage_distributions.py" + "./plot_damage_distributions.py" """ Test with: diff --git a/workflow/rules/exposure/network_raster_intersection.smk b/workflow/transport-flood/network_raster_intersection.smk similarity index 57% rename from workflow/rules/exposure/network_raster_intersection.smk rename to workflow/transport-flood/network_raster_intersection.smk index 34c5886a..1dfe21f7 100644 --- a/workflow/rules/exposure/network_raster_intersection.smk +++ b/workflow/transport-flood/network_raster_intersection.smk @@ -19,30 +19,9 @@ rule rasterise_osm_network: # TODO: investigate why this job is sometimes failing with a coredump from intersection.py retries: 3 script: - "../../scripts/intersection.py" + "../scripts/intersection.py" """ Test with: snakemake --cores all results/splits/tanzania-mini_filter-road/hazard-aqueduct-river/slice-0.geoparquet """ - - -rule rasterise_electricity_grid: - """ - Split electricity network edges on raster grid - Assign raster indicies to edges - """ - input: - network="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/network/edges.geoparquet", - tif_paths=["{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/wind_grid.tiff"], - params: - copy_raster_values=False, - output: - geoparquet="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/edges_split.geoparquet", - script: - "../../scripts/intersection.py" - -""" -Test with: -snakemake --cores 1 results/power/by_country/PRI/exposure/edges_split.geoparquet -""" diff --git a/workflow/scripts/plot_damage_distributions.py b/workflow/transport-flood/plot_damage_distributions.py similarity index 100% rename from workflow/scripts/plot_damage_distributions.py rename to workflow/transport-flood/plot_damage_distributions.py diff --git a/workflow/rules/preprocess/create_bbox_extracts.smk b/workflow/transport/create_bbox_extracts.smk similarity index 94% rename from workflow/rules/preprocess/create_bbox_extracts.smk rename to workflow/transport/create_bbox_extracts.smk index c0ad8aae..8c03afbb 100644 --- a/workflow/rules/preprocess/create_bbox_extracts.smk +++ b/workflow/transport/create_bbox_extracts.smk @@ -13,7 +13,7 @@ rule create_bbox_extracts: n=range(config["slice_count"]), ), script: - "../../scripts/prepare-extracts.py" + "./prepare-extracts.py" """ diff --git a/workflow/transport/create_network.smk b/workflow/transport/create_network.smk new file mode 100644 index 00000000..7b07d404 --- /dev/null +++ b/workflow/transport/create_network.smk @@ -0,0 +1,27 @@ + + +rule create_transport_network: + """ + Take .geoparquet OSM files and output files of cleaned network nodes and edges + """ + input: + nodes="{OUTPUT_DIR}/geoparquet/{DATASET}_{FILTER_SLUG}/raw/{SLICE_SLUG}_nodes.geoparquet", + edges="{OUTPUT_DIR}/geoparquet/{DATASET}_{FILTER_SLUG}/raw/{SLICE_SLUG}_edges.geoparquet", + admin="{OUTPUT_DIR}/input/admin-boundaries/gadm36_levels.gpkg", + output: + nodes="{OUTPUT_DIR}/geoparquet/{DATASET}_{FILTER_SLUG}/processed/{SLICE_SLUG}_nodes.geoparquet", + edges="{OUTPUT_DIR}/geoparquet/{DATASET}_{FILTER_SLUG}/processed/{SLICE_SLUG}_edges.geoparquet" + params: + # determine the network type from the filter, e.g. 
road, rail + network_type=lambda wildcards: wildcards.FILTER_SLUG.replace('filter-', ''), + # pass in the slice number so we can label edges and nodes with their slice + # edge and node IDs should be unique across all slices + slice_number=lambda wildcards: int(wildcards.SLICE_SLUG.replace('slice-', '')) + script: + # template the path string with a value from params (can't execute .replace in `script` context) + "./create_{params.network_type}_network.py" + +""" +Test with: +snakemake --cores all results/geoparquet/tanzania-mini_filter-road/processed/slice-0_edges.geoparquet +""" diff --git a/workflow/scripts/create_overall_bbox.py b/workflow/transport/create_overall_bbox.py similarity index 100% rename from workflow/scripts/create_overall_bbox.py rename to workflow/transport/create_overall_bbox.py diff --git a/workflow/rules/preprocess/create_overall_bbox.smk b/workflow/transport/create_overall_bbox.smk similarity index 87% rename from workflow/rules/preprocess/create_overall_bbox.smk rename to workflow/transport/create_overall_bbox.smk index 4bc7b99f..a0be6690 100644 --- a/workflow/rules/preprocess/create_overall_bbox.smk +++ b/workflow/transport/create_overall_bbox.smk @@ -5,7 +5,7 @@ rule create_overall_bbox: output: bbox = "{OUTPUT_DIR}/json/{DATASET}.json", script: - "../../scripts/create_overall_bbox.py" + "./create_overall_bbox.py" """ Test with: diff --git a/workflow/scripts/transport/create_rail_network.py b/workflow/transport/create_rail_network.py similarity index 100% rename from workflow/scripts/transport/create_rail_network.py rename to workflow/transport/create_rail_network.py diff --git a/workflow/scripts/transport/create_road_network.py b/workflow/transport/create_road_network.py similarity index 100% rename from workflow/scripts/transport/create_road_network.py rename to workflow/transport/create_road_network.py diff --git a/workflow/scripts/join_data.py b/workflow/transport/join_data.py similarity index 100% rename from workflow/scripts/join_data.py rename to workflow/transport/join_data.py diff --git a/workflow/rules/exposure/join_data.smk b/workflow/transport/join_data.smk similarity index 64% rename from workflow/rules/exposure/join_data.smk rename to workflow/transport/join_data.smk index 96816442..a1c2c1d6 100644 --- a/workflow/rules/exposure/join_data.smk +++ b/workflow/transport/join_data.smk @@ -1,3 +1,30 @@ +rule join_data: + """ + Take split .geoparquet files and output a single, unified .geoparquet file + for each dataset-hazard combination + """ + input: + lambda wildcards: expand( + os.path.join( + "{OUTPUT_DIR}", + "splits", + "{DATASET}_{FILTER_SLUG}", + "{HAZARD_SLUG}", + "slice-{i}.geoparquet", + ), + **wildcards, + i=range(config["slice_count"]) + ), + output: + "{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}_{HAZARD_SLUG}.geoparquet", + script: + "./join_data.py" + +""" +Test with: +snakemake --cores all results/tanzania-mini_filter-road_hazard-aqueduct-river.geoparquet +""" + # Take split .geoparquet files and output a single, unified .geoparquet file # for each dataset-hazard combination @@ -18,7 +45,7 @@ rule join_exposure: output: "{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}_{HAZARD_SLUG}.geoparquet", script: - "../../scripts/join_data.py" + "./join_data.py" """ Test with: @@ -43,7 +70,7 @@ rule join_direct_damages: output: joined = "{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}/{HAZARD_SLUG}/damage_{DIRECT_DAMAGE_TYPES}.geoparquet", script: - "../../scripts/join_data.py" + "./join_data.py" """ Test with: diff --git a/workflow/scripts/join_network.py 
b/workflow/transport/join_network.py similarity index 100% rename from workflow/scripts/join_network.py rename to workflow/transport/join_network.py diff --git a/workflow/rules/preprocess/join_network.smk b/workflow/transport/join_network.smk similarity index 96% rename from workflow/rules/preprocess/join_network.smk rename to workflow/transport/join_network.smk index 83dc043c..e2f140c6 100644 --- a/workflow/rules/preprocess/join_network.smk +++ b/workflow/transport/join_network.smk @@ -19,7 +19,7 @@ rule join_network: nodes="{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}/nodes.geoparquet", edges="{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}/edges.geoparquet" script: - "../../scripts/join_network.py" + "./join_network.py" """ Test with: diff --git a/workflow/rules/preprocess/filter_osm_data.smk b/workflow/transport/openstreetmap.smk similarity index 56% rename from workflow/rules/preprocess/filter_osm_data.smk rename to workflow/transport/openstreetmap.smk index 186d4ed5..abed9464 100644 --- a/workflow/rules/preprocess/filter_osm_data.smk +++ b/workflow/transport/openstreetmap.smk @@ -1,3 +1,25 @@ +# Download the file specified in the config +import os +import re + + +rule download_osm: + output: + "{OUTPUT_DIR}/input/OSM/{DATASET}.osm.pbf", + run: + input_file = config["infrastructure_datasets"][wildcards.DATASET] + if re.match("^https?://", input_file): + os.system(f"wget {input_file} --output-document={output}") + else: + os.system(f"mkdir -p $(dirname {output})") + os.system(f"cp {input_file} {output}") + + """ + Test with: + snakemake --cores all results/input/OSM/tanzania-mini.osm.pbf + """ + + # Take a .osm.pbf file and return a .osm.pbf file with a subset of the information rule filter_osm_data: input: diff --git a/workflow/rules/preprocess/osm_to_geoparquet.smk b/workflow/transport/osm_to_geoparquet.smk similarity index 94% rename from workflow/rules/preprocess/osm_to_geoparquet.smk rename to workflow/transport/osm_to_geoparquet.smk index f098036e..6157b988 100644 --- a/workflow/rules/preprocess/osm_to_geoparquet.smk +++ b/workflow/transport/osm_to_geoparquet.smk @@ -9,7 +9,7 @@ rule osm_to_geoparquet: edges="{OUTPUT_DIR}/geoparquet/{DATASET}_{FILTER_SLUG}/raw/{SLICE_SLUG}_edges.geoparquet", nodes="{OUTPUT_DIR}/geoparquet/{DATASET}_{FILTER_SLUG}/raw/{SLICE_SLUG}_nodes.geoparquet", script: - "../../scripts/osm_to_pq.py" + "./osm_to_pq.py" """ diff --git a/workflow/scripts/osm_to_pq.py b/workflow/transport/osm_to_pq.py similarity index 100% rename from workflow/scripts/osm_to_pq.py rename to workflow/transport/osm_to_pq.py diff --git a/workflow/scripts/prepare-extracts.py b/workflow/transport/prepare-extracts.py similarity index 100% rename from workflow/scripts/prepare-extracts.py rename to workflow/transport/prepare-extracts.py diff --git a/workflow/rules/preprocess/slice.smk b/workflow/transport/slice.smk similarity index 100% rename from workflow/rules/preprocess/slice.smk rename to workflow/transport/slice.smk diff --git a/workflow/scripts/transport/utils.py b/workflow/transport/utils.py similarity index 100% rename from workflow/scripts/transport/utils.py rename to workflow/transport/utils.py diff --git a/workflow/rules/preprocess/IBTrACS.smk b/workflow/tropical-cyclone/IBTrACS.smk similarity index 61% rename from workflow/rules/preprocess/IBTrACS.smk rename to workflow/tropical-cyclone/IBTrACS.smk index 9408a112..80e23367 100644 --- a/workflow/rules/preprocess/IBTrACS.smk +++ b/workflow/tropical-cyclone/IBTrACS.smk @@ -1,10 +1,24 @@ +rule download_IBTrACS: + output: +
"{OUTPUT_DIR}/input/IBTrACS/raw/v4.csv" + shell: + """ + wget --output-document {output} \ + https://www.ncei.noaa.gov/data/international-best-track-archive-for-climate-stewardship-ibtracs/v04r00/access/csv/ibtracs.ALL.list.v04r00.csv + """ + +""" +Test with: +snakemake -c1 results/input/IBTrACS/raw/v4.csv +""" + rule parse_ibtracs: input: ibtracs_csv = "{OUTPUT_DIR}/input/IBTrACS/raw/v4.csv" output: ibtracs_parquet = "{OUTPUT_DIR}/storm_tracks/IBTrACS/0/tracks.geoparquet" script: - "../../scripts/preprocess/parse_IBTrACS.py" + "./parse_IBTrACS.py" """ To test: @@ -19,7 +33,7 @@ rule slice_ibtracs: output: sliced_tracks="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/IBTrACS/0/tracks.geoparquet", script: - "../../scripts/preprocess/slice_storm_tracks.py" + "./slice_storm_tracks.py" """ To test: diff --git a/workflow/tropical-cyclone/IRIS.smk b/workflow/tropical-cyclone/IRIS.smk new file mode 100644 index 00000000..e5944463 --- /dev/null +++ b/workflow/tropical-cyclone/IRIS.smk @@ -0,0 +1,62 @@ +""" +Download the Imperial tropical cyclone storm tracks +""" + +rule download_IRIS: + """ + As of 20230626, this data is not publicly available. You will need + appropriate keys to access the files on the OUCE file store. + """ + output: + zip_file = "{OUTPUT_DIR}/input/IRIS/archive.zip" + shell: + """ + mkdir -p $(dirname {output.zip_file}) + scp /ouce-home/projects/mistral/iris/iris-data.zip {output.zip_file} + """ + +""" +Test with: +snakemake -n -c1 -- results/input/IRIS/archive.zip +""" + + +rule extract_IRIS: + input: + zip_file = rules.download_IRIS.output.zip_file + output: + unzipped_dir = directory("{OUTPUT_DIR}/input/IRIS/iris-data/") + shell: + """ + unzip {input.zip_file} -d $(dirname {output.unzipped_dir}) + """ + +rule parse_IRIS: + input: + csv_dir="{OUTPUT_DIR}/input/IRIS/iris-data/event_sets/{IRIS_SCENARIO}/" + output: + parquet="{OUTPUT_DIR}/storm_tracks/IRIS-{IRIS_SCENARIO}/{SAMPLE}/tracks.geoparquet" + script: + "./parse_IRIS.py" + +""" +Test with: +snakemake -c1 results/storm_tracks/IRIS-SSP1-2050/0/tracks.geoparquet +""" + + +rule slice_IRIS: + input: + global_tracks=rules.parse_IRIS.output.parquet, + grid_hull="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/network/convex_hull.json" + output: + sliced_tracks="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/IRIS-{IRIS_SCENARIO}/{SAMPLE}/tracks.geoparquet", + resources: + mem_mb=10000 # the global tracks file is fairly chunky + script: + "./slice_storm_tracks.py" + +""" +To test: +snakemake -c1 results/power/by_country/PRI/storms/IRIS-PRESENT/0/tracks.geoparquet +""" diff --git a/workflow/rules/download/STORM.smk b/workflow/tropical-cyclone/STORM.smk similarity index 89% rename from workflow/rules/download/STORM.smk rename to workflow/tropical-cyclone/STORM.smk index 81c3b1c1..71a2e368 100644 --- a/workflow/rules/download/STORM.smk +++ b/workflow/tropical-cyclone/STORM.smk @@ -48,6 +48,36 @@ Test with: snakemake -c1 results/input/STORM/events/HadGEM-GC31-HM/archive.zip """ +rule parse_storm: + input: + csv_dir="{OUTPUT_DIR}/input/STORM/events/{STORM_MODEL}/raw" + output: + parquet="{OUTPUT_DIR}/storm_tracks/STORM-{STORM_MODEL}/{SAMPLE}/tracks.geoparquet" + script: + "./parse_STORM.py" + +""" +Test with: +snakemake -c1 results/storm_tracks/STORM-constant/0/tracks.geoparquet +""" + + +rule slice_storm: + input: + global_tracks="{OUTPUT_DIR}/storm_tracks/STORM-{STORM_MODEL}/{SAMPLE}/tracks.geoparquet", + grid_hull="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/network/convex_hull.json" + output: +
sliced_tracks="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/STORM-{STORM_MODEL}/{SAMPLE}/tracks.geoparquet", + resources: + mem_mb=10000 # the global tracks file is fairly chunky + script: + "./slice_storm_tracks.py" + +""" +To test: +snakemake -c1 results/power/by_country/PRI/storms/STORM-constant/0/tracks.geoparquet +""" + rule extract_storm_event: """ Unzip a storm file for a basin we are interested in diff --git a/workflow/rules/preprocess/join_data.smk b/workflow/tropical-cyclone/join_tracks.smk similarity index 60% rename from workflow/rules/preprocess/join_data.smk rename to workflow/tropical-cyclone/join_tracks.smk index 517ab23c..406c05c3 100644 --- a/workflow/rules/preprocess/join_data.smk +++ b/workflow/tropical-cyclone/join_tracks.smk @@ -1,31 +1,3 @@ -rule join_data: - """ - Take split .geoparquet files and output a single, unified .geoparquet file - for each dataset-hazard combination - """ - input: - lambda wildcards: expand( - os.path.join( - "{OUTPUT_DIR}", - "splits", - "{DATASET}_{FILTER_SLUG}", - "{HAZARD_SLUG}", - "slice-{i}.geoparquet", - ), - **wildcards, - i=range(config["slice_count"]) - ), - output: - "{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}_{HAZARD_SLUG}.geoparquet", - script: - "../../scripts/join_data.py" - -""" -Test with: -snakemake --cores all results/tanzania-mini_filter-road_hazard-aqueduct-river.geoparquet -""" - - def all_storm_tracks_by_sample(wildcards) -> list[str]: """ Return a list of every per-sample tracks file for a given STORM_SET. @@ -60,4 +32,4 @@ rule concat_storm_tracks: """ Test with: snakemake -c1 -- results/storm_tracks/STORM-constant/tracks.geoparquet -""" \ No newline at end of file +""" diff --git a/workflow/scripts/preprocess/parse_IBTrACS.py b/workflow/tropical-cyclone/parse_IBTrACS.py similarity index 100% rename from workflow/scripts/preprocess/parse_IBTrACS.py rename to workflow/tropical-cyclone/parse_IBTrACS.py diff --git a/workflow/scripts/preprocess/parse_IRIS.py b/workflow/tropical-cyclone/parse_IRIS.py similarity index 100% rename from workflow/scripts/preprocess/parse_IRIS.py rename to workflow/tropical-cyclone/parse_IRIS.py diff --git a/workflow/scripts/preprocess/parse_STORM.py b/workflow/tropical-cyclone/parse_STORM.py similarity index 100% rename from workflow/scripts/preprocess/parse_STORM.py rename to workflow/tropical-cyclone/parse_STORM.py diff --git a/workflow/scripts/preprocess/slice_storm_tracks.py b/workflow/tropical-cyclone/slice_storm_tracks.py similarity index 100% rename from workflow/scripts/preprocess/slice_storm_tracks.py rename to workflow/tropical-cyclone/slice_storm_tracks.py diff --git a/workflow/rules/storm_workflow_global_variables.smk b/workflow/tropical-cyclone/tc_workflow_global_variables.smk similarity index 100% rename from workflow/rules/storm_workflow_global_variables.smk rename to workflow/tropical-cyclone/tc_workflow_global_variables.smk diff --git a/workflow/rules/exposure/wind_fields.smk b/workflow/tropical-cyclone/wind_fields.smk similarity index 98% rename from workflow/rules/exposure/wind_fields.smk rename to workflow/tropical-cyclone/wind_fields.smk index f153fce3..01788f06 100644 --- a/workflow/rules/exposure/wind_fields.smk +++ b/workflow/tropical-cyclone/wind_fields.smk @@ -148,7 +148,7 @@ rule create_surface_roughness_raster: output: surface_roughness = "{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/surface_roughness.tiff" script: - "../../scripts/intersect/surface_roughness.py" + "./wind_fields/surface_roughness.py" """ Test with: @@ -166,7 +166,7 @@ rule 
create_downscaling_factors: downscale_factors="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/downscale_factors.npy", downscale_factors_plot="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/downscale_factors.png", script: - "../../scripts/intersect/wind_downscaling_factors.py" + "./wind_fields/wind_downscaling_factors.py" """ To test: @@ -220,7 +220,7 @@ rule estimate_wind_fields: plot_dir=directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/{STORM_SET}/{SAMPLE}/plots/"), wind_speeds="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/{STORM_SET}/{SAMPLE}/max_wind_field.nc", script: - "../../scripts/intersect/estimate_wind_fields.py" + "./wind_fields/estimate_wind_fields.py" """ To test: @@ -251,7 +251,7 @@ rule concat_wind_fields_over_sample: output: concat="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/{STORM_SET}/max_wind_field.nc", script: - "../../scripts/exposure/concat_wind_over_sample.py" + "./wind_fields/concat_wind_over_sample.py" """ To test: diff --git a/workflow/scripts/exposure/concat_wind_over_sample.py b/workflow/tropical-cyclone/wind_fields/concat_wind_over_sample.py similarity index 100% rename from workflow/scripts/exposure/concat_wind_over_sample.py rename to workflow/tropical-cyclone/wind_fields/concat_wind_over_sample.py diff --git a/workflow/scripts/intersect/estimate_wind_fields.py b/workflow/tropical-cyclone/wind_fields/estimate_wind_fields.py similarity index 100% rename from workflow/scripts/intersect/estimate_wind_fields.py rename to workflow/tropical-cyclone/wind_fields/estimate_wind_fields.py diff --git a/workflow/scripts/intersect/surface_roughness.py b/workflow/tropical-cyclone/wind_fields/surface_roughness.py similarity index 100% rename from workflow/scripts/intersect/surface_roughness.py rename to workflow/tropical-cyclone/wind_fields/surface_roughness.py diff --git a/workflow/scripts/intersect/wind_downscaling_factors.py b/workflow/tropical-cyclone/wind_fields/wind_downscaling_factors.py similarity index 100% rename from workflow/scripts/intersect/wind_downscaling_factors.py rename to workflow/tropical-cyclone/wind_fields/wind_downscaling_factors.py From a38c394bec8ed7c516a7ab2fe12b40e15322320d Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 6 Feb 2024 11:07:36 +0000 Subject: [PATCH 2/6] Drop unused scripts, lift to top-level directory --- {workflow/scripts => scripts}/intersection.py | 0 {workflow/scripts => scripts}/pq_to_gpkg.py | 0 .../scripts => scripts}/unpickle_plot.py | 0 .../power-tc/network_raster_intersection.smk | 2 +- workflow/scripts/join_edges.py | 108 ---------------- workflow/scripts/make_exposure_img.py | 105 --------------- workflow/scripts/make_exposure_tif.py | 120 ------------------ .../network_raster_intersection.smk | 2 +- 8 files changed, 2 insertions(+), 335 deletions(-) rename {workflow/scripts => scripts}/intersection.py (100%) rename {workflow/scripts => scripts}/pq_to_gpkg.py (100%) rename {workflow/scripts => scripts}/unpickle_plot.py (100%) delete mode 100644 workflow/scripts/join_edges.py delete mode 100644 workflow/scripts/make_exposure_img.py delete mode 100644 workflow/scripts/make_exposure_tif.py diff --git a/workflow/scripts/intersection.py b/scripts/intersection.py similarity index 100% rename from workflow/scripts/intersection.py rename to scripts/intersection.py diff --git a/workflow/scripts/pq_to_gpkg.py b/scripts/pq_to_gpkg.py similarity index 100% rename from workflow/scripts/pq_to_gpkg.py rename to scripts/pq_to_gpkg.py diff --git a/workflow/scripts/unpickle_plot.py 
b/scripts/unpickle_plot.py similarity index 100% rename from workflow/scripts/unpickle_plot.py rename to scripts/unpickle_plot.py diff --git a/workflow/power-tc/network_raster_intersection.smk b/workflow/power-tc/network_raster_intersection.smk index 574de01a..cf636fb5 100644 --- a/workflow/power-tc/network_raster_intersection.smk +++ b/workflow/power-tc/network_raster_intersection.smk @@ -15,7 +15,7 @@ rule rasterise_electricity_grid: output: geoparquet="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/edges_split.geoparquet", script: - "../scripts/intersection.py" + "../../scripts/intersection.py" """ Test with: diff --git a/workflow/scripts/join_edges.py b/workflow/scripts/join_edges.py deleted file mode 100644 index 6d003914..00000000 --- a/workflow/scripts/join_edges.py +++ /dev/null @@ -1,108 +0,0 @@ -"""Takes a list of geoparquet files containing edges and join corresponding -geodataframes. The geodataframes must have the following columns: - - start_node_longitude start_node_latitude start_node_reference end_node_longitude end_node_latitude end_node_reference geometry - -example join: - - file1.geoparquet - id obs1 obs2 start_node_longitude start_node_latitude start_node_reference end_node_longitude end_node_latitude end_node_reference geometry - 0 a b 0 0 0 1 1 1 geom0 - 1 c d 1 1 1 2 1 2 geom1 - - file2.geoparquet - id obs1 obs2 start_node_longitude start_node_latitude start_node_reference end_node_longitude end_node_latitude end_node_reference geometry - 0 A B 2 2 2 4 3 3 GEOM0 - 1 C D 4 3 3 4 4 4 GEOM1 - - joined.geoparquet - id obs1 obs2 start_node_longitude start_node_latitude start_node_reference end_node_longitude end_node_latitude end_node_reference geometry - 0 a b 0 0 0 1 1 1 geom0 - 1 c d 1 1 1 2 1 2 geom1 - 0 A B 2 2 2 4 3 3 GEOM0 - 1 C D 4 3 3 4 4 4 GEOM1 - -Usage: - python join_edges [FILE] [output] - -Example: - python join_edges file1.geoparquet file2.geoparquet joined.geoparquet -""" -import logging -import sys -import warnings - -import geopandas as gpd -import numpy as np -import pandas - -from open_gira.io import concat_geoparquet - - -def add_custom_node_references(base): - """ - When converting to .geoparquet we added nodes at the bounding box edges. - These nodes have no reference. We need to make it easy to identify nodes by - ensuring that nodes in the same location have the same reference. We'll - make it easy on ourselves by giving our inserted nodes negative reference - numbers. 
- """ - - # Find start nodes with no reference - na_start_nodes = ( - base[base.start_node_reference.isna()][ - ["start_node_longitude", "start_node_latitude"] - ] - .copy() - .rename(columns={"start_node_longitude": "lon", "start_node_latitude": "lat"}) - ) - # and end nodes with no reference - na_end_nodes = ( - base[base.end_node_reference.isna()][ - ["end_node_longitude", "end_node_latitude"] - ] - .copy() - .rename(columns={"end_node_longitude": "lon", "end_node_latitude": "lat"}) - ) - # stitch them together, dropping any duplicate coordinates - nodes = pandas.concat([na_start_nodes, na_end_nodes]).drop_duplicates() - # give them ids - nodes_n = len(nodes) - nodes["node_reference"] = np.arange(nodes_n)[::-1] - nodes_n - - # merge on against start nodes and fill na values - base = base.merge( - nodes, - left_on=["start_node_longitude", "start_node_latitude"], - right_on=["lon", "lat"], - how="left", - ).drop(columns=["lon", "lat"]) - base.start_node_reference = base.start_node_reference.fillna(base.node_reference) - base = base.drop(columns="node_reference") - - # merge on against end nodes and fill na values - base = base.merge( - nodes, - left_on=["end_node_longitude", "end_node_latitude"], - right_on=["lon", "lat"], - how="left", - ).drop(columns=["lon", "lat"]) - base.end_node_reference = base.end_node_reference.fillna(base.node_reference) - base = base.drop(columns="node_reference") - - return base - - -if __name__ == "__main__": - try: - slice_files = snakemake.input # type: ignore - output_file = snakemake.output[0] # type: ignore - except NameError: - slice_files = sys.argv[1:-1] - output_file = sys.argv[-1] - - warnings.filterwarnings("ignore", message=".*initial implementation of Parquet.*") - - concatenated = concat_geoparquet(slice_files) - concatenated = add_custom_node_references(concatenated) - concatenated.to_parquet(output_file) diff --git a/workflow/scripts/make_exposure_img.py b/workflow/scripts/make_exposure_img.py deleted file mode 100644 index b77a49b0..00000000 --- a/workflow/scripts/make_exposure_img.py +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 -""" -Create an image showing exposed road length by combining exposure tifs, -coastline data, and administrative boundary information. 
-""" -import glob -import logging -import os -import re -import sys - -import geopandas as gp -import matplotlib.pyplot as plt -import rasterio.plot -import shapely.geometry as shape - -if __name__ == "__main__": - logging.basicConfig(format="%(asctime)s %(process)d %(filename)s %(message)s", level=logging.INFO) - - try: - hazard_dir = snakemake.input["hazard_dir"] # type: ignore - coastline = os.path.join( - snakemake.input["coastline"], snakemake.params["coastline"] # type: ignore - ) - boundaries = os.path.join( - snakemake.input["boundaries"], snakemake.params["boundaries"] # type: ignore - ) - output_dir = snakemake.output[0] # type: ignore - - try: - opts_dict = snakemake.config["exposure_tifs"]["plot"] # type: ignore - except KeyError: - opts_dict = {} - - except NameError: - print(sys.argv) - (hazard_dir, coastline, boundaries, output_dir, opts_dict) = sys.argv[1:] - # hazard_dir = '../../results/exposure/tanzania-latest_filter-road/hazard-aqueduct-river/' - # coastline = '../../results/input/coastlines/ne_10m_ocean/ne_10m_ocean.shp' - # boundaries = '../../results/input/admin-boundaries/ne_50m/ne_50m_admin_0_countries.shp' - # output_dir = '../../results/exposure/tanzania-latest_filter-road/hazard-aqueduct-river/img/' - # opts_dict = {} - - # Load up the options from the opts_dict - def opt(s, default=None): - return opts_dict[s] if s in opts_dict.keys() else default - - raster_opts = opt("raster", {"cmap": "Reds"}) - coastline_opts = opt("coastline", {"facecolor": "#87cefa"}) - boundary_opts = opt("boundary", {"edgecolor": "#000000"}) - - logging.info(f"Saving images to {output_dir}") - if not os.path.exists(output_dir): - os.makedirs(output_dir) - logging.info(f"Created directory {output_dir}") - - logging.debug("Preparing coastline data.") - coast = gp.read_file(coastline) - - logging.debug("Preparing administrative boundary data.") - bounds = gp.read_file(boundaries) - - hazard_files = glob.glob(os.path.join(hazard_dir, "*.tif")) - logging.info(f"Found {len(hazard_files)} .tif files in {hazard_dir}") - - for hazard_tif in hazard_files: - with rasterio.open(hazard_tif) as hazard: - # To plot everything in the same way and in the same place - # we need to make sure everything uses the same - # Coordinate Reference System (CRS). - # The CRS we'll use as the boss is the hazard raster file CRS. 
- plt_crs = hazard.crs.to_epsg() - band = hazard.read() - hazard_rect = shape.Polygon( - [ - (hazard.bounds.left, hazard.bounds.top), - (hazard.bounds.right, hazard.bounds.top), - (hazard.bounds.right, hazard.bounds.bottom), - (hazard.bounds.left, hazard.bounds.bottom), - ] - ) - - # Make axes - fig, ax = plt.subplots(dpi=300) - - # Plot raster - logging.debug("Plotting raster data.") - rasterio.plot.show(hazard, ax=ax, zorder=1, **raster_opts) - - # Plot coastline - coast_tmp = coast.to_crs(plt_crs) - coast_tmp = coast_tmp.geometry.clip(hazard_rect) - coast_tmp.plot(ax=ax, edgecolor="none", zorder=2, **coastline_opts) - - # Plot boundaries - bounds_tmp = bounds.to_crs(plt_crs) - bounds_tmp = bounds_tmp.geometry.clip(hazard_rect) - bounds_tmp.plot(ax=ax, facecolor="none", zorder=3, **boundary_opts) - - output_path = os.path.join( - output_dir, re.sub("\\.tiff?$", ".png", os.path.basename(hazard_tif)) - ) - logging.info(f"Saving {output_path}") - plt.savefig(output_path) diff --git a/workflow/scripts/make_exposure_tif.py b/workflow/scripts/make_exposure_tif.py deleted file mode 100644 index e65d780a..00000000 --- a/workflow/scripts/make_exposure_tif.py +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 -""" -Create a raster file (.tif) from a .geoparquet file -by thresholding cells by flood depth and grading by -length of road within the cell. -""" -import logging -import os -import sys - -import geopandas as gp -import numpy as np -import rasterio -from rasterio.enums import Resampling - -if __name__ == "__main__": - logging.basicConfig(format="%(asctime)s %(process)d %(filename)s %(message)s", level=logging.INFO) - # move to settings later - epsg = 3857 - try: - db_file = snakemake.input["geoparquet"] # type: ignore - hazard = snakemake.input["hazard"][0] # type: ignore - output_path = snakemake.output[0] # type: ignore - opts_dict = snakemake.config["exposure_tifs"] # type: ignore - except NameError: - print(sys.argv) - (db_file, hazard, output_path, opts_dict) = sys.argv[1:] - # db_file = '../../results/tanzania-mini_filter-road_hazard-aqueduct-river.geoparquet' - # hazard = '../../results/input/hazard-aqueduct-river/tanzania-mini/inunriver_rcp4p5_MIROC-ESM-CHEM_2030_rp00100.tif' - # output_path = '../../results/exposure/tanzania-mini/hazard-aqueduct-river' - # opts_dict = {} - - # Load up the options from the opts_dict - threshold = ( - opts_dict["exposure_threshold"] - if "exposure_threshold" in opts_dict.keys() - else 0.5 - ) - scaling_factor = ( - opts_dict["scaling_factor"] if "scaling_factor" in opts_dict.keys() else 0.1 - ) - resampling_mode = ( - opts_dict["resampling_mode"] - if "resampling_mode" in opts_dict.keys() - else "bilinear" - ) - resampling_mode = Resampling[resampling_mode] - - output_path = os.path.join(output_path, f"exposure_{os.path.basename(hazard)}") - tmp_file_path = output_path.replace(".tif", ".tmp.tif") - logging.info(f"Generating raster {output_path} from {db_file}") - if not os.path.exists(os.path.dirname(output_path)): - os.makedirs(os.path.dirname(output_path)) - logging.info(f"Created directory {os.path.dirname(output_path)}") - db = gp.read_parquet(db_file) - col, ext = os.path.splitext(os.path.basename(hazard)) - db = ( - db.rename({col: "hazard"}, axis=1) - .query(f"hazard >= {float(threshold)}") - .assign( - cell_str=lambda x: x["cell_index"], - cell_x=lambda x: [y[0] for y in x["cell_index"]], - cell_y=lambda x: [y[1] for y in x["cell_index"]], - ) - .astype({"cell_str": "str"}) # wasn't working well without cell_str to group 
by - .filter(["cell_x", "cell_y", "cell_str", "length_km"]) - .groupby(["cell_str", "cell_x", "cell_y"]) - .agg({"length_km": "sum"}) - .reset_index() # ungroup - ) - # Copy hazard file to the output destination, editing the band information - with rasterio.open(hazard) as h_file: - with rasterio.open( - tmp_file_path, - "w", - driver=h_file.driver, - height=h_file.height, - width=h_file.width, - count=1, - dtype=db["length_km"].dtype, - crs=h_file.crs, - transform=h_file.transform, - ) as tmp_file: - # Zero the data and then fill in non-missing values - # If a faster approach is needed we can probably find one using clever masking - # tricks to write everything at once - data = np.zeros((h_file.height, h_file.width), db["length_km"].dtype) - for i, r in db.iterrows(): - data[r["cell_y"], r["cell_x"]] = r["length_km"] - tmp_file.write(data, 1) - - with rasterio.open(tmp_file_path) as tmp_file: - scale_width = int(tmp_file.width * scaling_factor) - scale_height = int(tmp_file.height * scaling_factor) - - # scale image transform - transform = tmp_file.transform * tmp_file.transform.scale( - (tmp_file.width / scale_width), (tmp_file.height / scale_height) - ) - # Resampling - with rasterio.open( - f"{output_path}", - "w", - driver=h_file.driver, - height=scale_height, - width=scale_width, - count=1, - dtype=db["length_km"].dtype, - crs=h_file.crs, - transform=transform, - ) as out_file: - # resample data to target shape - data = tmp_file.read( - out_shape=(tmp_file.count, scale_height, scale_width), - resampling=resampling_mode, - ) - out_file.write(data) - - os.remove(tmp_file_path) diff --git a/workflow/transport-flood/network_raster_intersection.smk b/workflow/transport-flood/network_raster_intersection.smk index 1dfe21f7..ee1ca324 100644 --- a/workflow/transport-flood/network_raster_intersection.smk +++ b/workflow/transport-flood/network_raster_intersection.smk @@ -19,7 +19,7 @@ rule rasterise_osm_network: # TODO: investigate why this job is sometimes failing with a coredump from intersection.py retries: 3 script: - "../scripts/intersection.py" + "../../scripts/intersection.py" """ Test with: From ff9972bfe17fa65977edaa1ccaff9c278bad2331 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 6 Feb 2024 11:07:47 +0000 Subject: [PATCH 3/6] Update ignores --- .gitignore | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index b1dbd4cb..6cd74684 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ __pycache__/* venv/* src/snkit +build # Data data/* @@ -24,10 +25,7 @@ tester/* # IDE .idea *.swp +.vscode # Rendered documentation docs/book/ - -validation/plots/* -validation/holland_done_ratio -validation/wind_model_comparison From 115933d606f119afdd78f22016157c84b09e07dc Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 6 Feb 2024 11:14:00 +0000 Subject: [PATCH 4/6] Drop unused plotting rules for power-tc workflow --- workflow/Snakefile | 6 +- .../power-tc/analyse/aggregate_levels.smk | 33 -- workflow/power-tc/analyse/common_functions.py | 216 ---------- .../power-tc/analyse/country_pair_matrix.smk | 25 -- .../analyse/empirical_distribution.smk | 26 -- .../power-tc/analyse/extract_percentile.smk | 27 -- .../power-tc/analyse/power_analyse_all.smk | 14 - .../power-tc/analyse/select_percentile.py | 98 ----- .../analyse/storm_aggregate_levels.py | 170 -------- .../analyse/storm_distribution_empirical.py | 157 -------- ...m_distribution_empirical_country_matrix.py | 170 -------- .../storm_distribution_empirical_geo.py | 230 ----------- 
workflow/power-tc/analyse/target_analysis.smk | 29 -- .../analyse/transmission_aggregate.py | 373 ------------------ .../analyse/transmission_aggregate.smk | 40 -- .../customers_affected_by_storm.smk | 0 .../{plot => map}/target_disruption.smk | 0 .../{analyse => }/network_components.py | 0 .../{analyse => }/network_components.smk | 0 workflow/power-tc/plot/EAD_EACA-finder.py | 85 ---- workflow/power-tc/plot/cross_correlation.py | 78 ---- workflow/power-tc/plot/diff_agg.py | 76 ---- .../power-tc/plot/diff_cross_correlation.py | 95 ----- workflow/power-tc/plot/fig_CDFs.smk | 65 --- workflow/power-tc/plot/fig_EACA.smk | 111 ------ workflow/power-tc/plot/fig_EAD.smk | 276 ------------- workflow/power-tc/plot/fig_EAD_EACA.smk | 43 -- .../power-tc/plot/fig_cross_correlation.smk | 58 --- workflow/power-tc/plot/fig_initial.py | 29 -- workflow/power-tc/plot/fig_initial.smk | 33 -- workflow/power-tc/plot/fig_master.smk | 24 -- workflow/power-tc/plot/mean_agg.py | 52 --- workflow/power-tc/plot/plot_together.py | 112 ------ workflow/power-tc/plot/plotter.py | 119 ------ 34 files changed, 3 insertions(+), 2867 deletions(-) delete mode 100644 workflow/power-tc/analyse/aggregate_levels.smk delete mode 100644 workflow/power-tc/analyse/common_functions.py delete mode 100644 workflow/power-tc/analyse/country_pair_matrix.smk delete mode 100644 workflow/power-tc/analyse/empirical_distribution.smk delete mode 100644 workflow/power-tc/analyse/extract_percentile.smk delete mode 100644 workflow/power-tc/analyse/power_analyse_all.smk delete mode 100644 workflow/power-tc/analyse/select_percentile.py delete mode 100644 workflow/power-tc/analyse/storm_aggregate_levels.py delete mode 100644 workflow/power-tc/analyse/storm_distribution_empirical.py delete mode 100644 workflow/power-tc/analyse/storm_distribution_empirical_country_matrix.py delete mode 100644 workflow/power-tc/analyse/storm_distribution_empirical_geo.py delete mode 100644 workflow/power-tc/analyse/target_analysis.smk delete mode 100644 workflow/power-tc/analyse/transmission_aggregate.py delete mode 100644 workflow/power-tc/analyse/transmission_aggregate.smk rename workflow/power-tc/{plot => map}/customers_affected_by_storm.smk (100%) rename workflow/power-tc/{plot => map}/target_disruption.smk (100%) rename workflow/power-tc/{analyse => }/network_components.py (100%) rename workflow/power-tc/{analyse => }/network_components.smk (100%) delete mode 100644 workflow/power-tc/plot/EAD_EACA-finder.py delete mode 100644 workflow/power-tc/plot/cross_correlation.py delete mode 100644 workflow/power-tc/plot/diff_agg.py delete mode 100644 workflow/power-tc/plot/diff_cross_correlation.py delete mode 100644 workflow/power-tc/plot/fig_CDFs.smk delete mode 100644 workflow/power-tc/plot/fig_EACA.smk delete mode 100644 workflow/power-tc/plot/fig_EAD.smk delete mode 100644 workflow/power-tc/plot/fig_EAD_EACA.smk delete mode 100644 workflow/power-tc/plot/fig_cross_correlation.smk delete mode 100644 workflow/power-tc/plot/fig_initial.py delete mode 100644 workflow/power-tc/plot/fig_initial.smk delete mode 100644 workflow/power-tc/plot/fig_master.smk delete mode 100644 workflow/power-tc/plot/mean_agg.py delete mode 100644 workflow/power-tc/plot/plot_together.py delete mode 100644 workflow/power-tc/plot/plotter.py diff --git a/workflow/Snakefile b/workflow/Snakefile index 1b83b4e7..55666c65 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -138,10 +138,10 @@ include: "power-tc/network_raster_intersection.smk" include: "power-tc/intersection.smk" include: 
"power-tc/exposure.smk" include: "power-tc/disruption.smk" -include: "power-tc/analyse/network_components.smk" +include: "power-tc/network_components.smk" include: "power-tc/map/storm_tracks.smk" include: "power-tc/map/outages.smk" include: "power-tc/map/wind_fields.smk" -include: "power-tc/plot/target_disruption.smk" -include: "power-tc/plot/customers_affected_by_storm.smk" +include: "power-tc/map/target_disruption.smk" +include: "power-tc/map/customers_affected_by_storm.smk" include: "power-tc/cyclone-grid.smk" diff --git a/workflow/power-tc/analyse/aggregate_levels.smk b/workflow/power-tc/analyse/aggregate_levels.smk deleted file mode 100644 index d44896a1..00000000 --- a/workflow/power-tc/analyse/aggregate_levels.smk +++ /dev/null @@ -1,33 +0,0 @@ -""" -Takes a gpkg file and aggregates to chosen level -""" - -import os - -AGGREGATE_LEVELS_OUT = os.path.join( - STORM_IMPACT_STATISTICS_DIR, - "aggregate", - f"targets_geo_top{config['top_select']}{SORT_BY_INCREASING_SEVERITY}percent_aggregated_region.gpkg", -) - - -rule analyse_aggregate_levels: - input: - os.path.join( - STORM_IMPACT_STATISTICS_DIR, - "aggregate", - f"targets_geo_top{config['top_select']}{SORT_BY_INCREASING_SEVERITY}percent.gpkg", - ), - os.path.join( - config["output_dir"], "input", "admin-boundaries", f"gadm36_levels.gpkg" - ), - params: - output_dir=config["output_dir"], - metrics_target=TARGET_ANALYSIS_METRICS, - top_select=config["top_select"], - increased_severity_sort=config["increased_severity_sort"], - aggregate_level=config["aggregate_level"], - output: - AGGREGATE_LEVELS_OUT, - script: - "./storm_aggregate_levels.py" diff --git a/workflow/power-tc/analyse/common_functions.py b/workflow/power-tc/analyse/common_functions.py deleted file mode 100644 index c3e72508..00000000 --- a/workflow/power-tc/analyse/common_functions.py +++ /dev/null @@ -1,216 +0,0 @@ -"""Set of functions which are shared between other scripts""" - -import os -import pandas as pd - - -def find_storm_tots(results_folder, samples, region): - """Finds the number of storms and number of years for samples and region input""" - tot_years = 0 - tot_storms = 0 - - for sample in samples: - winds_file = os.path.join( - results_folder, - "input", - "STORM", - "events", - f"STORM_DATA_IBTRACS_{region}_1000_YEARS_{sample}.txt", - ) - winds = pd.read_csv(winds_file, keep_default_na=False) - winds.columns = [ - "year", - "month", - "number", - "step", - "basin", - "lat", - "lon", - "pressure", - "wind", - "radius", - "cat", - "landfall", - "dis_land", - ] # https://www.nature.com/articles/s41597-020-0381-2.pdf - winds["number_hur"] = ( - str(sample) - + "_" - + winds["year"].astype(int).astype(str) - + "_" - + winds["number"].astype(int).astype(str) - ) - - tot_years += len(winds["year"].unique()) - tot_storms += len(winds["number_hur"].unique()) - - return tot_storms, tot_years - - -def find_storm_files( - file_type, results_folder, region_eval, sample_eval, nh_eval, thrval -): - """Will return a list of target.gpkg paths for each storm in the selected region and sample and/or storm identifier, the number of total storms (including those with zero damages) and total years - Note: file_type options are 'targets' or 'edges' (for transmission lines) - """ - thrval = str(thrval) - if file_type == "targets": - file_name = "targets" - elif file_type == "edges": - file_name = "edges_affected" - else: - raise RuntimeError("Wrong/no file type entered") - - indiv_storm_path = os.path.join( - results_folder, "power_intersection", "storm_data", "individual_storms" - 
) - - if nh_eval == "None": - nh_eval = None - - storm_totals = 0 - year_total = 0 - - samples_add = [] - if region_eval == None: - region_eval = [os.path.basename(path) for path in os.listdir(indiv_storm_path)] - if sample_eval == None: - for region in region_eval: - samples_add += [ - { - os.path.basename(path) - for path in os.listdir(os.path.join(indiv_storm_path, region)) - } - ] - - sample_eval = samples_add[0] - for samples_test in samples_add[1:]: - sample_eval = sample_eval.union( - samples_test - ) # only keep overlapping samples. Should not remove samples if correctly entered in config. - - if len(sample_eval) != len(samples_add[0]): - print( - f"Not all samples could be evaluated due to overlap. Checking samples: {sample_eval}" - ) - - for region in region_eval: - # print(storm_totals, find_storm_tot(results_folder, sample_eval, region)) - storm_add, year_add = find_storm_tots( - results_folder, sample_eval, region - ) - storm_totals += storm_add - year_total += year_add - - target_paths = [] - for region in region_eval: - for sample in sample_eval: - sample = str(sample) - storms = [ - os.path.basename(path) - for path in os.listdir(os.path.join(indiv_storm_path, region, sample)) - ] - if nh_eval == None: - target_paths_add = [ - os.path.join( - indiv_storm_path, - region, - sample, - file, - str(thrval), - f"{file_name}__storm_r{region}_s{sample}_n{file[6:]}.gpkg", - ) - for file in storms - if os.path.isfile( - os.path.join( - indiv_storm_path, - region, - sample, - file, - str(thrval), - f"{file_name}__storm_r{region}_s{sample}_n{file[6:]}.gpkg", - ) - ) - ] # add only if targets gpkg file exists - else: - target_paths_add = [ - os.path.join( - indiv_storm_path, - region, - sample, - file, - str(thrval), - f"{file_name}__storm_r{region}_s{sample}_n{file[6:]}.gpkg", - ) - for file in storms - if os.path.isfile( - os.path.join( - indiv_storm_path, - region, - sample, - file, - str(thrval), - f"{file_name}__storm_r{region}_s{sample}_n{file[6:]}.gpkg", - ) - ) - and file[6:] in nh_eval - ] # add only if targets gpkg file exists and in nh_eval - - target_paths += target_paths_add - - storm_add, year_add = find_storm_tots(results_folder, sample_eval, region) - storm_totals += storm_add - year_total += year_add - - if len(target_paths) == 0: - print(f"No {file_name} paths...!") - - print(f"{len(target_paths)} targets and {storm_totals} total storms") - return target_paths, storm_totals, year_total - - -def avg(string_input): - """Returns average key for string_input""" - return string_input + "_avg" - - -def sm(string_input): - """Returns sum key for string_input""" - return string_input + "_sum" - - -def ae(string_input): - """Returns anually expected key for string_input""" - return string_input + "_anually-expected" - - -def check_srn(region_eval, sample_eval, nh_eval): - """Performs some checks and fixes""" - - if region_eval == "None": - region_eval = None - if region_eval != None: - assert all(type(x) == str for x in region_eval) - - if sample_eval == "None": - sample_eval = None - if sample_eval != None: - sample_eval = [str(s) if type(s) != str else s for s in sample_eval] - assert all(type(x) == str for x in sample_eval) - - if nh_eval == "None": - nh_eval = None - if nh_eval != None: - assert all(type(x) == str for x in nh_eval) - - return region_eval, sample_eval, nh_eval - - -def traprule(lst, spacing): - """Trapezium rule""" - if len(lst) == 0: - return 0 - if len(lst) == 1: - return lst[0] * spacing # dummy - else: - return 0.5 * spacing * (lst[0] + lst[-1] + 
sum(lst[1:-1])) diff --git a/workflow/power-tc/analyse/country_pair_matrix.smk b/workflow/power-tc/analyse/country_pair_matrix.smk deleted file mode 100644 index e83f9261..00000000 --- a/workflow/power-tc/analyse/country_pair_matrix.smk +++ /dev/null @@ -1,25 +0,0 @@ -""" -Plots the empirical storm relationship matrix between (two) countries and conditional probability relationship -""" - -import os - - -COUNTRY_MATRIX_OUTPUT = [ - os.path.join(STORM_IMPACT_STATISTICS_DIR, "empirical", "storms_hitting_A_and_B_given_either_hit" + ".png"), - os.path.join(STORM_IMPACT_STATISTICS_DIR, "empirical", "likelihood_B_hit_given_A_hit" + ".png"), -] - - -rule analyse_country_matrix: - input: - os.path.join( - STORM_IMPACT_STATISTICS_DIR, f"combined_storm_statistics_{config['central_threshold']}.csv" - ), - params: - output_dir=config["output_dir"], - central_threshold=config["central_threshold"], - output: - COUNTRY_MATRIX_OUTPUT, - script: - "./storm_distribution_empirical_country_matrix.py" diff --git a/workflow/power-tc/analyse/empirical_distribution.smk b/workflow/power-tc/analyse/empirical_distribution.smk deleted file mode 100644 index 7c958492..00000000 --- a/workflow/power-tc/analyse/empirical_distribution.smk +++ /dev/null @@ -1,26 +0,0 @@ -""" -Plots the empirical distribution of storms for simple statistics. -""" - -import os - - -EMPIRICAL_DISTRIBUTION_OUT = [os.path.join(STORM_IMPACT_STATISTICS_DIR, 'empirical', f'empirical_{metric}.png') for metric in STORM_ANALYSIS_METRICS] - - -rule analyse_empirical_distribution: - input: - STORM_STATS_BY_THRESHOLD - params: - output_dir = config['output_dir'], - metrics = STORM_ANALYSIS_METRICS, - region_eval = STORM_BASINS, - sample_eval = SAMPLES, - nh_eval = STORMS, - central_threshold = config['central_threshold'], - minimum_threshold = config['minimum_threshold'], - maximum_threshold = config['maximum_threshold'] - output: - EMPIRICAL_DISTRIBUTION_OUT - script: - './storm_distribution_empirical.py' diff --git a/workflow/power-tc/analyse/extract_percentile.smk b/workflow/power-tc/analyse/extract_percentile.smk deleted file mode 100644 index faeb6777..00000000 --- a/workflow/power-tc/analyse/extract_percentile.smk +++ /dev/null @@ -1,27 +0,0 @@ -""" -Based on percentile from config file, will copy to percentile folder in statistics -""" - -import os - - -PERCENTILE_OUT = directory( - os.path.join(config["output_dir"], "power_output", "statistics", "percentile") -) - - -rule analyse_percentile: - input: - STORM_STATS_BY_THRESHOLD, - params: - output_dir=config["output_dir"], - region_eval=STORM_BASINS, - sample_eval=SAMPLES, - nh_eval=STORMS, - metrics_target=TARGET_ANALYSIS_METRICS, - central_threshold=config["central_threshold"], - percentile=config["percentile"], - output: - PERCENTILE_OUT, - script: - "./select_percentile.py" diff --git a/workflow/power-tc/analyse/power_analyse_all.smk b/workflow/power-tc/analyse/power_analyse_all.smk deleted file mode 100644 index c0b92f67..00000000 --- a/workflow/power-tc/analyse/power_analyse_all.smk +++ /dev/null @@ -1,14 +0,0 @@ -""" -Perform analysis of below selected workflows -""" - - -rule analyse_all: - input: - AGGREGATE_LEVELS_OUT, - EMPIRICAL_DISTRIBUTION_OUT, - COUNTRY_MATRIX_OUTPUT, - TRANSMISSION_OUT, - PERCENTILE_OUT, - CONNECTOR_OUT, - COMPLETION_FLAG_FILES, # to ensure storms are all run diff --git a/workflow/power-tc/analyse/select_percentile.py b/workflow/power-tc/analyse/select_percentile.py deleted file mode 100644 index a0b03c91..00000000 --- 
a/workflow/power-tc/analyse/select_percentile.py +++ /dev/null @@ -1,98 +0,0 @@ -"""Selects the percentile storm as specified in the config file and copies into power_output/statistics/percentile/ -""" -import os -from distutils.dir_util import copy_tree - -import numpy as np -import pandas as pd - -from common_functions import check_srn, find_storm_files - -try: - output_dir = snakemake.params["output_dir"] # type: ignore - region_eval = snakemake.params["region_eval"] # type: ignore - sample_eval = snakemake.params["sample_eval"] # type: ignore - nh_eval = snakemake.params["nh_eval"] # type: ignore - metrics_target = snakemake.params["metrics_target"] # type: ignore - thrval = snakemake.params["central_threshold"] # type: ignore - percentile = snakemake.params["percentile"] # type: ignore -except: # for testing only - # output_dir = 'results' - # region_eval = None #["NA"] # list of regions to analyse (write None if none specified) - # sample_eval = None #[0] # list of samples of ALL regions in region_eval to analyse (write None if none specified) - # nh_eval = None # list of storms to analyse (write None if none specified) - # thrval = 25 - # metrics_target = ['population_without_power', 'effective_population', 'affected_population', 'mw_loss_storm', 'f_value', 'gdp_damage'] - # percentile = 99 - raise RuntimeError("Please use snakemake to define inputs") - -percentile = float(percentile) -assert 0 <= percentile <= 100 -region_eval, sample_eval, nh_eval = check_srn(region_eval, sample_eval, nh_eval) - -target_paths, storm_tot, years_tot = find_storm_files( - "targets", output_dir, region_eval, sample_eval, nh_eval, thrval -) -years_tot_all = 1000 - - -stat_path = os.path.join(output_dir, "power_output", "statistics") -stat_path_percentile = os.path.join(stat_path, "percentile") -if not os.path.exists(stat_path_percentile): - os.makedirs(stat_path_percentile) - -csv_path = os.path.join(stat_path, f"combined_storm_statistics_{thrval}.csv") -stats = pd.read_csv(csv_path, keep_default_na=False) - -metric_sortby = { - "population_without_power": "population with no power (f=0)", - "effective_population": "effective population affected", - "affected_population": "population affected", - "mw_loss_storm": "GDP losses", - "f_value": "GDP losses", - "gdp_damage": "GDP losses", -} # dict: sort the stats file for this metric key by its value -assert ( - all([metric_key in metric_sortby.keys() for metric_key in metrics_target]) == True -) # check all keys available - -rp_percentile = 1 / ((100 - percentile) / 100) # calculate the return period value - -metrics_none = True # set to False when a metric is covering a damage storm with the specified percentile -for metric in metrics_target: - x_count = np.arange(1, storm_tot + 1, 1) - x_ = years_tot / x_count - x = x_[::-1] - - idx = len(x[x >= rp_percentile]) # find index - - storm_select = stats.sort_values( - metric_sortby[metric], ascending=False - ) # select percentile - - if ( - idx <= len(stats) - 1 - ): # if idx > number of storms, there is no damage for that storm - metrics_none = True # now, override - storm_region = storm_select["Storm Region"].iloc[idx] - storm_id = storm_select["Storm ID"].iloc[idx] - storm_sample = storm_id.split("_")[0] - storm_id_directory = os.path.join( - output_dir, - "power_intersection", - "storm_data", - "individual_storms", - storm_region, - str(storm_sample), - f"storm_{storm_id}", - ) - - dest_folder = os.path.join(stat_path_percentile, metric) - if not os.path.exists(dest_folder): - os.makedirs(dest_folder) - - 
copy_tree(storm_id_directory, dest_folder) # copy file of selected storm - else: - print(f"For {metric}, the percentile does not cover a damaging storm") -if metrics_none == False: - print("No files will be in statistics/percentile") diff --git a/workflow/power-tc/analyse/storm_aggregate_levels.py b/workflow/power-tc/analyse/storm_aggregate_levels.py deleted file mode 100644 index cf3e82c8..00000000 --- a/workflow/power-tc/analyse/storm_aggregate_levels.py +++ /dev/null @@ -1,170 +0,0 @@ -"""Takes a gpkg file and aggregates to chosen level - -Outputs a gpkg file with metrics as target features (can use QGIS to plot) -""" -import os - -import fiona -import geopandas as gpd -from shapely.geometry import shape -from tqdm import tqdm - -from common_functions import ae, avg, sm - -try: - output_dir = snakemake.params["output_dir"] # type: ignore - metrics_target = snakemake.params["metrics_target"] # type: ignore - top_select = snakemake.params[ # type: ignore - "top_select" - ] # select (in percent). Set to 100 for all - increased_severity_sort = snakemake.params["increased_severity_sort"] # type: ignore - layer_num = snakemake.params["aggregate_level"] # type: ignore -except: # for testing only - output_dir = "results" - metrics_target = [ - "population_without_power", - "effective_population", - "affected_population", - "mw_loss_storm", - "f_value", - "gdp_damage", - ] - top_select = 100 - increased_severity_sort = True - layer_num = 1 - raise RuntimeError("Please use snakemake to define inputs") - -increased_severity_sort_bool = str(increased_severity_sort)[0] # either T or F - - -metrics_target_nof = metrics_target.copy() -metrics_target_nof.remove("f_value") -metrics_target_avg = [avg(metric) for metric in metrics_target] -metrics_target_sum = [ - sm(metric) for metric in metrics_target_nof -] # f_value does not make sense for sum -metrics_target_ae = [ - ae(metric) for metric in metrics_target_nof -] # f_value does not make sense for ae -metric_keys = metrics_target_sum + metrics_target_avg + metrics_target_ae - - -folder_agg = os.path.join(output_dir, "power_output", "statistics", "aggregate") -examine_file = os.path.join( - folder_agg, f"targets_geo_top{top_select}{increased_severity_sort_bool}percent.gpkg" -) -quantile_file = gpd.read_file(examine_file, keep_default_na=False) -quantile_file.crs = "EPSG:4326" -countries_relevant = set( - quantile_file.country.unique() -) # set of countries in which to check (others irrelevant, significant computational improvement) - -print(f"loading level {layer_num} data") -with fiona.open( - os.path.join(output_dir, "input", "admin-boundaries", f"gadm36_levels.gpkg"), - "r", - layer=layer_num, -) as src_code: - code_geoms = [] - code_GIDs = [] - for feature in src_code: - if ( - feature["properties"]["GID_0"] in countries_relevant - ): # only include search in countries that contain targets - code_geoms.append(shape(feature["geometry"])) - code_GIDs.append(feature["properties"][f"GID_{layer_num}"]) - print("creating dataframe") - code_geoms_gpd = gpd.GeoDataFrame( - {"geometry": code_geoms, "code": code_GIDs}, crs="EPSG:4326" - ) - -quantile_file["centre"] = quantile_file["geometry"].centroid - - -# code_geoms_gpd in quantile box -qf_bounds = quantile_file.bounds -qf_maxx = qf_bounds["maxx"].max() -qf_minx = qf_bounds["minx"].min() -qf_maxy = qf_bounds["maxy"].max() -qf_miny = qf_bounds["miny"].min() - -cg_bounds = code_geoms_gpd.bounds -cg_maxx_bool = ( - cg_bounds["maxx"] < qf_minx -) # True if max region x is less than min quantile x 
-cg_minx_bool = ( - cg_bounds["minx"] > qf_maxx -) # True if min region x is more than max quantile x -cg_maxy_bool = ( - cg_bounds["maxy"] < qf_miny -) # True if max region is less than min quantile y -cg_miny_bool = ( - cg_bounds["miny"] > qf_maxy -) # True if min region is more than max quantile y -bool_keep = [ - False - if cg_maxx_bool[x] + cg_minx_bool[x] + cg_maxy_bool[x] + cg_miny_bool[x] >= 1 - else True - for x in range(len(cg_bounds)) -] # if 1 or higher, then there is at least one True element -code_geoms_gpd = code_geoms_gpd[bool_keep] - -map_dict = {} - -for geom_area in tqdm( - code_geoms_gpd.itertuples(), total=len(code_geoms_gpd), desc="geom_intersect" -): - - bool_list = [ - True if geom_area.geometry.covers(c) else False for c in quantile_file.centre - ] # check contained (tehcnically covered as can lie on boundary, later is removed, so no issues with double counting) in geometry layer - - overlap_quantile = quantile_file[bool_list] - if len(overlap_quantile) >= 1: - remove_id = overlap_quantile["index"] - if geom_area.code not in map_dict.keys(): - map_dict[geom_area.code] = { - metric: [] for metric in metric_keys - } # dict(zip(metric_keys, [[]]*len(metric_keys))) - for metric in metrics_target: - if "f_value" in metric: - map_dict[geom_area.code][avg(metric)] = overlap_quantile[ - avg(metric) - ].mean() - else: - map_dict[geom_area.code][sm(metric)] = overlap_quantile[ - sm(metric) - ].sum() - map_dict[geom_area.code][avg(metric)] = overlap_quantile[ - avg(metric) - ].mean() - map_dict[geom_area.code][ae(metric)] = overlap_quantile[ - ae(metric) - ].sum() - - if "population" in metric: - new_key = f"{metric}_annually-expected_region_fraction" - if new_key not in metric_keys: - metric_keys.append(new_key) - map_dict[geom_area.code][new_key] = ( - overlap_quantile[ae(metric)].sum() - / overlap_quantile.population.sum() - ) # fraction of total target populations in the quantile file overlap (ie all targets in the geometry area) - - quantile_file = quantile_file[ - ~quantile_file["index"].isin(remove_id) - ] # remove classified targets - - -for metric in metric_keys: - map_dict_indiv = { - k: v[metric] for k, v in map_dict.items() - } # {code1: metric_value1, code2: metric_value2 ... } - code_geoms_gpd[metric] = code_geoms_gpd["code"].map(map_dict_indiv).fillna(0) - - -new_file_name = examine_file.replace("percent", f"percent_aggregated_region") -if len(code_geoms_gpd) == 0: - code_geoms_gpd = gpd.GeoDataFrame({"geometry": [None]}) # add dummy for snakemake -code_geoms_gpd.to_file(new_file_name, driver="GPKG") -print("top_select written to file") diff --git a/workflow/power-tc/analyse/storm_distribution_empirical.py b/workflow/power-tc/analyse/storm_distribution_empirical.py deleted file mode 100644 index 4d7d203d..00000000 --- a/workflow/power-tc/analyse/storm_distribution_empirical.py +++ /dev/null @@ -1,157 +0,0 @@ -"""Plots the empirical distribution of storms for simple statistics. - -Only the combined storm .csv files are examined, rather than the .gpkg target files. This is -much faster but limited to the available statistics in the .txt files. 
Outputs plots of metrics -vs return periods -""" -import os - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd - -from common_functions import check_srn, find_storm_files - -try: - output_dir = snakemake.params["output_dir"] # type: ignore - metrics = snakemake.params["metrics"] # type: ignore - region_eval = snakemake.params["region_eval"] # type: ignore - sample_eval = snakemake.params["sample_eval"] # type: ignore - nh_eval = snakemake.params["nh_eval"] # type: ignore - central_threshold = snakemake.params["central_threshold"] # type: ignore - minimum_threshold = snakemake.params["minimum_threshold"] # type: ignore - maximum_threshold = snakemake.params["maximum_threshold"] # type: ignore -except: # for testing only - output_dir = "results" - metrics = [ - "GDP losses", - "targets with no power (f=0)", - "population affected", - "population with no power (f=0)", - "effective population affected", - ] - region_eval = None - sample_eval = None - nh_eval = None - central_threshold = 25 - minimum_threshold = 20 - maximum_threshold = 30 - raise RuntimeError("Please use snakemake to define inputs") - - -def stat_file(thrval): - """Returns pandas stat file for thrval value""" - csv_path = os.path.join(stat_path, f"combined_storm_statistics_{thrval}.csv") - return pd.read_csv(csv_path, keep_default_na=False) - - -def y_vals(df, metric): - """Returns an array of sorted points of the metric column in the df.""" - stats_sorted = df.sort_values(metric) # sort for damages - stats_sorted = stats_sorted[stats_sorted[metric] != 0] # remove zeros - y = np.array(stats_sorted[metric]) - return y - - -def extme(y, len_reach): - """Extends y with zeros at front to length len_reach""" - if len(y) > len_reach: - raise RuntimeError("len_reach is incorrectly specified") - - if len(y) < len_reach: - zero_extend = np.zeros(len_reach - len(y)) # extension - if len(y) != 0: - y = np.concatenate((zero_extend, y)) # join - else: - y = zero_extend - return y - - -## Inputs ## - - -region_eval, sample_eval, nh_eval = check_srn(region_eval, sample_eval, nh_eval) - - -stat_path = os.path.join(output_dir, "power_output", "statistics") - -stat_path_empirical = os.path.join(stat_path, "empirical") -if not os.path.exists(stat_path_empirical): - os.makedirs(stat_path_empirical) - -stat_path_empirical_data = os.path.join(stat_path_empirical, "empirical_plotting_data") -if not os.path.exists(stat_path_empirical_data): - os.makedirs(stat_path_empirical_data) - - -_, storms_tot, years_tot = find_storm_files( - "targets", output_dir, region_eval, sample_eval, nh_eval, central_threshold -) # maximum return period (tot number of years) - - -def y_extend(y1, y2, y3): - """Extends (front) to include 0s to reach len(y_i)==max_i(y_i) for all y""" - maxlen = max(len(y1), len(y2), len(y3)) - return extme(y1, maxlen), extme(y2, maxlen), extme(y3, maxlen) - - -stats_min = stat_file(minimum_threshold) -stats_max = stat_file(maximum_threshold) -stats_cen = stat_file(central_threshold) - -for ii, metric in enumerate(metrics): - - f = plt.figure(ii) - f.set_figwidth(10) - f.set_figheight(8) - - y_min_base = y_vals(stats_min, metric) - y_max_base = y_vals(stats_max, metric) - y_cen_base = y_vals(stats_cen, metric) - - y_min, y_max, y_cen = y_extend(y_min_base, y_max_base, y_cen_base) - - x_count = np.arange(1, len(y_cen) + 1, 1) - x = years_tot / x_count # this is how return periods are defined - x = x[::-1] # correct order - - plt.fill_between(x, y_min, y_max, color=((138 / 256, 171 / 256, 1)), alpha=0.2) - plt.plot( - 
x, - y_min, - color="b", - linestyle=":", - label=f"Minimum wind threshold ({minimum_threshold}m/s)", - ) # plot interpolated line - plt.plot( - x, - y_max, - color="b", - linestyle="--", - label=f"Maximum wind threshold ({maximum_threshold}m/s)", - ) # plot interpolated line - plt.plot( - x, y_cen, color="r", label=f"Central wind threshold ({central_threshold}m/s)" - ) # plot interpolated line - plt.scatter(x, y_cen, s=2, color="r") # plot data points - - plt.xlabel("Return Period") - plt.ylabel(metric) - plt.title(f"Empirical - {metric}") - - plt.grid(axis="both", which="both") - plt.xscale("log") - - plt.legend() - plt.show() - plt.savefig(os.path.join(stat_path_empirical, f"empirical_{metric}.png")) - - # save data (in case of replot) - data_save = pd.DataFrame({"x": x, "y_min": y_min, "y_max": y_max, "y_cen": y_cen}) - data_save.to_csv( - os.path.join(stat_path_empirical_data, f"empirical_{metric}_plotting_data.csv"), - index=False, - ) - -print("Plotted all empirical") -# plt.close('all') diff --git a/workflow/power-tc/analyse/storm_distribution_empirical_country_matrix.py b/workflow/power-tc/analyse/storm_distribution_empirical_country_matrix.py deleted file mode 100644 index 76dea562..00000000 --- a/workflow/power-tc/analyse/storm_distribution_empirical_country_matrix.py +++ /dev/null @@ -1,170 +0,0 @@ -"""Plots the empirical storm relationship matrix between (two) countries and conditional -probability relationship -""" -import itertools as it -import os -import sys - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -from tqdm import tqdm - -try: - output_dir = snakemake.params["output_dir"] # type: ignore - thrval = snakemake.params["central_threshold"] # type: ignore -except: - output_dir = sys.argv[1] - thrval = sys.argv[2] - raise RuntimeError("Please use snakemake to define inputs") - - -## Inputs ## - - -def plot_relation_matrix(matrix, title, fig_num): - """Plots and saves imshow""" - f = plt.figure(fig_num) - f.set_figwidth(10) - f.set_figheight(8) - - plt.imshow(matrix, cmap="viridis") - plt.xlabel("Country B") - plt.yticks(list(country_index.values()), labels=list(country_index.keys())) - plt.xticks( - list(country_index.values()), labels=list(country_index.keys()), rotation=90 - ) - plt.ylabel("Country A") - - plt.title(title) - plt.colorbar() - - plt.show() - plt.savefig(os.path.join(stat_path_empirical, title)) - - -## load stats ## -stat_path = os.path.join(output_dir, "power_output", "statistics") -csv_path = os.path.join(stat_path, f"combined_storm_statistics_{thrval}.csv") -stats = pd.read_csv(csv_path, keep_default_na=False) -storm_count = len(stats) - -stat_path_empirical = os.path.join(stat_path, "empirical") -if not os.path.exists(stat_path_empirical): - os.makedirs(stat_path_empirical) -stat_path_empirical_data = os.path.join(stat_path_empirical, "empirical_plotting_data") -if not os.path.exists(stat_path_empirical_data): - os.makedirs(stat_path_empirical_data) - -country_storm_count = dict() # dictionary {country1: number_of_storms, country2: ... } -countries_overlap_master = ( - dict() -) # dictionary {country1_country2: total_overlap_storm_count, country1_country3: ... 
} note that the values represent the intersection
-all_countries = set()  # set of all countries investigated
-for jj, stats_indiv in tqdm(
-    enumerate(stats["affected countries"]), desc="Iterating targets", total=len(stats)
-):
-    if type(stats_indiv) == str:  # First check for string
-        if len(stats_indiv) >= 1:  # Then (if string) check not ""
-            individual_countries = stats_indiv.split("_")
-            all_countries.update(individual_countries)
-
-            for country_a, country_b in it.combinations(
-                individual_countries, 2
-            ):  # for each unique country pair
-                country1 = min(country_a, country_b)
-                country2 = max(country_a, country_b)
-                country_key = country1 + "_" + country2  # min first then max (str)
-                if country_key in countries_overlap_master.keys():
-                    countries_overlap_master[country_key] = (
-                        countries_overlap_master[country_key] + 1
-                    )  # one more storm which has both countries (intersection)
-                else:
-                    countries_overlap_master[country_key] = 1
-
-            for country in individual_countries:
-                if country in country_storm_count.keys():
-                    country_storm_count[country] = (
-                        country_storm_count[country] + 1
-                    )  # one more storm
-                else:
-                    country_storm_count[country] = 1  # first storm
-
-# set country index
-all_countries_list = list(all_countries)
-all_countries_list.sort()  # ensure the same ordering each run
-country_index = dict(
-    zip(all_countries_list, list(range(len(all_countries))))
-)  # {country1: 0, country2: 1, country3: 2, ...}
-
-# create matrix; note that the row number corresponds to the country in the country index (same for column)
-country_matrix_unint = -1 * np.ones(
-    (len(country_index), len(country_index))
-)  # intersection-over-union matrix
-country_matrix_condprob = country_matrix_unint.copy()  # conditional probability matrix
-
-
-# note any non-overlapping
-for country_a, country_b in it.combinations(
-    all_countries, 2
-):  # for each unique country pair
-    country1 = min(country_a, country_b)
-    country2 = max(country_a, country_b)
-    key1 = country1 + "_" + country2
-    key2 = country2 + "_" + country1
-    if key1 not in countries_overlap_master.keys():
-        countries_overlap_master[key1] = 0  # note no intersection
-
-    if country1 not in country_storm_count.keys():
-        country_storm_count[country1] = 0  # note no storms
-
-    if country2 not in country_storm_count.keys():
-        country_storm_count[country2] = 0  # note no storms
-
-# assign data
-for country_pair, count in countries_overlap_master.items():
-    country1, country2 = country_pair.split("_")
-    tot_storms = (
-        country_storm_count[country1] + country_storm_count[country2] - count
-    )  # total number of storms for both countries (union)
-    idx1 = max(country_index[country1], country_index[country2])
-    idx2 = min(country_index[country1], country_index[country2])
-    country_matrix_unint[idx1, idx2] = (
-        count / tot_storms
-    )  # add the count (intersection) for the countries using the country_index index for the respective countries as a fraction: intersection/union
-
-    country_matrix_condprob[country_index[country1], country_index[country2]] = (
-        count / country_storm_count[country1]
-    )  # given country 1 is hit, what is the likelihood of country 2 being hit
-    country_matrix_condprob[country_index[country2], country_index[country1]] = (
-        count / country_storm_count[country2]
-    )  # given country 2 is hit, what is the likelihood of country 1 being hit
-
-# mask unfilled placeholder (-1) entries as missing
-country_matrix_unint[country_matrix_unint == -1] = np.nan
-country_matrix_condprob[country_matrix_condprob == -1] = np.nan
-
-
-dfA = pd.DataFrame(country_matrix_unint)
-dfA.columns = all_countries_list
-dfA.index = all_countries_list
-dfA.to_csv(os.path.join(stat_path_empirical_data, "country_matrix_other.csv"))
-
-dfB = pd.DataFrame(country_matrix_condprob)
-dfB.columns = all_countries_list
-dfB.index = all_countries_list
-dfB.to_csv(os.path.join(stat_path_empirical_data, "country_matrix_both.csv"))
-
-
-x = list(range(len(country_index))) * len(country_index)
-y = []
-_ = [y.append(len(country_index) * [idx]) for idx in range(len(country_index))]
-
-title_unint = (
-    "Empirical fraction of a storm hitting both countries given it hits one of them"
-)
-plot_relation_matrix(country_matrix_unint, title_unint, 0)
-
-title_condprob = "Given country A is hit, what is the likelihood also B is hit"
-plot_relation_matrix(country_matrix_condprob, title_condprob, 1)
-# plt.close('all')
diff --git a/workflow/power-tc/analyse/storm_distribution_empirical_geo.py b/workflow/power-tc/analyse/storm_distribution_empirical_geo.py
deleted file mode 100644
index cdcfa948..00000000
--- a/workflow/power-tc/analyse/storm_distribution_empirical_geo.py
+++ /dev/null
@@ -1,230 +0,0 @@
-"""For each target performs parameter analysis/gathering
-
-The storms' .gpkg files are examined, rather than the combined statistics .csv file. This is much slower but has a
-wider statistical applicability. Here this feature is used to save the metrics directly to the targets in a gpkg file.
-A selected value (top_select) may be given to extract only the top_select % of storms (ranked by the metric examined).
-If top_select = 100 then all storms are shown.
-
-Outputs gpkg file with metrics as target features (option to select top_select % (ranked by metric) to this file)
-
-"""
-import os
-
-import geopandas as gpd
-import numpy as np
-import pandas as pd
-from tqdm import tqdm
-
-from common_functions import ae, avg, check_srn, find_storm_files, sm, traprule
-
-try:
-    output_dir = snakemake.params["output_dir"]  # type: ignore
-    metrics_target = snakemake.params["metrics_target"]  # type: ignore
-    top_select = snakemake.params[  # type: ignore
-        "top_select"
-    ]  # select (in percent).
Set to 100 for all - increased_severity_sort = snakemake.params["increased_severity_sort"] # type: ignore - region_eval = snakemake.params["region_eval"] # type: ignore - sample_eval = snakemake.params["sample_eval"] # type: ignore - nh_eval = snakemake.params["nh_eval"] # type: ignore - thrval = snakemake.params["central_threshold"] # type: ignore -except: # for testing only - output_dir = "results" - metrics_target = [ - "population_without_power", - "effective_population", - "affected_population", - "mw_loss_storm", - "f_value", - "gdp_damage", - ] - top_select = 100 - increased_severity_sort = True - region_eval = ( - None # ["NA"] # list of regions to analyse (write None if none specified) - ) - sample_eval = None # [0] # list of samples of ALL regions in region_eval to analyse (write None if none specified) - nh_eval = None # ['0_2005_97'] # list of storms to analyse (write None if none specified) - thrval = 43 - raise RuntimeError("Please use snakemake to define inputs") - - -region_eval, sample_eval, nh_eval = check_srn(region_eval, sample_eval, nh_eval) - - -assert 0 <= top_select <= 100 -assert increased_severity_sort in [True, False] -increased_severity_sort_bool = str(increased_severity_sort)[0] - -## Inputs ## - - -stat_path = os.path.join(output_dir, "power_output", "statistics") -csv_path = os.path.join(stat_path, f"combined_storm_statistics_{thrval}.csv") -stats = pd.read_csv(csv_path, keep_default_na=False) - -target_paths, storm_tot, years_tot = find_storm_files( - "targets", output_dir, region_eval, sample_eval, nh_eval, thrval -) -if len(target_paths) == 0: - raise RuntimeError("No targets could be found. Shutting down process.") -assert len(target_paths) <= storm_tot - - -stat_path_empirical = os.path.join(stat_path, "empirical") -if not os.path.exists(stat_path_empirical): - os.makedirs(stat_path_empirical) - - -# return period -x_count = np.arange(1, storm_tot + 1, 1) -x_ = years_tot / x_count -x = x_[::-1] - -metric_sortby = { - "population_without_power": "population with no power (f=0)", - "effective_population": "effective population affected", - "affected_population": "population affected", - "mw_loss_storm": "GDP losses", - "f_value": "GDP losses", - "gdp_damage": "GDP losses", -} # dict: sort the stats file for this metric key by its value -assert ( - all([metric_key in metric_sortby.keys() for metric_key in metrics_target]) == True -) # check all keys available -metrics_target_avg = [avg(metric) for metric in metrics_target] -metrics_target_sum = [sm(metric) for metric in metrics_target] -metric_keys = metrics_target_sum + metrics_target_avg -metric_dict = dict(zip(metric_keys, [[]] * 2 * len(metrics_target))) -metrics_target_w_ae = metrics_target + [ - ae(metric) for metric in metrics_target -] # add annually expected - - -top_select_frac = int((top_select / 100) * storm_tot) # fraction -if increased_severity_sort == True: - text_extra = " from least to most damage" -else: - text_extra = "from most to least damage (i.e. the top 'worst' storms)" -print( - f"Total {storm_tot}, stats on {len(stats)} and examining {top_select_frac} {text_extra}. Length targets is {len(target_paths)}" -) -storm_id_metrics = ( - {} -) # dictionary {metric1: {stormids_top_quantile_for metric1...}, metric2: {stormids_top_quantile_for metric2...}, ... 
} - -for metric in metrics_target: - storm_id_metrics[metric] = set( - stats.sort_values(metric_sortby[metric], ascending=increased_severity_sort)[ - "Storm ID" - ][:top_select_frac] - ) # saves a set of the top selected quantile (sorted appropriately) - storms_increasing_severity = stats.sort_values( - metric_sortby[metric], ascending=True - )["Storm ID"][:top_select_frac] - - -metric_data_base = ( - {} -) # {target1: {metric1: val, metric2: ... , geometry: geom}, target2: {... } ...} base info, no sum or average -metric_data = {} # wil include sums and averages of above - -# FILTER TARGET PATHS FOR STORM - -metric_list_order = dict( - zip(metrics_target, [[]] * len(metrics_target)) -) # dictionary {metric1: [stormA, stormB, ...], metric2: ... } order of which storms analysed for that metric. -for jj, target_path in tqdm( - enumerate(target_paths), desc="Iterating targets", total=len(target_paths) -): - storm = os.path.basename(target_path).split("_n")[-1][:-5] # extract storm - - targets = gpd.read_file( - target_path, - dtype={"population": float, "gdp_damage": float, "mw_loss_storm": float}, - ) # [['population', 'population_density_at_centroid', 'gdp', 'id', 'f_value', 'mw_loss_storm', 'gdp_damage', 'geometry']] - - # add population definitions - zero_pop = [ - t.population if float(t.f_value) == 0 else 0 for t in targets.itertuples() - ] - targets["population_without_power"] = zero_pop - targets["effective_population"] = (1 - targets["f_value"].astype(float)) * targets[ - "population" - ].astype(float) - aff_pop = [ - t.population if float(t.f_value) < 1 else 0 for t in targets.itertuples() - ] - targets["affected_population"] = aff_pop - - targets["f_value"] = 1 - targets["f_value"].astype( - float - ) # rescale f_rescale = 1 - f (this means that the storms with no damages (ie do not have gpkg files) have f = 0 and so, later the average (which of course has to include ALL storms) is correct. 
After averages are taken, it is rescaled back 1 - f_rescale - for target_indiv in targets.itertuples(): - if target_indiv.id not in metric_data.keys(): - metric_data_new = dict( - zip(metrics_target_w_ae, [[]] * len(metrics_target_w_ae)) - ) # empty (sub)dict with metrics as keys - metric_data_base[target_indiv.id] = metric_data_new - - metric_data[target_indiv.id] = dict( - { - "geometry": target_indiv.geometry, - "country": target_indiv.country, - "population": target_indiv.population, - } - ) # set up for later - - for ii, metric in enumerate(metrics_target): # add the metric data - if ( - storm in storm_id_metrics[metric] - ): # only if in top selected quantile (sorted by metric) - - metric_value = getattr(target_indiv, metric) - - metric_data_base[target_indiv.id][metric] = metric_data_base[ - target_indiv.id - ][metric] + [metric_value] - - -for target_key in metric_data.keys(): # for each target.id - for metric in metrics_target: # for each metric - if "f_value" in metric: - metric_data[target_key][avg(metric)] = ( - 1 - sum(metric_data_base[target_key][metric]) / storm_tot - ) # find average and rescale back to correct - - metric_data[target_key][ae(metric)] = 1 - sum( - metric_data_base[target_key][ae(metric)] - ) # find average and rescale back to correct - else: - metric_data[target_key][avg(metric)] = ( - sum(metric_data_base[target_key][metric]) / storm_tot - ) # find average - metric_sum = sum(metric_data_base[target_key][metric]) - metric_data[target_key][sm(metric)] = metric_sum # find sum - - metric_data[target_key][ae(metric)] = traprule( - metric_data_base[target_key][metric], 1 / years_tot - ) # annually expected - - -targets_combined = gpd.GeoDataFrame(metric_data).T # include transpose due to list -t_cols = list(targets_combined.columns) -t_cols.remove("geometry") # remove from list to be forced to float -t_cols.remove("country") # remove from list to be forced to float -targets_combined = targets_combined.astype( - dict(zip(t_cols, [float] * len(t_cols))) -) # force float -print(targets_combined.describe()) -folder_agg = os.path.join(output_dir, "power_output", "statistics", "aggregate") -if not os.path.exists(folder_agg): - os.makedirs(folder_agg) -targets_combined.to_file( - os.path.join( - folder_agg, - f"targets_geo_top{top_select}{increased_severity_sort_bool}percent.gpkg", - ), - driver="GPKG", -) -print("Written to file") diff --git a/workflow/power-tc/analyse/target_analysis.smk b/workflow/power-tc/analyse/target_analysis.smk deleted file mode 100644 index c35972a3..00000000 --- a/workflow/power-tc/analyse/target_analysis.smk +++ /dev/null @@ -1,29 +0,0 @@ -""" -For each target performs parameter analysis/gathering -""" - -import os - - -rule analyse_targets: - input: - os.path.join( - STORM_IMPACT_STATISTICS_DIR, f"combined_storm_statistics_{config['central_threshold']}.csv" - ), - params: - output_dir=config["output_dir"], - metrics_target=TARGET_ANALYSIS_METRICS, - top_select=config["top_select"], - increased_severity_sort=config["increased_severity_sort"], - region_eval=STORM_BASINS, - sample_eval=SAMPLES, - nh_eval=STORMS, - central_threshold=config["central_threshold"], - output: - os.path.join( - STORM_IMPACT_STATISTICS_DIR, - "aggregate", - f"targets_geo_top{config['top_select']}{SORT_BY_INCREASING_SEVERITY}percent.gpkg", - ), - script: - "./storm_distribution_empirical_geo.py" diff --git a/workflow/power-tc/analyse/transmission_aggregate.py b/workflow/power-tc/analyse/transmission_aggregate.py deleted file mode 100644 index a4082401..00000000 --- 
a/workflow/power-tc/analyse/transmission_aggregate.py
+++ /dev/null
@@ -1,373 +0,0 @@
-"""Sums the count of hits on all transmission lines for selected region, sample, storm and
-aggregates reconstruction cost to the chosen admin level
-"""
-import json
-import os
-import time
-
-import fiona
-import geopandas as gpd
-import numpy as np
-import pandas as pd
-from geopy import distance
-from shapely.geometry import LineString, shape
-from tqdm import tqdm
-
-from common_functions import check_srn, find_storm_files
-
-try:
-    output_dir = snakemake.params["output_dir"]  # type: ignore
-    layer_num = snakemake.params["aggregate_level"]  # type: ignore
-    region_eval = snakemake.params["region_eval"]  # type: ignore
-    sample_eval = snakemake.params["sample_eval"]  # type: ignore
-    nh_eval = snakemake.params["nh_eval"]  # type: ignore
-    thrval = snakemake.params["central_threshold"]  # type: ignore
-except:
-    output_dir = "results"  # sys.argv[1]
-    layer_num = 1
-    region_eval = [
-        "NA"
-    ]  # ["NA"]  # list of regions to analyse (write None if none specified)
-    sample_eval = [
-        "0"
-    ]  # [0]  # list of samples of ALL regions in region_eval to analyse (write None if none specified)
-    nh_eval = ["0_2017_66"]  # list of storms to analyse (write None if none specified)
-    thrval = 41
-    raise RuntimeError("Please use snakemake to define inputs")
-
-
-def eval_dist(linestring_df):
-    """Evaluate the coordinate dataframe and return the total distance in km"""
-
-    dist_tot = 0
-    for ii in range(len(linestring_df)):
-        if type(linestring_df.iloc[ii].geometry) == type(
-            LineString([[1, 2], [3, 4]])
-        ):  # check is linestring
-            line_coords = list(
-                linestring_df.iloc[ii].geometry.coords
-            )  # extract the coordinates of a row
-            dist_tot += eval_coordist(line_coords)
-
-        else:  # multistring
-            for ms in range(len(linestring_df.iloc[ii]["geometry"].geoms)):
-                line_coords = list(
-                    linestring_df.iloc[ii].geometry[ms].coords
-                )  # extract the coordinates of a row
-                dist_tot += eval_coordist(line_coords)
-
-    return dist_tot
-
-
-def eval_dist_lst(linestring_df):
-    """Evaluate the coordinate dataframe and return a list of distances in km"""
-
-    lst = []
-    for ii in range(len(linestring_df)):
-        dist_tot = 0
-        if type(linestring_df.iloc[ii].geometry) == type(
-            LineString([[1, 2], [3, 4]])
-        ):  # check is linestring
-            line_coords = list(
-                linestring_df.iloc[ii].geometry.coords
-            )  # extract the coordinates of a row
-            dist_tot = eval_coordist(line_coords)
-
-        else:  # multistring
-            for ms in range(len(linestring_df.iloc[ii]["geometry"].geoms)):
-                line_coords = list(
-                    linestring_df.iloc[ii].geometry[ms].coords
-                )  # extract the coordinates of a row
-                dist_tot += eval_coordist(line_coords)
-
-        lst.append(dist_tot)
-
-    return lst
-
-
-def eval_coordist(coords):
-    """Evaluate the coordinates and return the distance in km"""
-    dist = 0
-    line_coords = [(x[1], x[0]) for x in coords]
-    if len(line_coords) >= 2:
-        for jj in range(len(line_coords) - 1):
-            dist += distance.distance(
-                line_coords[jj], line_coords[jj + 1]
-            ).km  # add the km length of that row
-    return dist
-
-
-region_eval, sample_eval, nh_eval = check_srn(region_eval, sample_eval, nh_eval)
-
-
-boxes_county_file = os.path.join(
-    output_dir, "power_processed", "world_boxes_metadata.json"
-)
-with open(boxes_county_file, "r") as src:
-    boxes_country = json.load(src)["box_country_dict"]
-
-
-transmission_paths, storm_tot, years_tot = find_storm_files(
-    "edges", output_dir, region_eval, sample_eval, nh_eval, thrval
-)
-
-return_periods_count = np.arange(1, storm_tot + 1, 1)
-return_periods_ = years_tot
/ return_periods_count -return_periods = return_periods_[::-1] - - -stat_path = os.path.join(output_dir, "power_output", "statistics") -csv_path = os.path.join(stat_path, f"combined_storm_statistics_{thrval}.csv") -stats = pd.read_csv(csv_path, keep_default_na=False) -storms_increasing_severity = stats.sort_values("reconstruction cost", ascending=True)[ - "Storm ID" -] -storm_id_metrics_weighting = dict( - zip(storms_increasing_severity, return_periods) -) # dictionary {storm1: return_period_of_storm1, storm2: ... } - - -folder_agg = os.path.join(output_dir, "power_output", "statistics", "aggregate") -if not os.path.exists(folder_agg): - os.makedirs(folder_agg) - -freq_hit_path = os.path.join(folder_agg, "transmission_line_frequency_hit.gpkg") -recon_path = freq_hit_path.replace("frequency_hit", "reconstruction_costs") - -if len(transmission_paths) == 0: - print("No targets could be found. Writing dummy files (for snakemake).") - dummy = gpd.GeoDataFrame({"geometry": [None]}) - dummy.to_file(freq_hit_path, driver="GPKG") - dummy.to_file(recon_path, driver="GPKG") - -else: - - assert len(transmission_paths) <= storm_tot - - countries_relevant = ( - set() - ) # list of countries which contain relevant data (rest can be ignored) - transmission_dict = dict() - transmission_lst = [] - for ii, transmission_path in tqdm( - enumerate(transmission_paths), - desc="Iterating transmission lines", - total=len(transmission_paths), - ): - - storm = os.path.basename(transmission_path).split("_n")[-1][ - :-5 - ] # extract storm - - transmission = gpd.read_file(transmission_path)[ - ["link", "geometry", "box_id", "reconstruction_cost"] - ] - countries_relevant = countries_relevant.union( - set().union( - *[set(boxes_country[box]) for box in transmission.box_id.unique()] - ) - ) # update relevant countires - for transmission_indiv_link in set(transmission.link): - if transmission_indiv_link in transmission_dict.keys(): - transmission_dict[transmission_indiv_link][0] += 1 - - weighting_factor = ( - 1 / years_tot - ) # return period weighting factor for annual expected. 
Note this is a pretty good approximation to the trapezium rule for large n - new_dict = { - transmission_indiv.link: [ - 1, - transmission_indiv.geometry, - transmission_indiv.reconstruction_cost, - weighting_factor * transmission_indiv.reconstruction_cost, - ] - for transmission_indiv in transmission.itertuples() - if transmission_indiv.link not in transmission_dict.keys() - } - transmission_dict.update(new_dict) - - max_val = max([x[0] for x in transmission_dict.values()]) - print("Max value: ", max_val) - - transmission_master = gpd.GeoDataFrame( - { - "link": transmission_dict.keys(), - "count_damage": [x[0] for x in transmission_dict.values()], - "geometry": [x[1] for x in transmission_dict.values()], - "reconstruction_cost": [x[2] for x in transmission_dict.values()], - "reconstruction_cost_annual_expected": [ - x[3] for x in transmission_dict.values() - ], - } - ) - - eval_dist_lst_output = eval_dist_lst(transmission_master) - - transmission_master["reconstruction_cost_annual_expected_per_km"] = [ - transmission_master["reconstruction_cost_annual_expected"].iloc[jj] - / eval_dist_lst_output[jj] - if eval_dist_lst_output[jj] != 0 - else 0 - for jj in range(len(transmission_master)) - ] - - # Fix geometries cut by units - links = set(transmission_master.link.unique()) - link_boxes = set( - [f"box_{l.split('__')[0].split('_')[-1]}" for l in links] - + [f"box_{l.split('__')[1].split('_')[-1]}" for l in links] - ) - for ii, box_id in enumerate(link_boxes): - if ii == 0: - box_network = gpd.read_file( - os.path.join( - output_dir, - "power_processed", - "all_boxes", - box_id, - f"network_{box_id}.gpkg", - ) - ) - else: - box_network_add = gpd.read_file( - os.path.join( - output_dir, - "power_processed", - "all_boxes", - box_id, - f"network_{box_id}.gpkg", - ) - ) - box_network = box_network.append(box_network_add) - - box_network["link"] = box_network.apply( - lambda e: "__".join(sorted([e.from_id, e.to_id])), axis=1 - ) # consistent naming - - geom_dict = dict(zip(box_network["link"], box_network["geometry"])) - - transmission_master["geometry"] = transmission_master["link"].map( - geom_dict - ) # fix truncated geoms - - transmission_master.to_file(freq_hit_path, driver="GPKG") - - # Then aggregate - print("Aggregating reconstruction costs") - with fiona.open( - os.path.join(output_dir, "input", "admin-boundaries", f"gadm36_levels.gpkg"), - "r", - layer=layer_num, - ) as src_code: - code_geoms = [] - code_GIDs = [] - for feature in src_code: - if ( - feature["properties"]["GID_0"] in countries_relevant - ): # only include search in countries that contain targets - code_geoms.append(shape(feature["geometry"])) - code_GIDs.append(feature["properties"][f"GID_{layer_num}"]) - print("creating dataframe") - code_geoms_gpd = gpd.GeoDataFrame({"geometry": code_geoms, "code": code_GIDs}) - - code_geoms_gpd["len"] = [ - len(g.geoms) for g in code_geoms_gpd["geometry"] - ] # include number of polygons (higher the more computationally expensive intersect function is - code_geoms_gpd_placeholder = code_geoms_gpd.copy() - - code_geoms_gpd = code_geoms_gpd.sort_values("len", ascending=False) - - pre_len = len(transmission_master) - transmission_master = transmission_master[ - ~transmission_master.geometry.isna() - ] # remomve null geometries - post_len = len(transmission_master) - if post_len / pre_len < 0.7 and pre_len > 50: # small check - print( - "Possibly issue as 30%+ of transmission lines are being removed for having no geometry" - ) - - transmission_bounds = [t.bounds for t in 
transmission_master.geometry] - ( - transmission_master["lon_min"], - transmission_master["lat_min"], - transmission_master["lon_max"], - transmission_master["lat_max"], - ) = zip(*transmission_bounds) - - map_dict = {} - map_dict_ae = {} # annual expected - map_dict_ll = {} # len_lines - - for geom_area in tqdm( - code_geoms_gpd.itertuples(), total=len(code_geoms_gpd), desc="geom_intersect" - ): - - minx, miny, maxx, maxy = geom_area.geometry.bounds - s1 = time.time() - transmission_master_bounded = transmission_master[ - (transmission_master.lon_min >= minx) - & (transmission_master.lon_max <= maxx) - & (transmission_master.lat_min >= miny) - & (transmission_master.lat_max <= maxy) - ] # only check within bounding box - - if len(transmission_master_bounded) >= 1: - - bool_list = [ - True if t.intersects(geom_area.geometry) else False - for t in transmission_master_bounded.geometry - ] - - overlap_transmission = transmission_master_bounded[bool_list] - - if len(overlap_transmission) >= 1: - - map_dict_ll[geom_area.code] = eval_dist( - overlap_transmission - ) # note down degree length - - if ( - geom_area.code not in map_dict.keys() - and "reconstruction_cost" in overlap_transmission.columns.values - ): - map_dict[geom_area.code] = sum( - overlap_transmission.reconstruction_cost - * overlap_transmission.count_damage - ) - map_dict_ae[geom_area.code] = sum( - overlap_transmission.reconstruction_cost_annual_expected - * overlap_transmission.count_damage - ) - else: - map_dict[geom_area.code] = map_dict[geom_area.code] + sum( - overlap_transmission.reconstruction_cost - * overlap_transmission.count_damage - ) - map_dict_ae[geom_area.code] = map_dict_ae[geom_area.code] + sum( - overlap_transmission.reconstruction_cost_annual_expected - * overlap_transmission.count_damage - ) - - transmission_master = transmission_master[ - ~transmission_master.link.isin(overlap_transmission.link) - ] # remove from code_geoms_gpd_transmission_master - - code_geoms_gpd["reconstruction_cost_sum"] = ( - code_geoms_gpd["code"].map(map_dict).fillna(0) - ) - code_geoms_gpd["reconstruction_cost_avg"] = ( - code_geoms_gpd["reconstruction_cost_sum"] / storm_tot - ) # average over all hitting storms - code_geoms_gpd["reconstruction_cost_annual_expected"] = ( - code_geoms_gpd["code"].map(map_dict_ae).fillna(0) - ) - code_geoms_gpd["reconstruction_cost_annual_expected_fraction_normalised"] = ( - code_geoms_gpd["reconstruction_cost_annual_expected"] - / (code_geoms_gpd["code"].map(map_dict_ll)) - ).fillna( - 0 - ) # This metric divides by the total unit length of transmission lines in that region. 
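    # Illustrative sketch with hypothetical numbers (not taken from the results): a
    # region with an annually expected reconstruction cost of 1.0e6 USD and 200 km of
    # transmission line would be reported here as 1.0e6 / 200 = 5000 USD per km per year.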
- - code_geoms_gpd.to_file(recon_path, driver="GPKG") - print("written to file") diff --git a/workflow/power-tc/analyse/transmission_aggregate.smk b/workflow/power-tc/analyse/transmission_aggregate.smk deleted file mode 100644 index 474e5228..00000000 --- a/workflow/power-tc/analyse/transmission_aggregate.smk +++ /dev/null @@ -1,40 +0,0 @@ -""" -Gathers and aggregates statistics on frequency of damage of transmission lines -""" - -import os - -TRANSMISSION_OUT = [ - os.path.join( - config["output_dir"], - "power_output", - "statistics", - "aggregate", - "transmission_line_frequency_hit.gpkg", - ), - os.path.join( - config["output_dir"], - "power_output", - "statistics", - "aggregate", - "transmission_line_reconstruction_costs.gpkg", - ), -] - - -rule analyse_transmission: - input: - os.path.join( - STORM_IMPACT_STATISTICS_DIR, f"combined_storm_statistics_{config['central_threshold']}.csv" - ), - params: - output_dir=config["output_dir"], - aggregate_level=config["aggregate_level"], - region_eval=STORM_BASINS, - sample_eval=SAMPLES, - nh_eval=STORMS, - central_threshold=config["central_threshold"], - output: - TRANSMISSION_OUT, - script: - "./transmission_aggregate.py" diff --git a/workflow/power-tc/plot/customers_affected_by_storm.smk b/workflow/power-tc/map/customers_affected_by_storm.smk similarity index 100% rename from workflow/power-tc/plot/customers_affected_by_storm.smk rename to workflow/power-tc/map/customers_affected_by_storm.smk diff --git a/workflow/power-tc/plot/target_disruption.smk b/workflow/power-tc/map/target_disruption.smk similarity index 100% rename from workflow/power-tc/plot/target_disruption.smk rename to workflow/power-tc/map/target_disruption.smk diff --git a/workflow/power-tc/analyse/network_components.py b/workflow/power-tc/network_components.py similarity index 100% rename from workflow/power-tc/analyse/network_components.py rename to workflow/power-tc/network_components.py diff --git a/workflow/power-tc/analyse/network_components.smk b/workflow/power-tc/network_components.smk similarity index 100% rename from workflow/power-tc/analyse/network_components.smk rename to workflow/power-tc/network_components.smk diff --git a/workflow/power-tc/plot/EAD_EACA-finder.py b/workflow/power-tc/plot/EAD_EACA-finder.py deleted file mode 100644 index ae68fad1..00000000 --- a/workflow/power-tc/plot/EAD_EACA-finder.py +++ /dev/null @@ -1,85 +0,0 @@ -"""Finds EAD and EACA - -Must have one named constant -""" -import os - -import numpy as np -import pandas as pd - -try: - output_dir = snakemake.params["output_dir"] # type: ignore - models_future = snakemake.params["models_future"] # type: ignore -except: - raise RuntimeError("Please use snakemake to define inputs") - - -def traprule(lst, spacing): - """Trapezium rule""" - return 0.5 * spacing * (lst[0] + lst[-1] + sum(lst[1:-1])) - - -metrics = ["EAD", "EACA"] -metric_unit = {"EAD": "USD", "EACA": "people"} -metric_file = { - "EAD": "empirical_reconstruction cost_plotting_data.csv", - "EACA": "empirical_effective population affected_plotting_data.csv", -} - - -for metric in metrics: - write_lst = [] - data_constant = pd.read_csv( - os.path.join( - output_dir, - "power_output-constant", - "statistics", - "empirical", - "empirical_plotting_data", - metric_file[metric], - ) - ) - print(f"\n\n {metric}") - for ii, model in enumerate(models_future): - - model_path = os.path.join( - output_dir, - f"power_output-{model}", - "statistics", - "empirical", - "empirical_plotting_data", - metric_file[metric], - ) - if ii == 0: - data = 
pd.read_csv(model_path) - else: - data_new = pd.read_csv(model_path) - data = pd.merge( - data, data_new, how="outer", on=["x"], suffixes=(f"_A{ii}", f"_B{ii}") - ) - - col_lst = ["y_cen", "y_min", "y_max"] - for col_id in col_lst: - data[col_id] = data[[c for c in data.columns if col_id in c]].mean(axis=1) - data = data[col_lst + ["x"]] # [:-7] # data specific - - data_dict = {"current climate": data_constant, "future climate": data} - for ii, (df_name, df) in enumerate(data_dict.items()): - x = df["x"] - y_cen = df["y_cen"] - y_min = df["y_min"] - y_max = df["y_max"] - - metric_X = int(traprule(np.array(y_cen), 1 / max(x))) - metric_U = int(traprule(np.array(y_min), 1 / max(x))) - metric_L = int(traprule(np.array(y_max), 1 / max(x))) - - metric_unit_indiv = metric_unit[metric] - details_metric = f'{df_name}: {metric} = {format(metric_X, ".1E")} {metric_unit_indiv}, UR: {format(metric_L, ".1E")} {metric_unit_indiv} - {format(metric_U, ".1E")} {metric_unit_indiv}' - print(details_metric) - write_lst.append(details_metric) - - txt_file = os.path.join(output_dir, "power_figures", f"{metric}_total.txt") - with open(txt_file, "w") as f: - f.write(write_lst[0] + " || ") - f.write(write_lst[1]) diff --git a/workflow/power-tc/plot/cross_correlation.py b/workflow/power-tc/plot/cross_correlation.py deleted file mode 100644 index 73f9c7c7..00000000 --- a/workflow/power-tc/plot/cross_correlation.py +++ /dev/null @@ -1,78 +0,0 @@ -"""Plots heatmap with modifications""" -import os - -import matplotlib.pyplot as plt -import pandas as pd - -try: - output_dir = snakemake.params["output_dir"] # type: ignore - inputs = snakemake.input # type: ignore - remove = snakemake.params["remove_countries"] # type: ignore - name_cc_constant = snakemake.params["name_cc_constant"] # type: ignore - name_cc_future = snakemake.params["name_cc_future"] # type: ignore -except: - raise RuntimeError("Please use snakemake to define inputs") - - -assert type(remove) == list - - -plot_path = os.path.join(output_dir, "power_figures") -if not os.path.exists(plot_path): - os.makedirs(plot_path) - - -def plot_relation_matrix(matrix, country_index, title, fig_num): - """Plots and saves imshow""" - country_index_plot = country_index.copy() - f = plt.figure(fig_num) - f.set_figwidth(10) - f.set_figheight(8) - - for c in remove: # update, remove - if c in matrix.columns: - matrix = matrix.drop(c, axis=1) - matrix = matrix.drop(c, axis=0) - del country_index_plot[c] - - country_index_plot = dict( - zip(country_index_plot.keys(), range(len(country_index_plot))) - ) # update - plt.imshow(matrix, cmap="viridis") - plt.xlabel("Country B") - plt.yticks( - list(country_index_plot.values()), labels=list(country_index_plot.keys()) - ) - plt.xticks( - list(country_index_plot.values()), - labels=list(country_index_plot.keys()), - rotation=90, - ) - plt.ylabel("Country A") - - # plt.title(title) - cbar = plt.colorbar() - cbar.set_label(f"JHR ({title}) [-]", rotation=90) - - plt.show() - plt.savefig(os.path.join(plot_path, f"JHR_{title}"), bbox_inches="tight") - print(f"Saved {title}") - - -files_sorted = [[inputs[0]], inputs[1:]] - - -for ii, files in enumerate(files_sorted): - for jj, file_indiv in enumerate(files): - if jj == 0: - data = pd.read_csv(file_indiv, index_col=0) - else: - data = data + pd.read_csv(file_indiv, index_col=0) - - data = data / (jj + 1) - c_dict = dict(zip(data.columns, range(len(data.columns)))) - if ii == 0: # constant - name = name_cc_constant - else: - name = name_cc_future - plot_relation_matrix(data, c_dict, name, ii) 
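A hedged, self-contained sketch of the two country-pair measures computed in storm_distribution_empirical_country_matrix.py above (intersection-over-union and conditional probability), using made-up storm counts rather than the workflow's outputs:

hits_a, hits_b, hits_both = 10, 5, 3
jaccard = hits_both / (hits_a + hits_b - hits_both)  # 3 / 12 = 0.25
p_b_given_a = hits_both / hits_a  # 0.3, chance B is hit given A is hit
p_a_given_b = hits_both / hits_b  # 0.6, chance A is hit given B is hit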
diff --git a/workflow/power-tc/plot/diff_agg.py b/workflow/power-tc/plot/diff_agg.py deleted file mode 100644 index 33f52338..00000000 --- a/workflow/power-tc/plot/diff_agg.py +++ /dev/null @@ -1,76 +0,0 @@ -"""Takes in data/ file startswith1 minus startwith2 - -Best to have 1 as future - -targets: eff pop ae and code -recon agg: recon cost ae and code -recon agg norm: recon cost ae frac norm and code -recon indiv: recon cost ae and link -""" -import os -import time - -import geopandas as gpd - -try: - metric = snakemake.params["metric"] # type: ignore - merge_key = snakemake.params["merge_key"] # type: ignore - inputs = snakemake.input # type: ignore - output = snakemake.output # type: ignore -except: - raise RuntimeError("Please use snakemake to define inputs") - -# first input is future -assert len(inputs) == 2 -assert type(output) != list -output = str(output) - -d = dict() # master dict -g = dict() # geom -for ii, file in enumerate(inputs): - - mean_file = gpd.read_file(file) - - for jj in range(len(mean_file)): - code = mean_file.iloc[jj][merge_key] - metric_val = mean_file.iloc[jj][metric] - if code not in d: - d[code] = [metric_val] - g[code] = mean_file.iloc[jj]["geometry"] - else: - d[code] = d[code] + [metric_val] - -print(f"len(d) pre = {len(d)}") -d = {k: v for k, v in d.items() if len(v) == 2} -print(f"len(d) post = {len(d)}") - -p = dict() # percentage -for k, v in d.items(): - d_orig = d[k][1] - d[k] = d[k][0] - d[k][1] - if d[k] != 0: - p[k] = 100 * d[k] / d_orig - else: - p[k] = 0 - - -gdf = gpd.GeoDataFrame({merge_key: g.keys(), "geometry": g.values()}) -rem = "REMOVE" -gdf[metric] = gdf[merge_key].map(d).fillna(rem) -perc_metric = f"perc_{metric}" -gdf[perc_metric] = gdf[merge_key].map(p).fillna(rem) -b = [False if gdf.iloc[jj][metric] == rem else True for jj in range(len(gdf))] -gdf = gdf[b] - -for col in gdf.columns: - if col not in ["geometry", merge_key]: - gdf[col] = gdf[col].astype(float) - -output_folder = os.path.dirname(output) -if not os.path.exists(output_folder): - os.makedirs(output_folder) - -gdf.to_file(output, driver="GPKG") - -print("done") -time.sleep(1) diff --git a/workflow/power-tc/plot/diff_cross_correlation.py b/workflow/power-tc/plot/diff_cross_correlation.py deleted file mode 100644 index f6662b71..00000000 --- a/workflow/power-tc/plot/diff_cross_correlation.py +++ /dev/null @@ -1,95 +0,0 @@ -"""Plots heatmap difference (future mean - current) with modifications""" -import os - -import matplotlib.pyplot as plt -import pandas as pd - -try: - output_dir = snakemake.params["output_dir"] # type: ignore - inputs = snakemake.input # type: ignore - remove = snakemake.params["remove_countries"] # type: ignore - name_cc_future_perc_diff = snakemake.params["name_cc_future_perc_diff"] # type: ignore - name_cc_future_diff = snakemake.params["name_cc_future_diff"] # type: ignore -except: - raise RuntimeError("Please use snakemake to define inputs") - - -assert type(remove) == list - - -plot_path = os.path.join(output_dir, "power_figures") -if not os.path.exists(plot_path): - os.makedirs(plot_path) - - -def plot_relation_matrix(matrix, country_index, title, fig_num, perc): - """Plots and saves imshow""" - - country_index_plot = country_index.copy() - f = plt.figure(fig_num) - f.set_figwidth(10) - f.set_figheight(8) - - for c in remove: # update, remove - if c in matrix.columns: - matrix = matrix.drop(c, axis=1) - matrix = matrix.drop(c, axis=0) - del country_index_plot[c] - - country_index_plot = dict( - zip(country_index_plot.keys(), 
range(len(country_index_plot))) - ) # update - - if perc == True: - plt.imshow(matrix, cmap="RdBu_r", vmax=50, vmin=-50) - ext = " [% of constant climate]" - else: - plt.imshow(matrix, cmap="RdBu_r", vmax=0.1, vmin=-0.1) - ext = " [-]" - plt.xlabel("Country B") - plt.yticks( - list(country_index_plot.values()), labels=list(country_index_plot.keys()) - ) - plt.xticks( - list(country_index_plot.values()), - labels=list(country_index_plot.keys()), - rotation=90, - ) - plt.ylabel("Country A") - - # plt.title(title) - cbar = plt.colorbar() - cbar.set_label(f"JHR change{ext}", rotation=90) - - plt.show() - plt.savefig(os.path.join(plot_path, f"JHR_{title}"), bbox_inches="tight") - print(f"Saved {title}") - - -files_sorted = [[inputs[0]], inputs[1:]] - - -datas = [] -for ii, files in enumerate(files_sorted): - for jj, file_indiv in enumerate(files): - print(file_indiv) - if jj == 0: - data = pd.read_csv(file_indiv, index_col=0) - else: - data = data + pd.read_csv(file_indiv, index_col=0) - - data = data / (jj + 1) - datas.append(data) - c_dict = dict(zip(data.columns, range(len(data.columns)))) - -assert len(datas) == 2 - -# abs -data_plot = 100 * (datas[1] - datas[0]) / datas[0] # percentage -name = name_cc_future_perc_diff -plot_relation_matrix(data_plot, c_dict, name, 0, True) - -# perc -data_plot = datas[1] - datas[0] -name = name_cc_future_diff -plot_relation_matrix(data_plot, c_dict, name, 1, False) diff --git a/workflow/power-tc/plot/fig_CDFs.smk b/workflow/power-tc/plot/fig_CDFs.smk deleted file mode 100644 index 487a7ff5..00000000 --- a/workflow/power-tc/plot/fig_CDFs.smk +++ /dev/null @@ -1,65 +0,0 @@ -"""Plots the cdfs for EAD and EACA - -""" -import os - -cdf_EACA = "empirical_effective population affected_plotting_data" -cdf_EAD = "empirical_reconstruction cost_plotting_data" -in_cdf_EAD = [ - os.path.join( - config["output_dir"], - f"power_output-{model}", - "statistics", - "empirical", - "empirical_plotting_data", - f"{cdf_EAD}.csv", - ) - for model in models_all -] -in_cdf_EACA = [ - os.path.join( - config["output_dir"], - f"power_output-{model}", - "statistics", - "empirical", - "empirical_plotting_data", - f"{cdf_EACA}.csv", - ) - for model in models_all -] -out_cdf_EAD = os.path.join( - config["output_dir"], "power_figures", f"empirical_overlay_{cdf_EAD}.png" -) -out_cdf_EACA = os.path.join( - config["output_dir"], "power_figures", f"empirical_overlay_{cdf_EACA}.png" -) - - -rule fig_cdfs_EAD: - input: - in_cdf_EAD, - params: - output_dir=config["output_dir"], - EACA=False, - central_threshold=config["central_threshold"], - minimum_threshold=config["minimum_threshold"], - maximum_threshold=config["maximum_threshold"], - output: - out_cdf_EAD, - script: - "./plot_together.py" - - -rule fig_cdfs_EACA: - input: - in_cdf_EACA, - params: - output_dir=config["output_dir"], - EACA=True, # EAD - central_threshold=config["central_threshold"], - minimum_threshold=config["minimum_threshold"], - maximum_threshold=config["maximum_threshold"], - output: - out_cdf_EACA, - script: - "./plot_together.py" diff --git a/workflow/power-tc/plot/fig_EACA.smk b/workflow/power-tc/plot/fig_EACA.smk deleted file mode 100644 index c0d03d0d..00000000 --- a/workflow/power-tc/plot/fig_EACA.smk +++ /dev/null @@ -1,111 +0,0 @@ -""" -Aggregates EACA and finds difference to future. 
Plots current and difference -""" -import os - -SORT_BY_INCREASING_SEVERITY = str(config["increased_severity_sort"])[0] -assert SORT_BY_INCREASING_SEVERITY in ["T", "F"] - -metric_EACA = "effective_population_anually-expected" -metric_EACA_perc = "perc_effective_population_anually-expected" -merge_key_EACA = "code" - -out_agg_EACA_file = os.path.join( - config["output_dir"], - "power_figures", - "intermediate_files", - f"mean_{merge_key_EACA}_{metric_EACA}.gpkg", -) -out_agg_EACA_plot_perc = os.path.join( - config["output_dir"], "power_figures", "EACA_perc_diff_aggregate.png" -) -out_agg_EACA_plot = os.path.join( - config["output_dir"], "power_figures", "EACA_current_aggregate.png" -) - -out_diff_EACA_file = os.path.join( - config["output_dir"], - "power_figures", - "intermediate_files", - f"difference_{merge_key_EACA}_{metric_EACA}.gpkg", -) - - -rule fig_aggregate_EACA: - input: - in_agg_EACA=[ - os.path.join( - config["output_dir"], - f"power_output-{model}", - "statistics", - "aggregate", - f"targets_geo_top{config['top_select']}{SORT_BY_INCREASING_SEVERITY}percent_aggregated_region.gpkg", - ) - for model in models_future - ], - params: - output_dir=config["output_dir"], - metric=metric_EACA, - merge_key=merge_key_EACA, - output: - out_agg_EACA=out_agg_EACA_file, # - script: - "./mean_agg.py" - - -rule fig_diff_EACA: - input: - [ - rules.fig_aggregate_EACA.output.out_agg_EACA, - os.path.join( - config["output_dir"], - f"power_output-constant", - "statistics", - "aggregate", - f"targets_geo_top{config['top_select']}{SORT_BY_INCREASING_SEVERITY}percent_aggregated_region.gpkg", - ), - ], # first file is future - params: - output_dir=config["output_dir"], - metric=metric_EACA, - merge_key=merge_key_EACA, - output: - out_diff_EACA=out_diff_EACA_file, - script: - "./diff_agg.py" - - -rule fig_plot_current_EACA: - """Plots current""" - input: - rules.fig_aggregate_EACA.input.in_agg_EACA[0], # constant - params: - output_dir=config["output_dir"], - metric=metric_EACA, - vmax=50000, - vmin=0, - cmap="Reds", - linewidth=None, - legend_name="EACA (constant climate) [people]", - output: - out_agg_EACA_plot, - script: - "./plotter.py" - - -rule fig_plot_diff_EACA: - """Plots difference""" - input: - rules.fig_diff_EACA.output.out_diff_EACA, - params: - output_dir=config["output_dir"], - metric=metric_EACA_perc, - vmax=30, - vmin=-30, - cmap="RdBu_r", - linewidth=None, - legend_name="EACA change [% of constant climate]", - output: - out_agg_EACA_plot_perc, - script: - "./plotter.py" diff --git a/workflow/power-tc/plot/fig_EAD.smk b/workflow/power-tc/plot/fig_EAD.smk deleted file mode 100644 index 235d404f..00000000 --- a/workflow/power-tc/plot/fig_EAD.smk +++ /dev/null @@ -1,276 +0,0 @@ -"""Aggregates EAD and finds difference. 
For indivudal asset and aggregate - -""" -import os - - -## aggregated (absolute) ## -metric_EAD_agg = "reconstruction_cost_annual_expected" -merge_key_EAD_agg = "code" - -## aggregated but normalised ## -metric_EAD_agg_norm = "reconstruction_cost_annual_expected_fraction_normalised" -merge_key_EAD_agg_norm = "code" - -## individual assets ## -metric_EAD_indiv = "reconstruction_cost_annual_expected" -metric_EAD_indiv_perc = "perc_" + metric_EAD_indiv -metric_EAD_indiv_perkm = metric_EAD_indiv + "_per_km" -merge_key_EAD_indiv = "link" - - -### outputs ### -out_diff_EAD_agg = os.path.join( - config["output_dir"], - "power_figures", - "intermediate_files", - f"difference_{merge_key_EAD_agg}_{metric_EAD_agg}.gpkg", -) -out_diff_EAD_agg_norm = os.path.join( - config["output_dir"], - "power_figures", - "intermediate_files", - f"difference_{merge_key_EAD_agg_norm}_{metric_EAD_agg_norm}.gpkg", -) -out_diff_EAD_indiv = os.path.join( - config["output_dir"], - "power_figures", - "intermediate_files", - f"difference_{merge_key_EAD_indiv}_{metric_EAD_indiv}.gpkg", -) - -out_diff_EAD_indiv_plot = os.path.join( - config["output_dir"], "power_figures", "EAD_difference_perc_recon.png" -) -out_current_EAD_indiv_plot = os.path.join( - config["output_dir"], "power_figures", "EAD_current_recon_per_km.png" -) -out_diff_EAD_plot = os.path.join( - config["output_dir"], "power_figures", "EAD_difference_recon.png" -) -out_diff_EAD_plot_norm = os.path.join( - config["output_dir"], "power_figures", "EAD_difference_recon_norm.png" -) - - -## aggregated (absolute) ## -rule fig_aggregate_EAD_agg: - input: - [ - os.path.join( - config["output_dir"], - f"power_output-{model}", - "statistics", - "aggregate", - "transmission_line_reconstruction_costs.gpkg", - ) - for model in models_future - ], - params: - output_dir=config["output_dir"], - metric=metric_EAD_agg, - merge_key=merge_key_EAD_agg, - output: - out_agg_EAD_agg=os.path.join( - config["output_dir"], - "power_figures", - "intermediate_files", - f"mean_{merge_key_EAD_agg}_{metric_EAD_agg}.gpkg", - ), - script: - "./mean_agg.py" - - -rule fig_diff_EAD_agg: - input: - [ - rules.fig_aggregate_EAD_agg.output.out_agg_EAD_agg, - os.path.join( - config["output_dir"], - f"power_output-constant", - "statistics", - "aggregate", - "transmission_line_reconstruction_costs.gpkg", - ), - ], # first file is future - params: - metric=metric_EAD_agg, - merge_key=merge_key_EAD_agg, - output: - out_diff_EAD_agg, - script: - "./diff_agg.py" - - -# rule fig_diff_EAD_agg_plot: -# """Not ideal plot as varies a lot by total km in region""" -# input: -# out_diff_EAD_agg -# params: -# output_dir = config['output_dir'], -# metric = metric_EAD_agg, -# vmax = 500000, -# vmin = -500000, -# cmap = 'RdBu_r', -# linewidth = None -# output: -# out_diff_EAD_plot -# script: -# './plotter.py' - - -## aggregated but normalised ## -rule fig_aggregate_EAD_agg_norm: - input: - [ - os.path.join( - config["output_dir"], - f"power_output-{model}", - "statistics", - "aggregate", - "transmission_line_reconstruction_costs.gpkg", - ) - for model in models_future - ], - params: - output_dir=config["output_dir"], - metric=metric_EAD_agg_norm, - merge_key=merge_key_EAD_agg_norm, - output: - out_agg_EAD_agg_norm=os.path.join( - config["output_dir"], - "power_figures", - "intermediate_files", - f"mean_{merge_key_EAD_agg_norm}_{metric_EAD_agg_norm}.gpkg", - ), - script: - "./mean_agg.py" - - -rule fig_diff_EAD_agg_norm: - input: - [ - rules.fig_aggregate_EAD_agg_norm.output.out_agg_EAD_agg_norm, - os.path.join( - 
config["output_dir"], - f"power_output-constant", - "statistics", - "aggregate", - "transmission_line_reconstruction_costs.gpkg", - ), - ], # first file is future - params: - metric=metric_EAD_agg_norm, - merge_key=merge_key_EAD_agg_norm, - output: - out_diff_EAD_agg_norm, - script: - "./diff_agg.py" - - -# rule fig_diff_EAD_agg_plot_norm: -# input: -# out_diff_EAD_agg -# params: -# output_dir = config['output_dir'] -# metric = metric_EAD_agg_norm, -# vmax = 5000, -# vmin = -5000, -# cmap = 'RdBu_r', -# linewidth = None -# output: -# out_diff_EAD_plot_norm -# script: -# './plotter.py' -# - - -## individual assets ## -rule fig_aggregate_EAD_indiv: - input: - in_agg_EAD_indiv=[ - os.path.join( - config["output_dir"], - f"power_output-{model}", - "statistics", - "aggregate", - "transmission_line_frequency_hit.gpkg", - ) - for model in models_future - ], - params: - output_dir=config["output_dir"], - metric=metric_EAD_indiv, - merge_key=merge_key_EAD_indiv, - output: - out_agg_EAD_indiv=os.path.join( - config["output_dir"], - "power_figures", - "intermediate_files", - f"mean_{merge_key_EAD_indiv}_{metric_EAD_indiv}.gpkg", - ), - script: - "./mean_agg.py" - - -rule fig_diff_EAD_indiv: - input: - [ - rules.fig_aggregate_EAD_indiv.output.out_agg_EAD_indiv, - os.path.join( - config["output_dir"], - f"power_output-constant", - "statistics", - "aggregate", - "transmission_line_frequency_hit.gpkg", - ), - ], # first file is future - params: - metric=metric_EAD_indiv, - merge_key=merge_key_EAD_indiv, - output: - out_diff_EAD_indiv, - script: - "./diff_agg.py" - - -rule fig_diff_EAD_indiv_plot: - """Plots difference""" - input: - out_diff_EAD_indiv, - params: - output_dir=config["output_dir"], - metric=metric_EAD_indiv_perc, - vmax=100, - vmin=-100, - cmap="RdBu_r", - linewidth=master_linewidth, - legend_name="EAD change per km [% of constant climate]", - output: - out_diff_EAD_indiv_plot, - script: - "./plotter.py" - - -rule fig_current_EAD_indiv_plot: - """Plots current""" - input: - os.path.join( - config["output_dir"], - f"power_output-constant", - "statistics", - "aggregate", - "transmission_line_frequency_hit.gpkg", - ), - params: - output_dir=config["output_dir"], - metric=metric_EAD_indiv_perkm, - vmax=250, - vmin=0, - cmap="Reds", - linewidth=master_linewidth, - legend_name="EAD per km [USD]", - output: - out_current_EAD_indiv_plot, - script: - "./plotter.py" diff --git a/workflow/power-tc/plot/fig_EAD_EACA.smk b/workflow/power-tc/plot/fig_EAD_EACA.smk deleted file mode 100644 index 9f8c5527..00000000 --- a/workflow/power-tc/plot/fig_EAD_EACA.smk +++ /dev/null @@ -1,43 +0,0 @@ -""" -Writes EAD and EACA metrics to file in power_figures -""" - -import os - -out_EAD_EACA = [ - os.path.join(config["output_dir"], "power_figures", f"{metric}_total.txt") - for metric in ["EAD", "EACA"] -] - - -rule fig_EAD_EACA: - input: - [ - os.path.join( - config["output_dir"], - f"power_output-{model}", - "statistics", - "empirical", - "empirical_plotting_data", - "empirical_effective population affected_plotting_data.csv", - ) - for model in models_all - ], - [ - os.path.join( - config["output_dir"], - f"power_output-{model}", - "statistics", - "empirical", - "empirical_plotting_data", - "empirical_reconstruction cost_plotting_data.csv", - ) - for model in models_all - ], - params: - output_dir=config["output_dir"], - models_future=models_future, - output: - out_EAD_EACA, - script: - "./EAD_EACA-finder.py" diff --git a/workflow/power-tc/plot/fig_cross_correlation.smk 
b/workflow/power-tc/plot/fig_cross_correlation.smk deleted file mode 100644 index 570e4d6c..00000000 --- a/workflow/power-tc/plot/fig_cross_correlation.smk +++ /dev/null @@ -1,58 +0,0 @@ -"""Plots the cross correlation JHR and future differences - -""" -import os - - -in_cc = [ - os.path.join( - config["output_dir"], - f"power_output-{model}", - "statistics", - "empirical", - "empirical_plotting_data", - "country_matrix_both.csv", - ) - for model in models_all -] - -out_cc_file = [ - os.path.join(config["output_dir"], "power_figures", f"JHR_{name_cc_constant}.png"), - os.path.join(config["output_dir"], "power_figures", f"JHR_{name_cc_future}.png"), -] -out_cc_file_diff = [ - os.path.join( - config["output_dir"], "power_figures", f"JHR_{name_cc_future_diff}.png" - ), - os.path.join( - config["output_dir"], "power_figures", f"JHR_{name_cc_future_perc_diff}.png" - ), -] - - -rule fig_cross_correlation: - input: - in_cc, - params: - output_dir=config["output_dir"], - remove_countries=remove_countries, - name_cc_future=name_cc_future, - name_cc_constant=name_cc_constant, - output: - out_cc=out_cc_file, - script: - "./cross_correlation.py" - - -rule fig_cross_correlation_diff: - input: - in_cc, - params: - output_dir=config["output_dir"], - remove_countries=remove_countries, - name_cc_future_diff=name_cc_future_diff, - name_cc_future_perc_diff=name_cc_future_perc_diff, - output: - out_cc_file_diff, - script: - "./diff_cross_correlation.py" diff --git a/workflow/power-tc/plot/fig_initial.py b/workflow/power-tc/plot/fig_initial.py deleted file mode 100644 index 4f239e8c..00000000 --- a/workflow/power-tc/plot/fig_initial.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Performs initial figure generation checks""" - -import os - -try: - output_dir = snakemake.params["output_dir"] # type: ignore - models_all = snakemake.params["models_all"] # type: ignore -except: - raise RuntimeError("Please use snakemake to define inputs") - - -all_folders = [ - os.path.join(output_dir, f"power_output-{model}") for model in models_all -] - -for folder in all_folders: # check exist - if not os.path.exists(folder): - raise RuntimeWarning( - f"\n----------------\nFolder {folder} does not exist. Can not proceed. Check that the workflow for {os.path.basename(folder)} has been completed and/or correctly named in {output_dir} directory.\n----------------\n" - ) - -folder_figs = os.path.join(output_dir, "power_figures") -if not os.path.exists(folder_figs): - os.makedirs(folder_figs) - -txt_done = os.path.join(folder_figs, "initial_checks.txt") -with open(txt_done, "w") as f: - f.writelines("Initial checks complete") - print(f"written to {txt_done}") diff --git a/workflow/power-tc/plot/fig_initial.smk b/workflow/power-tc/plot/fig_initial.smk deleted file mode 100644 index 0a061929..00000000 --- a/workflow/power-tc/plot/fig_initial.smk +++ /dev/null @@ -1,33 +0,0 @@ -"""Checks this part of workflow can be proceeded with and provides common variables - -Reminder: do not run figures_all until analyse_all has been completed! 
-""" - - -import os - -# prelim variables -models_future = ["CMCC-CM2-VHR4", "CNRM-CM6-1-HR", "EC-Earth3P-HR", "HadGEM3-GC31-HM"] -models_all = ["constant"] + models_future # constant MUST be first -remove_countries = ["USA", "VEN", "CYM", "VCT", "BHS", "ATG", "DMA", "LCA", "TTO"] -name_cc_constant = "constant climate" -name_cc_future = "future climate" -name_cc_future_diff = "Future minus current" -name_cc_future_perc_diff = "Future minus current (change in percent of current)" - -master_linewidth = 0.55 # linewidth when lines in GeoDataFrame - - -initial_checks = os.path.join( - config["output_dir"], "power_figures", "initial_checks.txt" -) - - -rule fig_checks: - params: - output_dir=config["output_dir"], - models_all=models_all, - output: - initial_checks, - script: - "./fig_initial.py" diff --git a/workflow/power-tc/plot/fig_master.smk b/workflow/power-tc/plot/fig_master.smk deleted file mode 100644 index bf086e46..00000000 --- a/workflow/power-tc/plot/fig_master.smk +++ /dev/null @@ -1,24 +0,0 @@ -"""for all figures - -""" -import os - - -rule figures_all: - input: - initial_checks, - out_diff_EACA_file, - out_diff_EAD_agg, - out_diff_EAD_agg_norm, - out_diff_EAD_indiv, - out_cc_file_diff, - out_cc_file, - out_cdf_EAD, - out_cdf_EACA, - out_agg_EACA_plot, - out_agg_EACA_plot_perc, - out_diff_EAD_indiv_plot, - out_current_EAD_indiv_plot, - out_EAD_EACA, - #out_diff_EAD_plot, - #out_diff_EAD_plot_norm diff --git a/workflow/power-tc/plot/mean_agg.py b/workflow/power-tc/plot/mean_agg.py deleted file mode 100644 index 90b9b44a..00000000 --- a/workflow/power-tc/plot/mean_agg.py +++ /dev/null @@ -1,52 +0,0 @@ -"""Takes anything in folder data/ and aggregated to admin keys""" -import os -import time - -import geopandas as gpd - -try: - output_dir = snakemake.params["output_dir"] # type: ignore - metric = snakemake.params["metric"] # type: ignore - merge_key = snakemake.params["merge_key"] # type: ignore - inputs = snakemake.input # type: ignore - output = snakemake.output # type: ignore -except: - raise RuntimeError("Please use snakemake to define inputs") - - -plot_path = os.path.join(output_dir, "power_figures") -if not os.path.exists(plot_path): - os.makedirs(plot_path) - - -assert type(output) != list -output = str(output) -d = dict() -g = dict() -for ii, file in enumerate(inputs): - - mean_file = gpd.read_file(file) - for jj in range(len(mean_file)): - code = mean_file.iloc[jj][merge_key] - metric_val = mean_file.iloc[jj][metric] - if code not in d: - d[code] = [metric_val] - g[code] = mean_file.iloc[jj]["geometry"] - else: - d[code] = d[code] + [metric_val] - - -for k, v in d.items(): - d[k] = sum(d[k]) / len(d[k]) - -gdf = gpd.GeoDataFrame({merge_key: g.keys(), "geometry": g.values()}) -gdf[metric] = gdf[merge_key].map(d).fillna(0) - -output_folder = os.path.dirname(output) -if not os.path.exists(output_folder): - os.makedirs(output_folder) - -gdf.to_file(output, driver="GPKG") - -print("done") -time.sleep(1) diff --git a/workflow/power-tc/plot/plot_together.py b/workflow/power-tc/plot/plot_together.py deleted file mode 100644 index cb365355..00000000 --- a/workflow/power-tc/plot/plot_together.py +++ /dev/null @@ -1,112 +0,0 @@ -"""Takes all csv files in /data/ and plots them, legend is the name of the csv file. - -Takes mean of all files unless 'constant' which has separate curve. 
-""" -import os - -import matplotlib.pyplot as plt -import pandas as pd - -try: - output_dir = snakemake.params["output_dir"] # type: ignore - inputs = snakemake.input # type: ignore - output = snakemake.output # type: ignore - EACA = snakemake.params[ # type: ignore - "EACA" - ] # set True if effective pop affected, else assumes EAD - central_threshold = snakemake.params["central_threshold"] # type: ignore - minimum_threshold = snakemake.params["minimum_threshold"] # type: ignore - maximum_threshold = snakemake.params["maximum_threshold"] # type: ignore -except: - raise RuntimeError("Please use snakemake to define inputs") - - -assert type(output) != list -output = str(output) - -plot_path = os.path.join(output_dir, "power_figures") -if not os.path.exists(plot_path): - os.makedirs(plot_path) - - -metric = "EAD" -unit = "USD" -if EACA == True: - metric = "EACA" - unit = "people" - - -csv_files = inputs[1:] # future - -data_constant = pd.read_csv(inputs[0]) - - -for ii, csv_file in enumerate(csv_files): - - print(csv_file) - if ii == 0: - data = pd.read_csv(csv_file) - else: - data_new = pd.read_csv(csv_file) - data = pd.merge( - data, data_new, how="outer", on=["x"], suffixes=(f"_A{ii}", f"_B{ii}") - ) - -col_lst = ["y_cen", "y_min", "y_max"] -for col_id in col_lst: - data[col_id] = data[[c for c in data.columns if col_id in c]].mean(axis=1) - - -data = data[col_lst + ["x"]] -# if not EACA: -# data = data[:-7] # dependent on data - - -data_dict = {"future climate": data, "current climate": data_constant} -plt.figure(figsize=(10, 7), dpi=100) -for ii, (df_name, df) in enumerate(data_dict.items()): - x = df["x"] - y_cen = df["y_cen"] - y_min = df["y_min"] - y_max = df["y_max"] - c3 = (138 / 256 / (ii + 1), 171 / 256 / (ii + 1), 1) - c2 = (1 / (ii + 1), 0, 1 - 1 / (ii + 1)) - c1 = (0, 1 - 1 / (ii + 1), 1 / (ii + 1)) - - cen_name = f"{df_name}" - if EACA == True: - plt.fill_between(x, y_min, y_max, color=c1, alpha=0.2) - plt.plot( - x, - y_min, - color=c1, - linestyle=":", - label=f"{df_name} - Minimum wind threshold ({minimum_threshold}m/s)", - ) # plot interpolated line - plt.plot( - x, - y_max, - color=c1, - linestyle="--", - label=f"{df_name} - Maximum wind threshold ({maximum_threshold}m/s)", - ) # plot interpolated line - cen_name = f"{df_name} - Central wind threshold ({central_threshold}m/s)" - - plt.plot( - x, - y_cen, - color=c2, - label=cen_name, - ) # plot interpolated line - plt.scatter(x, y_cen, s=2, color=c2) # plot data points - -plt.xlabel("Return Period [years]") -plt.ylabel(f"{metric} [{unit}]") -plt.grid(axis="both", which="both") -plt.xscale("log") - -plt.legend() - -plt.savefig(output, bbox_inches="tight") - -plt.show() diff --git a/workflow/power-tc/plot/plotter.py b/workflow/power-tc/plot/plotter.py deleted file mode 100644 index 0d39b568..00000000 --- a/workflow/power-tc/plot/plotter.py +++ /dev/null @@ -1,119 +0,0 @@ -"""Plotter for caribbean region""" -import os - -import geopandas as gpd -import matplotlib.pyplot as plt -from mpl_toolkits.axes_grid1 import make_axes_locatable -from shapely.geometry import LineString - -try: - output_dir = snakemake.params["output_dir"] # type: ignore - plotfile = str(snakemake.input) # type: ignore - output = str(snakemake.output) # type: ignore - metric = snakemake.params["metric"] # type: ignore - vmax = snakemake.params["vmax"] # type: ignore - vmin = snakemake.params["vmin"] # type: ignore - cmap = snakemake.params["cmap"] # type: ignore - linewidth = snakemake.params["linewidth"] # type: ignore - legend_name = 
snakemake.params["legend_name"] # type: ignore -except: - plotfile = "N/A" - output = "N/A" - metric = "effective_population_anually-expected" - metric = "perc_reconstruction_cost_annual_expected" - vmax = 30 - vmin = -30 - cmap = "RdBu_r" - linewidth = 0.55 - output_dir = "results" - legend_name = "LEG NAME" - raise RuntimeError("Please use snakemake to define inputs") - -max_dist = 0.5 - - -world_file = os.path.join(output_dir, "input", "admin-boundaries", "gadm36_levels.gpkg") -world = gpd.read_file(world_file, layer=0) - -minx = -87 -maxx = -63 -maxy = 24 -miny = 16 - - -def eval_dist_lst(linestring_df): - """Evaluate the coordinate dataframe and returns distance list in km""" - - lst = [] - for ii in range(len(linestring_df)): - dist_tot = 0 - if type(linestring_df.iloc[ii].geometry) == type( - LineString([[1, 2], [3, 4]]) - ): # check is linestring - line_coords = list( - linestring_df.iloc[ii].geometry.coords - ) # extract the coordinates of a row - dist_tot = len(line_coords) - - else: # multistring - for ms in range(len(linestring_df.iloc[ii]["geometry"].geoms)): - line_coords = list( - linestring_df.iloc[ii].geometry[ms].coords - ) # extract the coordinates of a row - dist_tot += len(line_coords) - - lst.append(dist_tot) - - return lst - - -fig, ax = plt.subplots(1, 1, frameon=False) - -data = gpd.read_file(plotfile) -divider = make_axes_locatable(ax) -cax = divider.append_axes("right", size="5%", pad=0.1) - - -ax.set_xlim(minx, maxx) -ax.set_ylim(miny, maxy) - - -# filter data -if linewidth: # if not None - data["len"] = eval_dist_lst(data) # number of points on linestring - data["geolen"] = [geom[1].length for geom in data.geometry.items()] - data = data[ - ~((data.len == 2) & (data.geolen > max_dist)) - ] # filter overly long for visual - - -if ( - vmax == "evenmax" and vmin == "evenmin" -): # ensure either side of zero is equal distance from vmax and vmin - vmax = max(data[metric].max(), -data[metric].min()) - vmin = max(-data[metric].max(), data[metric].min()) - - -if vmax == "max": - vmax = data[metric].max() -if vmin == "min": - vmin = data[metric].min() - - -world.plot(ax=ax, color=(0.9, 0.9, 0.9)) -data.plot( - column=metric, - ax=ax, - linewidth=linewidth, - legend=True, - cmap=cmap, - vmax=vmax, - vmin=vmin, - cax=cax, - legend_kwds={"label": legend_name}, -) -data.boundary.plot(ax=ax, linewidth=0.1, color=(0.5, 0.5, 0.5)) -fig.set_figheight(11) -fig.set_figwidth(20) -plt.savefig(output, dpi=200, bbox_inches="tight") -print(f"Saved {output}") From ccf2aeee384ae94c048efa7053d2218f827dd265 Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 6 Feb 2024 11:16:02 +0000 Subject: [PATCH 5/6] Lift global TC variable to Snakefile --- workflow/Snakefile | 8 ++++++-- .../tc_workflow_global_variables.smk | 18 ------------------ 2 files changed, 6 insertions(+), 20 deletions(-) delete mode 100644 workflow/tropical-cyclone/tc_workflow_global_variables.smk diff --git a/workflow/Snakefile b/workflow/Snakefile index 55666c65..d43f3d62 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -95,8 +95,12 @@ wildcard_constraints: # may be upper or lower, one 'f' or two TIFF_FILE="[^\/\.\s]+\.[tT][iI][fF][fF]?", -# generate values for global variables used across rules -include: "tropical-cyclone/tc_workflow_global_variables.smk" +# how many samples is each storm track dataset split into? 
+SAMPLES_PER_TRACKSET = { + "IBTrACS": 1, + "STORM": 10, + "IRIS": 10, +} ##### load rules ##### include: "context/coastlines.smk" diff --git a/workflow/tropical-cyclone/tc_workflow_global_variables.smk b/workflow/tropical-cyclone/tc_workflow_global_variables.smk deleted file mode 100644 index 753356be..00000000 --- a/workflow/tropical-cyclone/tc_workflow_global_variables.smk +++ /dev/null @@ -1,18 +0,0 @@ -""" -The variables in this file are used throughout the power analysis rules. - -If we can eventually do without them entirely, that would be great. -""" - - -from typing import List, Tuple - - -#### POWER/STORMS WORKFLOW #### - -# how many samples is each storm track dataset split into? -SAMPLES_PER_TRACKSET = { - "IBTrACS": 1, - "STORM": 10, - "IRIS": 10, -} From ee465d87bb5fe01dccfda629d617a737cc05b80b Mon Sep 17 00:00:00 2001 From: Tom Russell Date: Tue, 6 Feb 2024 11:17:53 +0000 Subject: [PATCH 6/6] Move wind fields rule next to scripts --- workflow/Snakefile | 2 +- .../tropical-cyclone/{ => wind_fields}/wind_fields.smk | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) rename workflow/tropical-cyclone/{ => wind_fields}/wind_fields.smk (98%) diff --git a/workflow/Snakefile b/workflow/Snakefile index d43f3d62..807e21d0 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -132,7 +132,7 @@ include: "tropical-cyclone/IBTrACS.smk" include: "tropical-cyclone/IRIS.smk" include: "tropical-cyclone/STORM.smk" include: "tropical-cyclone/join_tracks.smk" -include: "tropical-cyclone/wind_fields.smk" +include: "tropical-cyclone/wind_fields/wind_fields.smk" include: "transport-flood/network_raster_intersection.smk" include: "transport-flood/flood_damages.smk" diff --git a/workflow/tropical-cyclone/wind_fields.smk b/workflow/tropical-cyclone/wind_fields/wind_fields.smk similarity index 98% rename from workflow/tropical-cyclone/wind_fields.smk rename to workflow/tropical-cyclone/wind_fields/wind_fields.smk index 01788f06..709e198f 100644 --- a/workflow/tropical-cyclone/wind_fields.smk +++ b/workflow/tropical-cyclone/wind_fields/wind_fields.smk @@ -148,7 +148,7 @@ rule create_surface_roughness_raster: output: surface_roughness = "{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/surface_roughness.tiff" script: - "./wind_fields/surface_roughness.py" + "./surface_roughness.py" """ Test with: @@ -166,7 +166,7 @@ rule create_downscaling_factors: downscale_factors="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/downscale_factors.npy", downscale_factors_plot="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/downscale_factors.png", script: - "./wind_fields/wind_downscaling_factors.py" + "./wind_downscaling_factors.py" """ To test: @@ -220,7 +220,7 @@ rule estimate_wind_fields: plot_dir=directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/{STORM_SET}/{SAMPLE}/plots/"), wind_speeds="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/{STORM_SET}/{SAMPLE}/max_wind_field.nc", script: - "./wind_fields/estimate_wind_fields.py" + "./estimate_wind_fields.py" """ To test: @@ -251,7 +251,7 @@ rule concat_wind_fields_over_sample: output: concat="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/{STORM_SET}/max_wind_field.nc", script: - "./wind_fields/concat_wind_over_sample.py" + "./concat_wind_over_sample.py" """ To test: