Reorganise scripts and rules #176

Merged 6 commits on Feb 6, 2024
6 changes: 2 additions & 4 deletions .gitignore
@@ -5,6 +5,7 @@
__pycache__/*
venv/*
src/snkit
build

# Data
data/*
@@ -24,10 +25,7 @@ tester/*
# IDE
.idea
*.swp
.vscode

# Rendered documentation
docs/book/

validation/plots/*
validation/holland_done_ratio
validation/wind_model_comparison
File renamed without changes.
File renamed without changes.
File renamed without changes.
101 changes: 52 additions & 49 deletions workflow/Snakefile
@@ -95,54 +95,57 @@ wildcard_constraints:
# may be upper or lower, one 'f' or two
TIFF_FILE="[^\/\.\s]+\.[tT][iI][fF][fF]?",

# generate values for global variables used across rules
include: "rules/storm_workflow_global_variables.smk"
# how many samples is each storm track dataset split into?
SAMPLES_PER_TRACKSET = {
"IBTrACS": 1,
"STORM": 10,
"IRIS": 10,
}

##### load rules #####
include: "rules/download/coastlines.smk"
include: "rules/download/natural-earth.smk"
include: "rules/download/STORM.smk"
include: "rules/download/IRIS.smk"
include: "rules/download/IBTrACS.smk"
include: "rules/download/gadm.smk"
include: "rules/download/gridfinder.smk"
include: "rules/download/ghsl-pop.smk"
include: "rules/download/hazards.smk"
include: "rules/download/dryad-gdp.smk"
include: "rules/download/wri-powerplants.smk"
include: "rules/download/osm.smk"
include: "rules/download/land_cover.smk"

include: "rules/preprocess/gadm.smk"
include: "rules/preprocess/filter_osm_data.smk"
include: "rules/preprocess/trim_hazard_data.smk"
include: "rules/preprocess/create_bbox_extracts.smk"
include: "rules/preprocess/slice.smk"
include: "rules/preprocess/join_network.smk"
include: "rules/preprocess/targets.smk"
include: "rules/preprocess/create_network.smk"
include: "rules/preprocess/join_data.smk"
include: "rules/preprocess/osm_to_geoparquet.smk"
include: "rules/preprocess/create_overall_bbox.smk"
include: "rules/preprocess/powerplants.smk"
include: "rules/preprocess/IBTrACS.smk"
include: "rules/preprocess/STORM.smk"
include: "rules/preprocess/IRIS.smk"

include: "rules/exposure/join_data.smk"
include: "rules/exposure/network_raster_intersection.smk"
include: "rules/exposure/wind_fields.smk"
include: "rules/exposure/flood_damages.smk"
include: "rules/exposure/electricity_grid/intersection.smk"
include: "rules/exposure/electricity_grid/exposure.smk"
include: "rules/exposure/electricity_grid/disruption.smk"
include: "rules/exposure/aggregate_to_admin_area.smk"

include: "rules/analyse/network_components.smk"
include: "rules/analyse/map/storm_tracks.smk"
include: "rules/analyse/map/outages.smk"
include: "rules/analyse/map/wind_fields.smk"
include: "rules/analyse/plot/target_disruption.smk"
include: "rules/analyse/plot/customers_affected_by_storm.smk"

include: "rules/target/cyclone-grid.smk"
include: "context/coastlines.smk"
include: "context/gadm.smk"
include: "context/natural-earth.smk"

include: "nature-ecosystems/land-cover.smk"
include: "population-economy/dryad-gdp.smk"
include: "population-economy/ghsl-pop.smk"

include: "power/gridfinder.smk"
include: "power/wri-powerplants.smk"
include: "power/gridfinder-targets.smk"
include: "power/create_network.smk"

include: "transport/openstreetmap.smk"
include: "transport/create_bbox_extracts.smk"
include: "transport/slice.smk"
include: "transport/join_network.smk"
include: "transport/create_network.smk"
include: "transport/osm_to_geoparquet.smk"
include: "transport/create_overall_bbox.smk"
include: "transport/join_data.smk"

include: "flood/aqueduct.smk"
include: "flood/trim_hazard_data.smk"

include: "tropical-cyclone/IBTrACS.smk"
include: "tropical-cyclone/IRIS.smk"
include: "tropical-cyclone/STORM.smk"
include: "tropical-cyclone/join_tracks.smk"
include: "tropical-cyclone/wind_fields/wind_fields.smk"

include: "transport-flood/network_raster_intersection.smk"
include: "transport-flood/flood_damages.smk"
include: "transport-flood/aggregate_to_admin_area.smk"

include: "power-tc/network_raster_intersection.smk"
include: "power-tc/intersection.smk"
include: "power-tc/exposure.smk"
include: "power-tc/disruption.smk"
include: "power-tc/network_components.smk"
include: "power-tc/map/storm_tracks.smk"
include: "power-tc/map/outages.smk"
include: "power-tc/map/wind_fields.smk"
include: "power-tc/map/target_disruption.smk"
include: "power-tc/map/customers_affected_by_storm.smk"
include: "power-tc/cyclone-grid.smk"
File renamed without changes.
26 changes: 26 additions & 0 deletions workflow/rules/preprocess/gadm.smk → workflow/context/gadm.smk
@@ -1,3 +1,29 @@
"""
Download GADM boundaries

Reference
---------
https://gadm.org/data.html
"""


rule download_gadm_levels:
output:
gpkg = "{OUTPUT_DIR}/input/admin-boundaries/gadm36_levels.gpkg"
shell:
"""
wget https://geodata.ucdavis.edu/gadm/gadm3.6/gadm36_levels_gpkg.zip \
--output-document={wildcards.OUTPUT_DIR}/input/admin-boundaries/gadm36_levels_gpkg.zip
unzip -o {wildcards.OUTPUT_DIR}/input/admin-boundaries/gadm36_levels_gpkg.zip \
-d {wildcards.OUTPUT_DIR}/input/admin-boundaries
rm {wildcards.OUTPUT_DIR}/input/admin-boundaries/gadm36_levels_gpkg.zip
"""

"""
Test with:
snakemake -c1 -- results/input/admin-boundaries/gadm36_levels.gpkg
"""

rule simplify_admin_bounds:
input:
all_admin_bounds = rules.download_gadm_levels.output.gpkg
File renamed without changes.
@@ -23,20 +23,20 @@ Concept & Methodology:

rule download_ghsl:
output:
"{OUTPUT_DIR}/input/ghsl/GHS_POP_E{YEAR}_GLOBE_R2022A_54009_{RESOLUTION}_V1_0.tif"
"{OUTPUT_DIR}/input/ghsl/GHS_POP_E{YEAR}_GLOBE_{RELEASE}_54009_{RESOLUTION}_V1_0.tif"
wildcard_constraints:
YEAR="1975|1980|1985|1990|1995|2000|2005|2010|2015|2020|2025|2030",
RESOLUTION="1000?"
YEAR=range(1975, 2031, 5),
RESOLUTION="100|1000"
shell:
"""
output_dir=$(dirname {output})

mkdir -p $output_dir

wget -nc https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/GHSL/GHS_POP_GLOBE_R2022A/GHS_POP_E{wildcards.YEAR}_GLOBE_R2022A_54009_{wildcards.RESOLUTION}/V1-0/GHS_POP_E{wildcards.YEAR}_GLOBE_R2022A_54009_{wildcards.RESOLUTION}_V1_0.zip \
wget -nc https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/GHSL/GHS_POP_GLOBE_{wildcards.RELEASE}/GHS_POP_E{wildcards.YEAR}_GLOBE_{wildcards.RELEASE}_54009_{wildcards.RESOLUTION}/V1-0/GHS_POP_E{wildcards.YEAR}_GLOBE_{wildcards.RELEASE}_54009_{wildcards.RESOLUTION}_V1_0.zip \
--directory-prefix=$output_dir

unzip -o $output_dir/GHS_POP_E{wildcards.YEAR}_GLOBE_R2022A_54009_{wildcards.RESOLUTION}_V1_0.zip \
unzip -o $output_dir/GHS_POP_E{wildcards.YEAR}_GLOBE_{wildcards.RELEASE}_54009_{wildcards.RESOLUTION}_V1_0.zip \
-d $output_dir
"""

@@ -47,8 +47,9 @@ rule download_ghsl_all:
"{{OUTPUT_DIR}}",
"input",
"ghsl",
"GHS_POP_E{year}_GLOBE_R2022A_54009_{resolution}_V1_0.tif",
"GHS_POP_E{year}_GLOBE_{release}_54009_{resolution}_V1_0.tif",
),
resolution=(100, 1000),
year=(2020, )
year=(2020, ),
release="R2022A" # TODO bump to R2023A
)
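As a sanity check on the expand() call above: expand() takes the Cartesian product of its keyword arguments, and the doubled braces leave OUTPUT_DIR as a wildcard for the consuming rule, so with release="R2022A" the input list resolves to two paths (a sketch, not repository output):

# resolved inputs of download_ghsl_all (OUTPUT_DIR still a wildcard):
#   {OUTPUT_DIR}/input/ghsl/GHS_POP_E2020_GLOBE_R2022A_54009_100_V1_0.tif
#   {OUTPUT_DIR}/input/ghsl/GHS_POP_E2020_GLOBE_R2022A_54009_1000_V1_0.tif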
@@ -73,7 +73,7 @@ rule aggregate_disruption_within_sample:
by_event = temp(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/disruption/{STORM_SET}/{SAMPLE}_pop_affected_by_event.pq")),
by_target = temp(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/disruption/{STORM_SET}/{SAMPLE}_pop_affected_by_target.pq")),
script:
"../../../scripts/exposure/aggregate_grid_disruption.py"
"./aggregate_grid_disruption.py"

"""
Test with:
@@ -423,7 +423,7 @@ rule disruption_by_admin_region:
output:
expected_annual_disruption = "{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/disruption/{STORM_SET}/EAPA_{ADMIN_SLUG}.gpq",
script:
"../../../scripts/exposure/grid_disruption_by_admin_region.py"
"./grid_disruption_by_admin_region.py"

"""
Test with:
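A note on the script path changes in this and the following hunks: Snakemake resolves a rule's script: path relative to the .smk file that defines the rule, so co-locating scripts with their rules lets the long relative paths collapse. For example:

# before: rule file nested under workflow/rules/..., script kept under workflow/scripts/
script:
    "../../../scripts/exposure/aggregate_grid_disruption.py"

# after: script sits beside the .smk file that defines the rule
script:
    "./aggregate_grid_disruption.py"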
@@ -20,7 +20,7 @@ rule aggregate_exposure_within_sample:
by_event = temp(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/{SAMPLE}_length_m_by_event.pq")),
by_edge = temp(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/{SAMPLE}_length_m_by_edge.pq")),
script:
"../../../scripts/exposure/aggregate_grid_exposure.py"
"./aggregate_grid_exposure.py"

"""
Test with:
@@ -103,7 +103,7 @@ rule plot_event_exposure_distributions_for_country:
output:
country_event_distributions = directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/length_m_event_dist/")
script:
"../../../scripts/exposure/plot_exposure_distributions.py"
"./plot_exposure_distributions.py"

"""
Test with:
@@ -124,7 +124,7 @@ rule exposure_by_admin_region:
output:
expected_annual_exposure = "{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/EAE_{ADMIN_SLUG}.gpq",
script:
"../../../scripts/exposure/grid_exposure_by_admin_region.py"
"./grid_exposure_by_admin_region.py"

"""
Test with:
@@ -233,4 +233,4 @@ rule merge_exposure_admin_levels:
merged = merge_gadm_admin_levels(merged, other)

merged.reset_index(drop=True).sort_index(axis=1).to_parquet(output.merged_admin_levels)
logging.info("Done")
logging.info("Done")
@@ -145,7 +145,7 @@ rule electricity_grid_damages:
exposure = protected(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/{STORM_SET}/{SAMPLE}/")),
disruption = protected(directory("{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/disruption/{STORM_SET}/{SAMPLE}/")),
script:
"../../../scripts/intersect/grid_disruption.py"
"./grid_disruption.py"

"""
Test with:
@@ -7,7 +7,7 @@ rule network_components:
component_map="{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}/network_map_by_component.png",
component_data="{OUTPUT_DIR}/{DATASET}_{FILTER_SLUG}/components.parquet"
script:
"../../scripts/network_components.py"
"./network_components.py"

"""
Test with:
23 changes: 23 additions & 0 deletions workflow/power-tc/network_raster_intersection.smk
@@ -0,0 +1,23 @@
"""
Intersect a network representation with hazard rasters
"""

rule rasterise_electricity_grid:
"""
Split electricity network edges on raster grid
Assign raster indices to edges
"""
input:
network="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/network/edges.geoparquet",
tif_paths=["{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/storms/wind_grid.tiff"],
params:
copy_raster_values=False,
output:
geoparquet="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/exposure/edges_split.geoparquet",
script:
"../../scripts/intersection.py"

"""
Test with:
snakemake --cores 1 results/power/by_country/PRI/exposure/edges_split.geoparquet
"""
@@ -287,7 +287,7 @@ rule create_power_network:
nodes="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/network/nodes.geoparquet",
grid_hull="{OUTPUT_DIR}/power/by_country/{COUNTRY_ISO_A3}/network/convex_hull.json",
script:
"../../scripts/preprocess/create_electricity_network.py"
"./create_electricity_network.py"

"""
Test with:
@@ -366,30 +366,3 @@ rule map_network_components:
Test with:
snakemake -c1 results/power/by_country/HTI/edges.png
"""


rule create_transport_network:
"""
Take .geoparquet OSM files and output files of cleaned network nodes and edges
"""
input:
nodes="{OUTPUT_DIR}/geoparquet/{DATASET}_{FILTER_SLUG}/raw/{SLICE_SLUG}_nodes.geoparquet",
edges="{OUTPUT_DIR}/geoparquet/{DATASET}_{FILTER_SLUG}/raw/{SLICE_SLUG}_edges.geoparquet",
admin="{OUTPUT_DIR}/input/admin-boundaries/gadm36_levels.gpkg",
output:
nodes="{OUTPUT_DIR}/geoparquet/{DATASET}_{FILTER_SLUG}/processed/{SLICE_SLUG}_nodes.geoparquet",
edges="{OUTPUT_DIR}/geoparquet/{DATASET}_{FILTER_SLUG}/processed/{SLICE_SLUG}_edges.geoparquet"
params:
# determine the network type from the filter, e.g. road, rail
network_type=lambda wildcards: wildcards.FILTER_SLUG.replace('filter-', ''),
# pass in the slice number so we can label edges and nodes with their slice
# edge and node IDs should be unique across all slices
slice_number=lambda wildcards: int(wildcards.SLICE_SLUG.replace('slice-', ''))
script:
# template the path string with a value from params (can't execute .replace in `script` context)
"../../scripts/transport/create_{params.network_type}_network.py"

"""
Test with:
snakemake --cores all results/geoparquet/tanzania-mini_filter-road/processed/slice-0_edges.geoparquet
"""
@@ -90,7 +90,7 @@ rule annotate_targets:
output:
targets="{OUTPUT_DIR}/power/targets.geoparquet",
script:
"../../scripts/preprocess/annotate_targets.py"
"./annotate_targets.py"

"""
Test with:
File renamed without changes.
@@ -1,3 +1,28 @@
"""
Download WRI powerplants database

Reference
---------
https://www.wri.org/research/global-database-power-plants
"""


rule download_powerplants:
output:
csv = "{OUTPUT_DIR}/input/powerplants/global_power_plant_database.csv"
shell:
"""
mkdir -p {wildcards.OUTPUT_DIR}/input/powerplants
cd {wildcards.OUTPUT_DIR}/input/powerplants
wget https://wri-dataportal-prod.s3.amazonaws.com/manual/global_power_plant_database_v_1_3.zip
unzip -o global_power_plant_database_v_1_3.zip
"""

"""
Test with:
snakemake -c1 -- results/input/powerplants/global_power_plant_database.csv
"""

rule parse_powerplants:
"""
Parse powerplant data for world and save in convenient format
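The parse_powerplants rule is truncated here. Purely as an illustration of the kind of step its docstring describes (hypothetical code, not the repository's script), parsing the WRI CSV into a geospatial format might look like:

# hypothetical sketch: read the WRI CSV, build point geometries, save as GeoParquet
import pandas as pd
import geopandas as gpd

df = pd.read_csv("results/input/powerplants/global_power_plant_database.csv")
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df.longitude, df.latitude),  # GPPD provides latitude/longitude columns
    crs="EPSG:4326",
)
gdf.to_parquet("results/input/powerplants/global_power_plant_database.geoparquet")  # hypothetical output path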