diff --git a/config/symbology/qgis/catfim_library.qml b/config/symbology/qgis/catfim_library.qml new file mode 100644 index 000000000..dc461ebc4 --- /dev/null +++ b/config/symbology/qgis/catfim_library.qml @@ -0,0 +1,917 @@ + + + + 1 + 1 + 1 + 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + "stage" + + + + + + 0 + 0 + 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + + + 0 + generatedlayout + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + "name" + + 2 + diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 6d3f972a5..4a955fd30 100755 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -1,7 +1,25 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. 
-## v4.5.13.6 - 2025-1-10 - [PR#1387](https://github.com/NOAA-OWP/inundation-mapping/pull/1387)
+## v4.5.13.7 - 2025-01-10 - [PR#1379](https://github.com/NOAA-OWP/inundation-mapping/pull/1379)
+
+There are many sites in non-CONUS regions (AK, PR, HI) where we would like to run CatFIM but they are being excluded because they are not NWM forecast points. This update brings back the double API pull and adds in some code to filter out duplicate (and NULL) lids from the metadata lists.
+
+### Additions
+- `inundation-mapping/tools/catfim/vis_categorical_fim.py`: Functions for reading in, processing, and visualizing CatFIM results.
+- `inundation-mapping/tools/catfim/notebooks/vis_catfim_cross_section.ipynb`: A new Jupyter notebook for viewing and analyzing CatFIM results.
+- `inundation-mapping/tools/catfim/notebooks/eval_catfim_metadata.ipynb`: A new Jupyter notebook for evaluating metadata and results from CatFIM runs.
+- `inundation-mapping/config/symbology/qgis/catfim_library.qml`: Symbology preset for viewing CatFIM library in QGIS.
+
+
+### Changes
+
+- `inundation-mapping/tools/catfim/generate_categorical_fim_flows.py`: Re-implements the dual API call and filters out duplicate sites.
+
+

+ +## v4.5.13.6 - 2025-01-10 - [PR#1387](https://github.com/NOAA-OWP/inundation-mapping/pull/1387) Fixes two issues in test_cases: 1. An error in `synthesize_test_cases` and `run_test_case` if any directories of the 5 benchmark sources (BLE, NWS, IFC, USGS, or ras2fim) do not exist. This issue was originally discovered and fixed in #1178, but is being elevated to its own PR here. Fixes #1386. diff --git a/tools/catfim/generate_categorical_fim_flows.py b/tools/catfim/generate_categorical_fim_flows.py index 3646fd7c3..57e4a3393 100755 --- a/tools/catfim/generate_categorical_fim_flows.py +++ b/tools/catfim/generate_categorical_fim_flows.py @@ -646,7 +646,7 @@ def __load_nwm_metadata( FLOG.trace(metadata_url) - all_meta_lists = [] + output_meta_list = [] # Check to see if meta file already exists # This feature means we can copy the pickle file to another enviro (AWS?) as it won't need to call # WRDS unless we need a smaller or modified version. This one likely has all nws_lid data. @@ -655,19 +655,16 @@ def __load_nwm_metadata( FLOG.lprint(f"Meta file already downloaded and exists at {nwm_metafile}") with open(nwm_metafile, "rb") as p_handle: - all_meta_lists = pickle.load(p_handle) + output_meta_list = pickle.load(p_handle) else: meta_file = os.path.join(output_catfim_dir, "nwm_metafile.pkl") FLOG.lprint(f"Meta file will be downloaded and saved at {meta_file}") - if lid_to_run != "all": - # single lid for now - - # must_include_value variable not yet tested - # must_include_value = 'nws_data.rfc_forecast_point' if lid_to_run not in ['HI', 'PR', 'AK'] else None - all_meta_lists, ___ = get_metadata( + if lid_to_run != "all": # TODO: Deprecate LID options (in favor of HUC list functionlity) + # Single lid for now (deprecated) + output_meta_list, ___ = get_metadata( metadata_url, select_by='nws_lid', selector=[lid_to_run], @@ -675,20 +672,14 @@ def __load_nwm_metadata( upstream_trace_distance=nwm_us_search, downstream_trace_distance=nwm_ds_search, ) - else: - # This gets 
all records including AK, HI and PR, but only if they ahve forecast points - # Note: Nov 2024: AK has 152 sites with forecast points, but after research - # non of the AK sites that nws_data.rfc_forecast_point = False so we can - # exclude them. - # Previously we allowed HI and PR sites to come in and most were failing. - # So, we will include HI and PR as well here - - # We can not just filter out based on dup lids as depending on which - # metadata load they are on, dup lid records will have different data + else: + # Dec 2024: Running two API calls: one to get all forecast points, and another + # to get all points (non-forecast and forecast) for the OCONUS regions. Then, + # duplicate LIDs are removed. - # orig_meta_lists, ___ = get_metadata( - all_meta_lists, ___ = get_metadata( + # Get all forecast points + forecast_point_meta_list, ___ = get_metadata( metadata_url, select_by='nws_lid', selector=['all'], @@ -697,41 +688,53 @@ def __load_nwm_metadata( downstream_trace_distance=nwm_ds_search, ) - # If we decided to put HI and PR back in we can do two loads one - # with the flag and one without. Then iterate through the meta_lists - # results and filtered out based on the state full value. Then - # call WRDS again with those specific states and simply concat them. 
+ # Get all points for OCONUS regions (HI, PR, and AK) + oconus_meta_list, ___ = get_metadata( + metadata_url, + select_by='state', + selector=['HI', 'PR', 'AK'], + must_include=None, + upstream_trace_distance=nwm_us_search, + downstream_trace_distance=nwm_ds_search, + ) + + # Append the lists + unfiltered_meta_list = forecast_point_meta_list + oconus_meta_list - # filtered_meta_data = [] - # for metadata in orig_meta_lists: - # df = pd.json_normalize(metadata) - # state = df['nws_data.state'].item() - # lid = df['identifiers.nws_lid'].item() - # if state.lower() not in ["alaska", "hawaii", "puerto rico"]: - # filtered_meta_data.append(metadata) + # print(f"len(all_meta_lists) is {len(all_meta_lists)}") - # must_include='nws_data.rfc_forecast_point', + # Filter the metadata list + output_meta_list = [] + unique_lids, duplicate_lids = [], [] # TODO: remove? + duplicate_meta_list = [] + nonelid_metadata_list = [] # TODO: remove - # Nov 2024: We used to call them site specific and may add them back in but ok to leave then + for i, site in enumerate(unfiltered_meta_list): + nws_lid = site['identifiers']['nws_lid'] - # islands_list, ___ = get_metadata( - # metadata_url, - # select_by='state', - # selector=['HI', 'PR'], - # must_include=None, - # upstream_trace_distance=nwm_us_search, - # downstream_trace_distance=nwm_ds_search, - # ) + if nws_lid is None: + # No LID available + nonelid_metadata_list.append(site) # TODO: replace with Continue - # Append the lists - # all_meta_lists = filtered_all_meta_list + islands_list + elif nws_lid in unique_lids: + # Duplicate LID + duplicate_lids.append(nws_lid) + duplicate_meta_list.append(site) # TODO: remove extra lists - # print(f"len(all_meta_lists) is {len(all_meta_lists)}") + else: + # Unique/unseen LID that's not None + unique_lids.append(nws_lid) + output_meta_list.append(site) + + FLOG.lprint(f'{len(duplicate_lids)} duplicate points removed.') + FLOG.lprint(f'Filtered metadatada downloaded for {len(output_meta_list)} 
points.') + + # ---------- with open(meta_file, "wb") as p_handle: - pickle.dump(all_meta_lists, p_handle, protocol=pickle.HIGHEST_PROTOCOL) + pickle.dump(output_meta_list, p_handle, protocol=pickle.HIGHEST_PROTOCOL) - return all_meta_lists + return output_meta_list if __name__ == '__main__': diff --git a/tools/catfim/notebooks/eval_catfim_metadata.ipynb b/tools/catfim/notebooks/eval_catfim_metadata.ipynb new file mode 100755 index 000000000..5661e7de7 --- /dev/null +++ b/tools/catfim/notebooks/eval_catfim_metadata.ipynb @@ -0,0 +1,688 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4c5de4a5-80cf-4cbf-97a9-1c8da52187b7", + "metadata": { + "tags": [] + }, + "source": [ + "**README**\n", + "\n", + "This notebook is intended for developer and testing use, so the functions will \n", + "probably not be as useful for outside users. That said, feel free to peruse the\n", + "notebook and use what you wish from it!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "544d20bf-6273-4dd5-bae8-a42935b3e3f1", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Import packages and functions\n", + "\n", + "import argparse\n", + "import copy\n", + "import glob\n", + "import re\n", + "import os\n", + "import pickle\n", + "import random\n", + "import shutil\n", + "import sys\n", + "import time\n", + "import traceback\n", + "from concurrent.futures import ProcessPoolExecutor, as_completed, wait\n", + "from datetime import datetime, timezone\n", + "from pathlib import Path\n", + "\n", + "import geopandas as gpd\n", + "import numpy as np\n", + "import pandas as pd\n", + "from dotenv import load_dotenv\n", + "from tools_shared_functions import (\n", + " aggregate_wbd_hucs,\n", + " filter_nwm_segments_by_stream_order,\n", + " flow_data,\n", + " get_metadata,\n", + " get_nwm_segs,\n", + " get_thresholds,\n", + ")\n", + "\n", + "import utils.fim_logger as fl\n", + "from utils.shared_variables import VIZ_PROJECTION\n", + "\n", + 
"print('Imports complete.')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c7d9207-9b2b-431b-ab4e-3dbb3e2107f2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Define Functions to retrieve, process, and filter the metadata\n", + "\n", + "# -------------------------------------------------------\n", + "def list_of_lids(conus_list, verbose):\n", + " '''\n", + " Extract a list of LIDs from the conus_list\n", + " \n", + " Example: \n", + " lid_list = list_of_lids(conus_list, True)\n", + " '''\n", + " lid_list = []\n", + " for i, site in enumerate(conus_list):\n", + " nws_lid = site['identifiers']['nws_lid']\n", + " lid_list.append(nws_lid)\n", + " if verbose == True:\n", + " print(f'List of LIDs: {lid_list}')\n", + " \n", + " return lid_list\n", + "\n", + "# -------------------------------------------------------\n", + "def list_duplicate_lids(conus_list, verbose):\n", + " '''\n", + " Extract a list of duplicate LIDs from the conus_list\n", + " \n", + " Example: \n", + " lid_list, duplicate_lid_list = list_duplicate_lids(conus_list, True)\n", + " '''\n", + " lid_list = []\n", + " duplicate_lid_list = []\n", + " \n", + " \n", + " for i, site in enumerate(conus_list):\n", + " nws_lid = site['identifiers']['nws_lid']\n", + "\n", + " if nws_lid in lid_list:\n", + " duplicate_lid_list.append(nws_lid)\n", + " else: \n", + " lid_list.append(nws_lid)\n", + "\n", + " if verbose == True:\n", + " print(f'Length of unique LID list: {len(lid_list)}')\n", + " print(f'List of duplicate LIDs: {duplicate_lid_list}')\n", + "\n", + " \n", + " return lid_list, duplicate_lid_list\n", + "\n", + "# -------------------------------------------------------\n", + "def filter_by_lid(lid_filter, conus_list, verbose):\n", + " '''\n", + " Function to filter conus_list by LID\n", + " \n", + " Example:\n", + " conus_list_filt = filter_by_lid('None', conus_list, True)\n", + " '''\n", + " conus_list_filt = []\n", + " for i, site in 
enumerate(conus_list):\n", + " lid = site['identifiers']['nws_lid']\n", + " if lid == lid_filter:\n", + " conus_list_filt.append(site)\n", + " if verbose == True:\n", + " print(f'LID filter: {lid_filter} \\nNumber of sites: {len(conus_list_filt)}')\n", + " \n", + " return conus_list_filt\n", + "\n", + "# -------------------------------------------------------\n", + "def filter_by_state(state_filter, conus_list, verbose):\n", + " '''\n", + " Function to filter conus_list by state\n", + " \n", + " Example: \n", + " conus_list_filt = filter_by_state('Alaska', conus_list, True)\n", + " '''\n", + " conus_list_filt = []\n", + " for i, site in enumerate(conus_list):\n", + " state = site['nws_data']['state']\n", + " if state == state_filter:\n", + " conus_list_filt.append(site)\n", + " if verbose == True:\n", + " print(f'State: {state_filter} \\nNumber of sites: {len(conus_list_filt)}')\n", + " \n", + " return conus_list_filt\n", + "\n", + "# -------------------------------------------------------\n", + "\n", + "print('Define functions complete.')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dca1b28-976d-4487-9fd3-96253e1e83f2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Testing get_metadata() functionality\n", + "\n", + "# --------- Inputs --------- \n", + "\n", + "search = 5\n", + "\n", + "nwm_us_search, nwm_ds_search = search, search\n", + "\n", + "\n", + "# output_catfim_dir = \n", + "API_BASE_URL = 'https://nwcal-wrds.nwc.nws.noaa.gov/api/location/v3.0'\n", + "metadata_url = f'{API_BASE_URL}/metadata'\n", + "\n", + "\n", + "# lid_to_run = \n", + "# nwm_metafile = \n", + "\n", + "# --------- Code --------- \n", + "\n", + "all_meta_lists = []\n", + "\n", + "\n", + "conus_list, ___ = get_metadata(\n", + " metadata_url,\n", + " select_by='nws_lid',\n", + " selector=['all'],\n", + " must_include='nws_data.rfc_forecast_point',\n", + " upstream_trace_distance=nwm_us_search,\n", + " 
downstream_trace_distance=nwm_ds_search,\n", + ")\n", + "\n", + "\n", + "# Get metadata for Islands and Alaska\n", + "islands_list, ___ = get_metadata(\n", + " metadata_url,\n", + " select_by='state',\n", + " selector=['HI', 'PR', 'AK'],\n", + " must_include=None,\n", + " upstream_trace_distance=nwm_us_search,\n", + " downstream_trace_distance=nwm_ds_search,\n", + ")\n", + "# Append the lists\n", + "all_meta_lists = conus_list + islands_list\n", + "\n", + "# print(islands_list)\n", + "\n", + "# with open(meta_file, \"wb\") as p_handle:\n", + "# pickle.dump(all_meta_lists, p_handle, protocol=pickle.HIGHEST_PROTOCOL)\n", + "\n", + "print('Metadata retrieval complete.')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d25a4979-2b0c-4f4f-ae41-6fec3768d39b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# ------ New addition: filtering ------\n", + "\n", + "# -- function --\n", + "def filter_metadata_list (metadata_list, verbose):\n", + " '''\n", + " \n", + " Filter metadata list to remove: \n", + " - sites where the nws_lid = None\n", + " - duplicate sites\n", + " \n", + " '''\n", + "\n", + " unique_lids, duplicate_lids = [], []\n", + " duplicate_metadata_list, unique_metadata_list = [], []\n", + "\n", + " nonelid_metadata_list = [] # TODO: remove eventually? 
\n", + "\n", + " for i, site in enumerate(metadata_list):\n", + " nws_lid = site['identifiers']['nws_lid']\n", + "\n", + " if nws_lid == None:\n", + " # No LID available\n", + " nonelid_metadata_list.append(site)\n", + "\n", + " # TODO: replace this with Continue, eventually we wont need this list\n", + "\n", + " elif nws_lid in unique_lids:\n", + " # Duplicate LID\n", + " duplicate_lids.append(nws_lid)\n", + " duplicate_metadata_list.append(site)\n", + "\n", + " else: \n", + " # Unique/unseen LID that's not None\n", + " unique_lids.append(nws_lid)\n", + " unique_metadata_list.append(site)\n", + "\n", + " if verbose == True:\n", + " print(f'Input metadata list length: {len(metadata_list)}')\n", + " print(f'Output (unique) metadata list length: {len(unique_metadata_list)}')\n", + " print(f'Number of unique LIDs: {len(unique_lids)} \\nNumber of duplicate LIDs: {len(duplicate_lids)} \\nNumber of None LIDs: {len(nonelid_metadata_list)}')\n", + "\n", + " return unique_lids, duplicate_lids, nonelid_metadata_list, duplicate_metadata_list, unique_metadata_list # TODO: eventually, have it only return necessary objects\n", + "\n", + "\n", + "# Run filtering function\n", + "unique_lids, duplicate_lids, nonelid_metadata_list, duplicate_metadata_list, unique_metadata_list = filter_metadata_list(all_meta_lists, True)\n", + "\n", + "print('Filtering complete.')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9ffc3b60-18de-4825-8766-80f3904fd6cf", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Quantify number of CatFIM sites available for each state (before and after filtering out duplicates)\n", + "\n", + "state_list = ['Puerto Rico', 'Hawaii', 'Alaska']\n", + "\n", + "print('Current Code: Single API call (only forecast points)')\n", + "for state in state_list: \n", + " currentcode_state = filter_by_state(state, conus_list, True)\n", + " print()\n", + "\n", + "print()\n", + "print('Proposed Update: Double API call (forecast points + 
all HI, AK, and PR points)')\n", + "print()\n", + "for state in state_list: \n", + " # print('Before filtering out duplicates:')\n", + " # prefilt_state = filter_by_state(state, all_meta_lists, True)\n", + " print('AFTER filtering out duplicates:')\n", + " postfilt_state = filter_by_state(state, unique_metadata_list, True)\n", + " print()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d2ad926-105c-4326-980c-3e4793b7b58a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "postfilt_state = filter_by_state('Connecticut', unique_metadata_list, True)\n", + "postfilt_state = filter_by_state('New York', unique_metadata_list, True)\n", + "postfilt_state = filter_by_state('Texas', unique_metadata_list, True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "65996d68-b060-4cd3-b575-3e3d4172c532", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Test current code formulation\n", + "\n", + "unique_lids, duplicate_lids, nonelid_metadata_list, duplicate_metadata_list, unique_metadata_list = filter_metadata_list(conus_list, True)\n", + "print()\n", + "conus_list_filt = filter_by_state('Alaska', conus_list, True)" + ] + }, + { + "cell_type": "markdown", + "id": "2ba42000-3855-47eb-9f1c-f3c805f45b96", + "metadata": {}, + "source": [ + "### Get a HUC list given HUC02 region \n", + "\n", + "Creates a list of HUC8 IDs in the provided FIM output path based on an input HUC2 value. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "034acbe3-d096-4dfd-a358-4ca409bfef64", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "fim_output_path = '/data/previous_fim/fim_4_5_2_11/'\n", + "\n", + "# huc2 = '20' # Hawaii\n", + "# huc2 = '21' # Puerto Rico\n", + "huc2 = '19' # Alaska\n", + "\n", + "all_hucs = os.listdir(fim_output_path)\n", + "\n", + "subsetted_hucs = [x for x in all_hucs if x.startswith(huc2)]\n", + "\n", + "for i in subsetted_hucs:\n", + " print(i)" + ] + }, + { + "cell_type": "markdown", + "id": "29a52847-3189-41b7-a755-3063512a96e0", + "metadata": { + "tags": [] + }, + "source": [ + "### Get stats for CatFIM Results\n", + "\n", + "Summarizes the number of mapped and unmapped points for a list of CatFIM results given a list of states. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ca99d060-3963-497f-a8c0-cb117bbcd818", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Define function that counts mapped vs unmapped values for a CatFIM output folder\n", + "\n", + "def count_mapped_for_state(catfim_folder, result_folders, states):\n", + " # Read in CatFIM outputs\n", + " for result_folder in result_folders:\n", + " print()\n", + " print('-----' + result_folder + '-----')\n", + " \n", + " catfim_points_path = 'None'\n", + " catfim_outputs_mapping_path = os.path.join(catfim_folder, result_folder, 'mapping')\n", + " \n", + " # Get filepath\n", + " for file in os.listdir(catfim_outputs_mapping_path):\n", + " if file.endswith('catfim_sites.csv'):\n", + " catfim_points_path = os.path.join(catfim_outputs_mapping_path, file)\n", + "\n", + " if catfim_points_path == 'None':\n", + " print(f'No site csv found in {catfim_outputs_mapping_path}')\n", + " continue\n", + " \n", + " # Open points file\n", + " try:\n", + " catfim_points = gpd.read_file(catfim_points_path)\n", + "\n", + " except Exception as e:\n", + " print('An error occurred', e)\n", + " continue\n", + 
"\n", + " # Get mapped vs unmapped data for the listed states\n", + " for state in states:\n", + " catfim_points_state = catfim_points[catfim_points['states'] == state]\n", + " \n", + " if len(catfim_points_state) != 0:\n", + " num_not_mapped = len(catfim_points_state[catfim_points_state['mapped'] == 'no'])\n", + " \n", + " catfim_points_state_mapped = catfim_points_state[catfim_points_state['mapped'] == 'yes']\n", + " num_mapped = len(catfim_points_state_mapped)\n", + " \n", + " huc_list = catfim_points_state['HUC8']\n", + " \n", + " if 'ahps_lid' in catfim_points_state.columns:\n", + " lid_list = catfim_points_state['ahps_lid']\n", + " lid_list_mapped = catfim_points_state_mapped['ahps_lid']\n", + " elif 'nws_lid' in catfim_points_state.columns:\n", + " lid_list = catfim_points_state['nws_lid']\n", + " lid_list_mapped = catfim_points_state_mapped['nws_lid']\n", + " else:\n", + " print('Could not find ahps_lid or nws_lid column in csv.') \n", + " print(catfim_points_state.columns)\n", + " continue\n", + " \n", + " huc_list_unique = set(huc_list)\n", + " lid_list_unique = set(lid_list)\n", + " \n", + " num_duplicate_sites = len(lid_list) - len(lid_list_unique)\n", + " num_duplicate_sites_mapped = len(lid_list_mapped) - len(set(lid_list_mapped))\n", + "\n", + " print(f'{state} \\n Mapped: {num_mapped} \\n Not mapped: {num_not_mapped}')\n", + " \n", + " \n", + " print(f' Number of duplicate LIDs: {num_duplicate_sites}')\n", + " print(f' Number of duplicate LIDs mapped: {num_duplicate_sites_mapped}')\n", + "\n", + " print(f' {len(huc_list_unique)} hucs: {huc_list_unique}')\n", + "\n", + " return catfim_points, #catfim_points_state" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9808a93c-aee4-465a-8faa-18e941f5923a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Set up inputs for counting mapped points in the CatFIM results\n", + "\n", + "# Input state list\n", + "states = ['AK', 'PR', 'HI']\n", + "\n", + "# Previous 
runs\n", + "catfim_folder_prev = '/data/catfim/'\n", + "result_folders_prev = ['hand_4_5_11_1_flow_based', 'hand_4_5_11_1_stage_based', 'fim_4_5_2_11_flow_based', 'fim_4_5_2_11_stage_based']\n", + "\n", + "# Current test runs\n", + "catfim_folder_testing = '/data/catfim/emily_test'\n", + "result_folders_testing = ['site_filtering_HI_flow_based', 'site_filtering_HI_stage_based', \n", + " 'site_filtering_PR_flow_based', 'site_filtering_PR_stage_based', \n", + " 'site_filtering_AK_flow_based', 'site_filtering_AK_stage_based']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6255a914-3c44-4e02-8c20-a6f11eb5a12e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Run count_mapped_for_state() for previous CatFIM results\n", + "\n", + "count_mapped_for_state(catfim_folder_prev, result_folders_prev, states)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68f45696-893c-4709-9fe6-d1c223bd9020", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Run count_mapped_for_state() for most recent CatFIM results\n", + "\n", + "count_mapped_for_state(catfim_folder_testing, result_folders_testing, states)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04988eab-2866-4d5b-882a-abb76e072df0", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Quanitfy duplicate CatFIM sites\n", + "\n", + "huc_list = catfim_points_state['HUC8']\n", + "lid_list = catfim_points_state['ahps_lid']\n", + "\n", + "huc_list_unique = set(huc_list)\n", + "lid_list_unique = set(lid_list)\n", + "num_duplicate_sites = len(lid_list) - len(lid_list_unique)\n", + "\n", + "print(f'Number of duplicate LIDs: {num_duplicate_sites}')\n", + "print(f'{len(huc_list_unique)} hucs: {huc_list_unique}')\n" + ] + }, + { + "cell_type": "markdown", + "id": "ae429a31-f83e-4594-9245-66cfb452b2be", + "metadata": {}, + "source": [ + "### Create a Table of Sites with NA Value in the State Column\n", + "\n", + 
"This code is helpful for testing instability in metadata columns. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cce08d89-cf96-4905-a090-3ba3a09695b1",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "input_meta_list = conus_list # conus_list or islands_list\n",
+    "\n",
+    "# -----\n",
+    "\n",
+    "state_data = []\n",
+    "\n",
+    "for i, site in enumerate(input_meta_list):\n",
+    "    lid = site['identifiers']['nws_lid']\n",
+    "\n",
+    "    nws_data_state = site['nws_data']['state']\n",
+    "    usgs_data_state = site['usgs_data']['state']\n",
+    "    nws_preferred_state = site['nws_preferred']['state']\n",
+    "    usgs_preferred_state = site['usgs_preferred']['state']\n",
+    "\n",
+    "    row = {'index': i, 'lid': lid, \n",
+    "           'nws_data_state':nws_data_state,\n",
+    "           'usgs_data_state':usgs_data_state,\n",
+    "           'nws_preferred_state':nws_preferred_state,\n",
+    "           'usgs_preferred_state':usgs_preferred_state}\n",
+    "\n",
+    "    state_data.append(row)\n",
+    "\n",
+    "state_data_df = pd.DataFrame(state_data)\n",
+    "\n",
+    "summary_df = pd.DataFrame({\n",
+    "    'nws_data_state': state_data_df['nws_data_state'].isna().sum(),\n",
+    "    'usgs_data_state': state_data_df['usgs_data_state'].isna().sum(),\n",
+    "    'nws_preferred_state': state_data_df['nws_preferred_state'].isna().sum(),\n",
+    "    'usgs_preferred_state': state_data_df['usgs_preferred_state'].isna().sum()}, index=[f'Number of NA Values in State Column, out of {len(state_data_df)} rows'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "007eb617-d224-46e4-ab8e-aab28679cf43",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# state_data_df[state_data_df['nws_data_state'].isna()]\n",
+    "# state_data_df.loc[(state_data_df['nws_data_state'].isna()) & (state_data_df['usgs_data_state'].isna())]\n",
+    "state_data_df.loc[(state_data_df['nws_preferred_state'].isna()) & (state_data_df['usgs_preferred_state'].isna())]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": 
"9c98a678-555e-47a9-bc64-87fd41883cda", + "metadata": {}, + "source": [ + "### Parse CatFIM Logs into a DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8323d4db-3cde-4029-9d06-f807a9419523", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "run_path = '/data/catfim/emily_test/site_filtering_PR_stage_based/'\n", + "log_file = 'catfim_2024_12_12-19_24_09.log'\n", + "\n", + "log_path = os.path.join(run_path, 'logs', log_file)\n", + "\n", + "\n", + "out_df = []\n", + "\n", + "with open(log_path) as f:\n", + " for line in f:\n", + " # print(line)\n", + " \n", + " # Initialize variables\n", + " huc, lid, message_type, message = '', '', '', ''\n", + "\n", + " # Get the HUC8\n", + " match = re.search(r\"\\d{8}\", line)\n", + " if match:\n", + " huc = match.group()\n", + " \n", + " # Get the LID\n", + " match2 = re.search(r\"(?<= : ).{5}\", line)\n", + " if match2:\n", + " lid = match2.group()\n", + " \n", + " if 'WARNING' in line: \n", + " pattern = lid + ':'\n", + " match3 = re.search(f\"(?<={pattern})(.*)\", line)\n", + " if match3:\n", + " message = match3.group()\n", + " message_type = 'WARNING'\n", + "\n", + " elif 'TRACE' in line: \n", + " pattern = lid + ':'\n", + " match3 = re.search(f\"(?<={pattern})(.*)\", line)\n", + " if match3:\n", + " message = match3.group()\n", + " message_type = 'TRACE'\n", + " \n", + " else:\n", + " continue\n", + " \n", + " new_line = {'huc': huc, 'lid':lid, 'message_type':message_type, 'message':message}\n", + "\n", + " out_df.append(new_line)\n", + " \n", + "out_df = pd.DataFrame(out_df)\n", + " \n", + "# out_df.to_csv('PR_error_logs.csv')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tools/catfim/notebooks/vis_catfim_cross_section.ipynb b/tools/catfim/notebooks/vis_catfim_cross_section.ipynb new file mode 100755 index 000000000..f49586270 --- /dev/null +++ b/tools/catfim/notebooks/vis_catfim_cross_section.ipynb @@ -0,0 +1,252 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "6eb53c55-02a3-4172-9026-8a1ddbc05a72", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from catfim.vis_categorical_fim import (read_catfim_outputs, \n", + " subset_catfim_geom_by_site, \n", + " subset_apply_symbology_catfim_library, \n", + " map_catfim_full_extent, \n", + " map_catfim_at_site, \n", + " map_catfim_full_extent, \n", + " create_perpendicular_cross_section, \n", + " map_cross_section_geometry, \n", + " generate_dem_path, \n", + " create_cross_section_points, \n", + " get_elevation_for_cross_section_points, \n", + " apply_catfim_library_to_points, \n", + " plot_catfim_cross_section, \n", + " map_catfim_cross_section_points)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9ac6692c-262d-4d4b-9a62-f2cbcea83f6d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "EPSG = 5070\n", + "\n", + "## Viewing cross-section example\n", + "\n", + "# Inputs\n", + "lid = 'bltn7'\n", + "huc = '06010105'\n", + "catfim_inputs_path = '/data/previous_fim/fim_4_5_2_11'\n", + "catfim_outputs_path = '/data/catfim/emily_test/hand_4_5_11_1_catfim_datavis_flow_based/'\n", + "\n", + "# Viewing AUON6 \n", + "\n", + "# # Inputs\n", + "# lid = 'AUON6'\n", + "# huc = '04140201'\n", + "# # catfim_inputs_path = '/data/outputs/hand_4_5_11_1_catfim'\n", + "# catfim_inputs_path = '/data/previous_fim/fim_4_5_2_11'\n", + "# catfim_outputs_path = '/data/catfim/hand_4_5_11_1_stage_based/'\n", + "\n", + "# Viewing PACI1\n", + "\n", + "# Inputs\n", + "# lid = 'paci1'\n", + "# huc = '17060108'\n", + 
"# catfim_inputs_path = '/data/previous_fim/fim_4_5_2_11'\n", + "# catfim_outputs_path = '/data/catfim/hand_4_5_11_1_stage_based/'\n", + "\n", + "# ## Viewing Alaska BEFORE updates\n", + "\n", + "# # Inputs\n", + "# lid = 'apta2'\n", + "# huc = '19020301'\n", + "# catfim_inputs_path = '/data/previous_fim/fim_4_5_2_11'\n", + "# catfim_outputs_path = '/data/catfim/hand_4_5_11_1_flow_based'\n", + "\n", + "## Viewing Alaska updates\n", + "\n", + "# # Inputs\n", + "# lid = 'apta2'\n", + "# huc = '19020301'\n", + "# catfim_inputs_path = '/data/previous_fim/fim_4_5_2_11'\n", + "# catfim_outputs_path = '/data/catfim/emily_test/AK_new_wrds_flow_based/'" + ] + }, + { + "cell_type": "markdown", + "id": "3683983c-a5af-4c99-9863-b09f213d371c", + "metadata": { + "tags": [] + }, + "source": [ + "### Reading in and processing CatFIM library and geospatial data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d0c27b8-f47d-4543-9471-c6d2f0e799a0", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Read in the CatFIM outputs\n", + "catfim_library, catfim_points, flowline_gdf = read_catfim_outputs(catfim_inputs_path, catfim_outputs_path, huc)\n", + "\n", + "# Subset the CatFIM geometries by site and enforce projection\n", + "catfim_library_filt, points_filt_gdf, flowline_filt_gdf = subset_catfim_geom_by_site(lid, catfim_library, catfim_points, flowline_gdf, EPSG)\n", + "\n", + "# Filter CatFIM library and create the symbology columns\n", + "catfim_library_filt, colordict = subset_apply_symbology_catfim_library(catfim_library, points_filt_gdf, lid, include_record=True)" + ] + }, + { + "cell_type": "markdown", + "id": "0a29618a-7133-43f0-9e8c-568014986ffc", + "metadata": {}, + "source": [ + "### Plotting CatFIM library" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e66e0adb-cb23-4a8b-ba58-eb35bc197dde", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "map_catfim_full_extent(catfim_library_filt, 
flowline_filt_gdf, points_filt_gdf, colordict, plot_title='', legend=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85da45bd-51b8-40d0-af52-07c22f8ad08b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "map_catfim_at_site(catfim_library_filt, flowline_filt_gdf, points_filt_gdf, colordict, plot_title='', EPSG=5070, basemap=True, site_view=True, legend = True) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7860f8c9-e878-4122-a5ef-288972e1ce5f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "map_catfim_full_extent(catfim_library_filt, flowline_filt_gdf, points_filt_gdf, colordict, plot_title='', legend=False)" + ] + }, + { + "cell_type": "markdown", + "id": "ba427afc-2290-4867-ab60-43c48b0c890a", + "metadata": {}, + "source": [ + "### Generate and Plot CatFIM Elevation Cross-section" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6d7e18d3-d291-4bb3-b689-4a1bc9c966e7", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "## Function Inputs ------------------------------------\n", + "\n", + "xsection_length = 1000 # cross-section length, meters or feet, 1000 suggested\n", + "EPSG = 5070\n", + "dist_between_points = 10 # distance between points on cross-section line, 10 suggested\n", + "\n", + "\n", + "## Run Functions ------------------------------------\n", + "\n", + "# Create cross-section\n", + "xsection_gdf = create_perpendicular_cross_section(flowline_filt_gdf, points_filt_gdf, xsection_length, EPSG)\n", + "\n", + "# Map the cross-section\n", + "map_cross_section_geometry(xsection_gdf, points_filt_gdf, flowline_filt_gdf, modifier=100, plot_title=f'Flowline cross-section at site {lid}')\n", + "\n", + "# Get DEM path\n", + "dem_path = generate_dem_path(huc, root_dem_path='/data/inputs/3dep_dems/')\n", + "\n", + "# Create points along cross-section line\n", + "xsection_points_gdf, xsection_midpoint = 
create_cross_section_points(xsection_gdf, dist_between_points)\n", + "\n", + "# Apply elevation to points\n", + "xsection_points_gdf = get_elevation_for_cross_section_points(dem_path, xsection_points_gdf, EPSG)\n", + "\n", + "# Apply CatFIM stages to points\n", + "xsection_catfim_filt_gdf = apply_catfim_library_to_points(xsection_points_gdf, catfim_library_filt)\n", + "\n", + "# Plot the CatFIM stage cross-section\n", + "plot_catfim_cross_section(xsection_points_gdf, xsection_catfim_filt_gdf, xsection_midpoint, colordict, elev_upper_buffer_ft=10, \n", + " num_points_buffer=5, dist_between_points=10, save_plot=False, \n", + " plot_title = f'CatFIM library elevation cross-section at site {lid}', \n", + " file_label='')\n", + "\n", + "# Map the CatFIM stage cross-section\n", + "map_catfim_cross_section_points(catfim_library_filt, flowline_filt_gdf, xsection_catfim_filt_gdf, colordict, \n", + " modifier=100, EPSG = 5070, plot_title=f'CatFIM library elevation cross-section at site {lid}', \n", + " basemap=True, legend=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d46976cd-a892-4f82-a3b4-ba59eb125b91", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Map the CatFIM stage cross-section\n", + "map_catfim_cross_section_points(catfim_library_filt, flowline_filt_gdf, xsection_catfim_filt_gdf, colordict, \n", + " modifier=100, EPSG=5070, plot_title=f'CatFIM library elevation cross-section at site {lid}', \n", + " basemap=False, legend=True)\n", + "\n", + "# Map the CatFIM stage cross-section\n", + "map_catfim_cross_section_points(catfim_library_filt, flowline_filt_gdf, xsection_catfim_filt_gdf, colordict, \n", + " modifier=100, EPSG=5070, plot_title=f'CatFIM library elevation cross-section at site {lid}', \n", + " basemap=True, legend=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + 
"name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tools/catfim/vis_categorical_fim.py b/tools/catfim/vis_categorical_fim.py new file mode 100644 index 000000000..9ee588b99 --- /dev/null +++ b/tools/catfim/vis_categorical_fim.py @@ -0,0 +1,829 @@ +#!/usr/bin/env python3 + +import os +import sys +from os import listdir + +import geopandas as gpd +import matplotlib.patches as mpatches +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import rasterio +import requests +import shapely +from PIL import Image +from rasterio.plot import show +from shapely import segmentize +from shapely.geometry import LineString, Point +from shapely.geometry.polygon import Polygon +from shapely.ops import substring + + +''' +This script contains functions that are useful for exploring Categorical FIM outputs. The +functions are designed to be run in a Juputer notebook so that the plots and maps can be +easily viewed. 
+ +''' + +# ------------------------------------------------------------- +# Reading in and processing CatFIM library and geospatial data +# ------------------------------------------------------------- +''' +# Inputs --- + +lid = 'bltn7' +huc = '06010105' +catfim_inputs_path = '/data/previous_fim/fim_4_5_2_11' +catfim_outputs_path = '/data/catfim/emily_test/hand_4_5_11_1_catfim_datavis_flow_based/' +EPSG = 5070 + +# Data processing functions --- + +# Read in the CatFIM outputs +catfim_library, catfim_points, flowline_gdf = read_catfim_outputs(catfim_inputs_path, huc) + +# Subset the CatFIM geometries by site and enforce projection +catfim_library_filt, points_filt_gdf, flowline_filt_gdf = subset_catfim_geom_by_site(lid, catfim_library, catfim_points, EPSG) + +# Filter CatFIM library and create the symbology columns +catfim_library_filt, colordict = subset_apply_symbology_catfim_library(catfim_library, lid, include_record=True) + + +# Mapping functions --- + +map_catfim_full_extent(catfim_library_filt, flowline_filt_gdf, points_filt_gdf, colordict, plot_title='', legend=True) + +map_catfim_at_site(catfim_library_filt, flowline_filt_gdf, points_filt_gdf, colordict, plot_title='', EPSG=5070, basemap=True, site_view=True, legend = True) + +map_catfim_full_extent(catfim_library_filt, flowline_filt_gdf, points_filt_gdf, colordict, plot_title='', legend=False) + +''' + + +def read_catfim_outputs(catfim_inputs_path, catfim_outputs_path, huc): + ''' + Read in HAND and CatFIM data, constructing filepaths as needed. 
+ + Inputs: + - catfim_inputs_path + - catfim_outputs_path + - huc + + Outputs: + - catfim_library + - catfim_points + - flowline_gdf + + catfim_library, catfim_points, flowline_gdf = read_catfim_outputs(catfim_inputs_path, huc) + + ''' + + # Read in HAND output flowlines + flowline_path = os.path.join(catfim_inputs_path, huc, 'nwm_subset_streams_levelPaths_dissolved.gpkg') + flowline_gdf = gpd.read_file(flowline_path) + + # Read in CatFIM outputs + catfim_outputs_mapping_path = os.path.join(catfim_outputs_path, 'mapping') + + for file in os.listdir(catfim_outputs_mapping_path): + if file.endswith('catfim_library.gpkg'): + catfim_library_path = os.path.join(catfim_outputs_path, 'mapping', file) + elif file.endswith('catfim_sites.gpkg'): + catfim_points_path = os.path.join(catfim_outputs_mapping_path, file) + + try: + catfim_library = gpd.read_file(catfim_library_path) + catfim_points = gpd.read_file(catfim_points_path) + + except IOError: + print(f'Error opening CatFIM outputs from {catfim_outputs_path}:') + print(IOError) + sys.exit() + + print('HAND-FIM and CatFIM outputs have been read in.') + + return catfim_library, catfim_points, flowline_gdf + + +def subset_catfim_geom_by_site(lid, catfim_library, catfim_points, flowline_gdf, EPSG): + ''' + Subset CatFIM library and points for a specific site. 
+ + Inputs: + - lid + - catfim_library + - catfim_points + - EPSG + + Outputs: + - catfim_library_filt + - points_filt_gdf + - flowline_filt_gdf + + catfim_library_filt, points_filt_gdf, flowline_filt_gdf = subset_catfim_geom_by_site(lid, catfim_library, catfim_points, flowline_gdf, EPSG) + + ''' + + # Filter points to LID + points_filt_gdf = catfim_points[catfim_points['ahps_lid'] == lid] + + if len(points_filt_gdf) > 1: + print(f'ERROR: Multiple points found for lid {lid}.') + sys.exit() + + # Put the point into the projection of the flowlines + points_filt_gdf = points_filt_gdf.to_crs(EPSG) + + # Find the flowline nearest to the point + flowline_filt_gdf = gpd.sjoin_nearest(flowline_gdf, points_filt_gdf, max_distance=100) + + catfim_library_filt = catfim_library[catfim_library['ahps_lid'] == lid] + catfim_library_filt = catfim_library_filt.to_crs(points_filt_gdf.crs) + + # Put the geometries into a unified projection + points_filt_gdf = points_filt_gdf.to_crs(EPSG) + flowline_filt_gdf = flowline_filt_gdf.to_crs(EPSG) + catfim_library_filt = catfim_library_filt.to_crs(EPSG) + + print(f'Filtered points and flowlines for {lid}.') + + return catfim_library_filt, points_filt_gdf, flowline_filt_gdf + + +def subset_apply_symbology_catfim_library(catfim_library, points_filt_gdf, lid, include_record): + ''' + Subset CatFIM library by site and apply the appropriate symbology. 
+ + Inputs: + - catfim_library + - points_filt_gdf + - lid + - include_record (True or False) + + Outputs: + - catfim_library_filt + + catfim_library_filt = subset_apply_symbology_catfim_library(catfim_library, points_filt_gdf, lid, include_record=True) + + ''' + + catfim_library_filt = catfim_library[catfim_library['ahps_lid'] == lid] + catfim_library_filt = catfim_library_filt.to_crs( + points_filt_gdf.crs + ) # TODO: check that this is doing what we want it to + + # Create dictionary of color and orders + if include_record == True: + categories = ['action', 'minor', 'moderate', 'major', 'record'] + colors = ['yellow', 'orange', 'red', 'purple', 'cyan'] + orders = [5, 4, 3, 2, 1] + elif include_record == False: + categories = ['action', 'minor', 'moderate', 'major'] + colors = ['yellow', 'orange', 'red', 'purple'] + orders = [5, 4, 3, 2] + else: + sys.exit('Invalid input for include_record parameter.') + + colordict = dict(zip(categories, colors)) + orderdict = dict(zip(categories, orders)) + + catfim_library_filt['color'] = catfim_library_filt['magnitude'].apply(lambda x: colordict[x]) + catfim_library_filt['plot_order'] = catfim_library_filt['magnitude'].apply(lambda x: orderdict[x]) + + catfim_library_filt = catfim_library_filt.sort_values(by='plot_order') + + print('Subsetted, filtered, and applied visualization preferences to CatFIM library.') + + return catfim_library_filt, colordict + + +def map_catfim_at_site( + catfim_library_filt, + flowline_filt_gdf, + points_filt_gdf, + colordict, + plot_title, + EPSG, + basemap, + site_view, + legend, +): + ''' + Plot data layers on top of a ESRI basemap in a square bounding box. + The bounding box determined by a site centroid OR the extent of the + CatFIM library for the site. 
+ + Inputs: + - catfim_library_filt + - flowline_filt_gdf + - points_filt_gdf (CatFIM site geodataframe, used as centroid if site_view = True) + - colordict + - plot_title (string, title of plot) + - EPSG (5070 suggested) + - basemap (True or False, determines whether an ESRI basemap is imported) + - site_view (True or False, if true then it will zoom in on site, if False it will show the full CatFIM library for the site) + + map_catfim_at_site(catfim_library_filt, flowline_filt_gdf, points_filt_gdf, colordict, plot_title='', EPSG=5070, basemap=True, site_view=True, legend = True) + + ''' + # Create the bounding box + if site_view == True: + + map_centroid = points_filt_gdf + + # Reproject centroid + map_centroid = map_centroid.to_crs(EPSG) + + # Input information to calculate bounding box + map_length = 1000 # 100 ft/m buffer around point, hard-coded for now + + # Calculate bounding box + ycoord = int(map_centroid.get_coordinates()['y'].iloc[0]) + xcoord = int(map_centroid.get_coordinates()['x'].iloc[0]) + + map_length_half = map_length / 2 + + xleft = xcoord - map_length_half + xright = xcoord + map_length_half + ybottom = ycoord - map_length_half + ytop = ycoord + map_length_half + + x = (xleft, xright) + y = (ybottom, ytop) + + elif site_view == False: + + # Get bounding rectangle of filtered CatFIM library + xmin_bounds, ymin_bounds, xmax_bounds, ymax_bounds = catfim_library_filt.total_bounds + + # Get max length of CatFIM bounding rectangle + xlen = xmax_bounds - xmin_bounds + ylen = ymax_bounds - ymin_bounds + max_len = max(xlen, ylen) + + # Create centroid coords + xcoord = xmin_bounds + xlen / 2 + ycoord = ymin_bounds + ylen / 2 + + # Make a new bounding square, using CatFIM centroid and the max rectangle length + map_length_half = max_len / 2 + + xleft = xcoord - map_length_half + xright = xcoord + map_length_half + ybottom = ycoord - map_length_half + ytop = ycoord + map_length_half + + # Organize new bounding box coordinates + x = (xleft, xright) + y = 
(ybottom, ytop) + + # Assmeble baseplot + f, ax = plt.subplots(figsize=(10, 10)) + + if basemap == True: + # Pull aerial imagery basemap from ESRI API + esri_url = f"https://services.arcgisonline.com/arcgis/rest/services/World_Imagery/MapServer/export?bbox={x[0]}%2C{y[0]}%2C{x[1]}%2C{y[1]}&bboxSR={EPSG}&layers=0&size=&imageSR=5070&transparent=true&dpi=200&f=image" + esri_aerial = Image.open(requests.get(esri_url, stream=True).raw).convert('RGBA') + ax.imshow(esri_aerial, extent=(x[0], x[1], y[0], y[1])) + + if legend == True: + # Create legend from the color dictionary + stages = list(colordict.keys()) + patches = [] + for stage in stages: + patch = mpatches.Patch(color=colordict[stage], label=stage) + patches.append(patch) + + ax.legend(handles=patches) + + # Add additional geodataframe layers + catfim_library_filt.plot(ax=ax, color=catfim_library_filt.color, alpha=0.5) + flowline_filt_gdf.plot(ax=ax, color='black', alpha=0.5) + points_filt_gdf.plot(ax=ax, color='black', alpha=0.5) + + # Set bounds and labels + ax.set_xbound([x[0], x[1]]) + ax.set_ybound([y[0], y[1]]) + ax.set_xticks([]) + ax.set_yticks([]) + ax.set_title(plot_title) + + plt.show() + + +def map_catfim_full_extent( + catfim_library_filt, flowline_filt_gdf, points_filt_gdf, colordict, plot_title, legend +): + ''' + Plot data layers on top of a ESRI basemap... bounding box determined by a centroid. 
+ + Inputs: + - catfim_library_filt + - flowline_filt_gdf + - points_filt_gdf + - colordict + - plot_title (string, title of plot) + - legend (True or False) + + map_catfim_full_extent(catfim_library_filt, flowline_filt_gdf, points_filt_gdf, colordict, plot_title='', legend=True) + + ''' + + # Set up CatFIM plot + f, ax = plt.subplots() + catfim_library_filt.plot(ax=ax, color=catfim_library_filt.color, alpha=0.75) + + # Additional geodataframe layers + flowline_filt_gdf.plot(ax=ax, color='blue', alpha=0.8) + points_filt_gdf.plot(ax=ax, color='black', alpha=1) + + if legend == True: + # Create legend from the color dictionary + stages = list(colordict.keys()) + patches = [] + for stage in stages: + patch = mpatches.Patch(color=colordict[stage], label=stage) + patches.append(patch) + + ax.legend(handles=patches) + + # Set the plot extent to the bounds of the CatFIM section + minx, miny, maxx, maxy = catfim_library_filt.total_bounds + modifier = 10 # how much to widen the extent by + ax.set_xlim(minx - modifier, maxx + modifier) + ax.set_ylim(miny - modifier, maxy + modifier) + + ax.set_title(plot_title) + ax.set_xticks([]) + ax.set_yticks([]) + plt.show() + + +# ------------------------------------------------------------- +# Generate and Plot CatFIM Elevation Cross-section +# ------------------------------------------------------------- + +''' +# Example Usage: + +# Inputs --- + +xsection_length = 1000 # cross-section length, meters or feet, 1000 suggested +EPSG = 5070 +dist_between_points = 10 # distance between points on cross-section line, 10 suggested + + +# Run functions --- + +# Create cross-section +xsection_gdf = create_perpendicular_cross_section(flowline_filt_gdf, points_filt_gdf, xsection_length, EPSG) + +# Map the cross-section +map_cross_section_geometry(xsection_gdf, points_filt_gdf, flowline_filt_gdf, modifier=100, plot_title=f'Flowline cross-section at site {lid}') + +# Get DEM path +dem_path = generate_dem_path(huc, 
root_dem_path='/data/inputs/3dep_dems/') + +# Create points along cross-section line +xsection_points_gdf, xsection_midpoint = create_cross_section_points(xsection_gdf, dist_between_points) + +# Apply elevation to points +xsection_points_gdf = get_elevation_for_cross_section_points(dem_path, xsection_points_gdf, EPSG) + +# Apply CatFIM stages to points +xsection_catfim_filt_gdf = apply_catfim_library_to_points(xsection_points_gdf, catfim_library_filt) + +# Plot the CatFIM stage cross-section +plot_catfim_cross_section(xsection_points_gdf, xsection_catfim_filt_gdf, xsection_midpoint, colordict, elev_upper_buffer_ft=10, + num_points_buffer=5, dist_between_points=10, save_plot=False, plot_title = f'CatFIM library elevation cross-section at site {lid}', + file_label='') + +# Map the CatFIM stage cross-section +map_catfim_cross_section_points(catfim_library_filt, flowline_filt_gdf, xsection_catfim_filt_gdf, colordict, + modifier=100, plot_title=f'CatFIM library elevation cross-section at site {lid}', + basemap=True, legend=True) + +''' + + +def create_perpendicular_cross_section(flowline_filt_gdf, points_filt_gdf, xsection_length, EPSG): + ''' + Creates a perpendicular cross-section of a single flowline with a specified length. 
+ + Inputs: + - flowline_filt_gdf + - points_filt_gdf + - xsection_length (in the projection units, 1000 is suggested) + - EPSG (number, 5070 is suggested) + + Outputs: + - xsection_gdf + + xsection_gdf = create_perpendicular_cross_section(flowline_filt_gdf, points_filt_gdf, xsection_length, EPSG) + + ''' + + # Extract the line and the point + line = flowline_filt_gdf.geometry.iloc[0] + point = points_filt_gdf.geometry.iloc[0] + + # Find the segment of the line near the point of interest + segment_length = 20 # meters or feet + segment_start_distance = max(0, line.project(point) - segment_length / 2) + segment_end_distance = min(line.length, segment_start_distance + segment_length) + + # Create a shorter segment from the original line + short_segment = LineString( + [line.interpolate(segment_start_distance), line.interpolate(segment_end_distance)] + ) + + # Calculate the slope of the shorter line segment + line_vector = np.array(short_segment.xy[1]) - np.array(short_segment.xy[0]) + line_vector /= np.linalg.norm(line_vector) # Normalize + perpendicular_vector = np.array([-line_vector[1], line_vector[0]]) # Perpendicular vector + + # Create the cross section line (10 meters long) + half_length = xsection_length / 2 + start_point = ( + point.x + half_length * perpendicular_vector[0], + point.y + half_length * perpendicular_vector[1], + ) + end_point = ( + point.x - half_length * perpendicular_vector[0], + point.y - half_length * perpendicular_vector[1], + ) + new_line = LineString([start_point, end_point]) + + # Create a GeoDataFrame for the new line + xsection_gdf = gpd.GeoDataFrame({'geometry': [new_line]}, crs=flowline_filt_gdf.crs) + + # Project geodataframe + xsection_gdf = xsection_gdf.to_crs(EPSG) + + print(f'Created cross-section of length {xsection_length}.') + + return xsection_gdf + + +def map_cross_section_geometry(xsection_gdf, points_filt_gdf, flowline_filt_gdf, modifier, plot_title): + ''' + Inputs: + - xsection_gdf + - points_filt_gdf + - 
flowline_filt_gdf + - modifier (buffer distance around cross-section bounds for map bounds, 100 suggested) + - plot_title + + Outputs: + - a simple map of the cross-section geometry + + map_cross_section_geometry(xsection_gdf, points_filt_gdf, flowline_filt_gdf, modifier=100, plot_title=f'Flowline cross-section at site {lid}') + ''' + + # Plot xsection_gdf first to set the extent + ax = xsection_gdf.plot(figsize=(6, 6), color='lightgray') + + # Add data + points_filt_gdf.plot(ax=ax, color='blue', alpha=0.5, zorder=5) # Adjust color and alpha as needed + flowline_filt_gdf.plot(ax=ax, color='green', alpha=0.5) # Adjust color and alpha as needed + + # Set the plot extent to the bounds of the line segment + minx, miny, maxx, maxy = xsection_gdf.total_bounds + ax.set_xlim(minx - modifier, maxx + modifier) + ax.set_ylim(miny - modifier, maxy + modifier) + ax.set_title(plot_title) + ax.set_xticks([]) + ax.set_yticks([]) + + plt.show() + + +def create_cross_section_points(xsection_gdf, dist_between_points): + ''' + Creates points at a specified distance along a cross-section line. + + Inputs: + - xsection_gdf: Cross-section line + - dist_between_points: Distance interval with which to create points. + + Outputs: + - xsection_points_gdf: Points along the cross-section line. + - xsection_midpoint: Distance of halfway point. 
+ + xsection_points_gdf, xsection_midpoint = create_cross_section_points(xsection_gdf, dist_between_points) + + ''' + + # Create points along cross-section line + xsection_line = xsection_gdf.geometry.iloc[0] + + # Initialize cross-section points geometry + xsection_points = shapely.geometry.MultiPoint() + + # Initialize the dist_along_line list + half_dist_between_points = [dist_between_points / 2] + dist_along_line = [] + + # Iterate through cross-section line length and create points along the line + for i in np.arange(0, xsection_line.length, dist_between_points): + + # Get midpoint geometry + s = substring(xsection_line, i, i + dist_between_points) + + # Add value to cross-section points geometry + xsection_points = xsection_points.union(s.boundary) + + # dist_along_line.append(i+dist_between_points) # TODO: should this be dist_between_points/2? to get the middle? + dist_along_line.append(i + half_dist_between_points) # changed so it's the halfway value + + # Turn the multipoint object into a geodataframe + xsection_points_gdf = gpd.GeoDataFrame( + {'geometry': [xsection_points], 'elev': -9999}, crs=xsection_gdf.crs + ).explode(index_parts=True) + + # Add line distance column + xsection_points_gdf['dist_along_line'] = dist_along_line + + # Calculate the cross section halfway point + xsection_midpoint = dist_along_line[int(len(dist_along_line) / 2)] + + print(f'Generated {len(xsection_points_gdf)} cross-section points.') + + return xsection_points_gdf, xsection_midpoint + + +def generate_dem_path(huc, root_dem_path): + ''' + Generates a DEM path using the HUC and the root DEM path. 
+ + Inputs: + - huc (string) + - root DEM path (string, probably /data/inputs/3dep_dems/) + + Outputs: + - HUC DEM path + + dem_path = generate_dem_path(huc, root_dem_path='/data/inputs/3dep_dems/') + + ''' + if huc[0:2] == '19': + dem_name = f'HUC8_{huc}_dem.tif' + dem_path = os.path.join(root_dem_path, '10m_South_Alaska', '23_11_07', dem_name) + else: + huc6 = huc[0:6] + dem_name = f'HUC6_{huc6}_dem.tif' + dem_path = os.path.join(root_dem_path, '10m_5070', dem_name) + + print(f'DEM path generated: {dem_path}') + + return dem_path + + +def get_elevation_for_cross_section_points(dem_path, xsection_points_gdf, EPSG): + ''' + Gets the elevation for at each point from the DEM. + + Inputs: + - dem_path (string) + - xsection_points_gdf + - EPSG (numerical, i.e. 5070) + + Outputs: + - xsection_points_elev_gdf + + xsection_points_gdf = get_elevation_for_cross_section_points(dem_path, xsection_points_gdf, EPSG) + + ''' + + # Read the raster file + with rasterio.open(dem_path) as src: + + # Get raster projection + rast_proj = src.crs + + # Temporarily project cross-section points to match raster (it's easier this way) + xsection_points_temp_proj_gdf = xsection_points_gdf.to_crs(rast_proj) + + # Extract raster values at point locations + values = [] + for point in xsection_points_temp_proj_gdf.geometry: + x, y = point.x, point.y + row, col = src.index(x, y) + value = src.read(1, window=((row, row + 1), (col, col + 1)))[0, 0] + values.append(value) + + # Add values to the point dataframe + xsection_points_temp_proj_gdf['elev'] = values + + # Overwrite the old crosssection points gdf with the new elevation one + xsection_points_elev_gdf = xsection_points_temp_proj_gdf.to_crs(EPSG) + + print('Got elevation for each cross-section point.') + return xsection_points_elev_gdf + + +def apply_catfim_library_to_points(xsection_points_gdf, catfim_library_filt): + ''' + Overlays the filtered CatFIM library onto the cross-section points to provide the lowest potential flood stage for each 
point. + + Inputs: + - xsection_points_gdf + - catfim_library_filt + + Outputs: + - xsection_catfim_filt_gdf + + xsection_catfim_filt_gdf = apply_catfim_library_to_points(xsection_points_gdf, catfim_library_filt) + ''' + + xsection_points_overlay_gdf = gpd.overlay(xsection_points_gdf, catfim_library_filt, how='intersection') + + distances_list = [] + for i, x in enumerate(xsection_points_overlay_gdf['dist_along_line']): + distances_list.append(xsection_points_overlay_gdf['dist_along_line'][i][0]) + + # Iterate through each distance marker and filter so there's only one row per point (with smallest flood level kept) + xsection_catfim_filt = [] + + for dist in distances_list: + # Filter the rows given distance + filtered_df = xsection_points_overlay_gdf[xsection_points_overlay_gdf['dist_along_line'] == dist] + + # Get the row with the highest plot order (color plotted on top) + min_row = filtered_df.loc[filtered_df['plot_order'].idxmax()] + + # Append the result for this distance marker + xsection_catfim_filt.append(min_row) + + # Convert the results list into a DataFrame to view all results + xsection_catfim_filt_gdf = gpd.GeoDataFrame(xsection_catfim_filt) + + print('Intersected and overlaid cross section points with CatFIM library.') + return xsection_catfim_filt_gdf + + +def plot_catfim_cross_section( + xsection_points_gdf, + xsection_catfim_filt_gdf, + xsection_midpoint, + colordict, + elev_upper_buffer_ft, + num_points_buffer, + dist_between_points, + save_plot, + plot_title, + file_label, +): + ''' + Plots the CatFIM cross section points with the inundation category. 
+ + Inputs: + - xsection_points_gdf + - xsection_catfim_filt_gdf + - xsection_midpoint + - colordict + - elev_upper_buffer_ft (Number of ft above highest CatFIM library point to show, 10 suggested) + - num_points_buffer (Number of points to show on either side of CatFIM library, 5 suggested) + - dist_between_points (Distance between points along the cross-section, 10 suggested) + - save_plot (Whether to save the plot (True/False) + - plot_label (Additional label for plot title, if needed) + - file_label (Additional label for plot file, if needed) + + plot_catfim_cross_section(xsection_points_gdf, xsection_catfim_filt_gdf, xsection_midpoint, colordict, elev_upper_buffer_ft=10, + num_points_buffer=5, save_plot=False, plot_title = f'CatFIM library elevation cross-section at site {lid}', + file_label='') + + ''' + + # Set up map + f, ax = plt.subplots(figsize=(10, 4)) + + # Add elevation line + plt.plot(xsection_points_gdf.dist_along_line, xsection_points_gdf.elev, color='gray', alpha=0.8, zorder=2) + + # Add CatFIM points + plt.scatter( + x=xsection_catfim_filt_gdf.dist_along_line, + y=xsection_catfim_filt_gdf.elev, + color=xsection_catfim_filt_gdf.color, + alpha=0.9, + edgecolor='black', + lw=0.7, + zorder=3, + ) + + # Add a dotted line at the approximate LID location + plt.axvline(x=xsection_midpoint, color='gray', linestyle='--', alpha=0.4) + + # Calculate x axis bounds with points buffer + x_buffer = dist_between_points * num_points_buffer + x_min = xsection_catfim_filt_gdf['dist_along_line'].min() - x_buffer + x_max = xsection_catfim_filt_gdf['dist_along_line'].max() + x_buffer + + # Calculate y axis bounds with an elevation buffer + y_min = xsection_catfim_filt_gdf['elev'].min() - 1 + y_max = xsection_catfim_filt_gdf['elev'].max() + elev_upper_buffer_ft + + # Create legend from the color dictionary + stages = list(colordict.keys()) + patches = [] + for stage in stages: + patch = mpatches.Patch(color=colordict[stage], label=stage) + patches.append(patch) + + 
ax.legend(handles=patches) + + # Set labels and axis bounds + ax.set_xlabel('') + ax.set_ylabel('Elevation, ft') + ax.set_xbound([x_min, x_max]) + ax.set_ybound([y_min, y_max]) + ax.set_title(plot_title) + + # Save and display plot + if save_plot == True: + # Create plot filename + plot_name = 'catfim_crosssection_' + file_label + '.png' + plt.savefig(plot_name) + print(f'Saved plot as {plot_name}') + + else: + plt.show() + + +def map_catfim_cross_section_points( + catfim_library_filt, + flowline_filt_gdf, + xsection_catfim_filt_gdf, + colordict, + modifier, + EPSG, + plot_title, + basemap, + legend, +): + ''' + Plot data layers on top of a ESRI basemap... bounding box determined by the CatFIM cross section extent. + + Inputs: + - catfim_library_filt + - flowline_filt_gdf + - xsection_catfim_filt_gdf + - colordict + - modifier + - plot_title + - basemap (True or False, depending on if you want the ESRI basemap) + - legend (True or False, depending on if you want a legend) + + map_catfim_cross_section_points(catfim_library_filt, flowline_filt_gdf, xsection_catfim_filt_gdf, colordict, + modifier=100, EPSG=5070, plot_title=f'CatFIM library elevation cross-section at site {lid}', + basemap=True, legend=True) + + ''' + + # Get bounding rectangle of filtered CatFIM library + xmin_bounds, ymin_bounds, xmax_bounds, ymax_bounds = xsection_catfim_filt_gdf.total_bounds + + # Organize new bounding box coordinates + x = (xmin_bounds - modifier, xmax_bounds + modifier) + y = (ymin_bounds - modifier, ymax_bounds + modifier) + + # Assmeble baseplot + f, ax = plt.subplots(figsize=(10, 10)) + + if basemap == True: + # Pull aerial imagery basemap from ESRI API + esri_url = f"https://services.arcgisonline.com/arcgis/rest/services/World_Imagery/MapServer/export?bbox={x[0]}%2C{y[0]}%2C{x[1]}%2C{y[1]}&bboxSR={EPSG}&layers=0&size=&imageSR=5070&transparent=true&dpi=200&f=image" + esri_aerial = Image.open(requests.get(esri_url, stream=True).raw).convert('RGBA') + + ax.imshow(esri_aerial, 
extent=(x[0], x[1], y[0], y[1])) + + if legend == True: + # Create legend from the color dictionary + stages = list(colordict.keys()) + patches = [] + for stage in stages: + patch = mpatches.Patch(color=colordict[stage], label=stage) + patches.append(patch) + + ax.legend(handles=patches) + + # Plot filtered CatFIM library + catfim_library_filt.plot(ax=ax, color=catfim_library_filt.color, alpha=0.6) + + # Add additional geodataframe layers + flowline_filt_gdf.plot(ax=ax, color='black', alpha=1) + xsection_catfim_filt_gdf.plot( + ax=ax, color=xsection_catfim_filt_gdf['color'], edgecolor='black', lw=0.7, zorder=5 + ) + + # Set bounds and labels + ax.set_xbound([x[0], x[1]]) + ax.set_ybound([y[0], y[1]]) + ax.set_xticks([]) + ax.set_yticks([]) + ax.set_title(plot_title) + + plt.show()