From 7b693a87bf82afdf0e6aaeb9b2e696220670f499 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= Date: Thu, 21 Sep 2023 07:13:44 -0400 Subject: [PATCH 01/18] Copy add_and_verify_crosswalk.py from dev-address-crosswalk-issues --- src/add_and_verify_crosswalk.py | 245 ++++++++++++++++++++++++++++++++ 1 file changed, 245 insertions(+) create mode 100644 src/add_and_verify_crosswalk.py diff --git a/src/add_and_verify_crosswalk.py b/src/add_and_verify_crosswalk.py new file mode 100644 index 000000000..5d74bdde9 --- /dev/null +++ b/src/add_and_verify_crosswalk.py @@ -0,0 +1,245 @@ +#!/usr/bin/env python3 + +import numpy as np +import geopandas as gpd +from rasterstats import zonal_stats +import argparse +import sys +from utils.shared_functions import mem_profile, getDriver + +# Feb 17, 2023 +# We want to explore using FR methodology as branch zero + + +@mem_profile +def add_and_verify_crosswalk( + input_catchments_fileName, + input_flows_fileName, + input_huc_fileName, + input_nwmflows_fileName, + input_nwmcatras_fileName, + input_nwmcat_fileName, + crosswalk_fileName, + output_catchments_fileName, + output_flows_fileName, + extent, + min_catchment_area, + min_stream_length, +): + input_catchments = gpd.read_file(input_catchments_fileName) + input_flows = gpd.read_file(input_flows_fileName) + input_huc = gpd.read_file(input_huc_fileName) + input_nwmflows = gpd.read_file(input_nwmflows_fileName) + min_catchment_area = float(min_catchment_area) # 0.25# + min_stream_length = float(min_stream_length) # 0.5# + + if extent == 'FR': + ## crosswalk using majority catchment method + + # calculate majority catchments + majority_calc = zonal_stats( + input_catchments, input_nwmcatras_fileName, stats=['majority'], geojson_out=True + ) + input_majorities = gpd.GeoDataFrame.from_features(majority_calc) + input_majorities = input_majorities.rename(columns={'majority': 'feature_id'}) + + input_majorities = input_majorities[:][input_majorities['feature_id'].notna()] + if input_majorities.feature_id.dtype != 'int': + input_majorities.feature_id = input_majorities.feature_id.astype(int) + if input_majorities.HydroID.dtype != 'int': + input_majorities.HydroID = input_majorities.HydroID.astype(int) + + if len(input_majorities) < 1: + print('No relevant streams within HUC boundaries.') + sys.exit(0) + else: + input_majorities.to_file(crosswalk_fileName, index=False) + + input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'}) + if input_nwmflows.feature_id.dtype != 'int': + input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int) + relevant_input_nwmflows = input_nwmflows[ + input_nwmflows['feature_id'].isin(input_majorities['feature_id']) + ] + relevant_input_nwmflows = relevant_input_nwmflows.filter(items=['feature_id', 'order_']) + + if input_catchments.HydroID.dtype != 'int': + input_catchments.HydroID = input_catchments.HydroID.astype(int) + output_catchments = input_catchments.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID') + output_catchments = output_catchments.merge( + relevant_input_nwmflows[['order_', 'feature_id']], on='feature_id' + ) + + if input_flows.HydroID.dtype != 'int': + input_flows.HydroID = input_flows.HydroID.astype(int) + output_flows = input_flows.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID') + if output_flows.HydroID.dtype != 'int': + output_flows.HydroID = output_flows.HydroID.astype(int) + output_flows = output_flows.merge(relevant_input_nwmflows[['order_', 'feature_id']], on='feature_id') + output_flows = output_flows.merge( + output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID' + ) + + elif (extent == 'MS') | (extent == 'GMS'): + ## crosswalk using stream segment midpoint method + input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc) + + # only reduce nwm catchments to mainstems if running mainstems + if extent == 'MS': + input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem == 1] + + input_nwmcat = input_nwmcat.rename(columns={'ID': 'feature_id'}) + if input_nwmcat.feature_id.dtype != 'int': + input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int) + input_nwmcat = input_nwmcat.set_index('feature_id') + + input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'}) + if input_nwmflows.feature_id.dtype != 'int': + input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int) + + # Get stream midpoint + stream_midpoint = [] + hydroID = [] + for i, lineString in enumerate(input_flows.geometry): + hydroID = hydroID + [input_flows.loc[i, 'HydroID']] + stream_midpoint = stream_midpoint + [lineString.interpolate(0.5, normalized=True)] + + input_flows_midpoint = gpd.GeoDataFrame( + {'HydroID': hydroID, 'geometry': stream_midpoint}, crs=input_flows.crs, geometry='geometry' + ) + input_flows_midpoint = input_flows_midpoint.set_index('HydroID') + + # Create crosswalk + crosswalk = gpd.sjoin( + input_flows_midpoint, input_nwmcat, how='left', predicate='within' + ).reset_index() + crosswalk = crosswalk.rename(columns={'index_right': 'feature_id'}) + + # fill in missing ms + crosswalk_missing = crosswalk.loc[crosswalk.feature_id.isna()] + for index, stream in crosswalk_missing.iterrows(): + # find closest nwm catchment by distance + distances = [stream.geometry.distance(poly) for poly in input_nwmcat.geometry] + min_dist = min(distances) + nwmcat_index = distances.index(min_dist) + + # update crosswalk + crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'feature_id'] = input_nwmcat.iloc[ + nwmcat_index + ].name + crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'AreaSqKM'] = input_nwmcat.iloc[ + nwmcat_index + ].AreaSqKM + crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'Shape_Length'] = input_nwmcat.iloc[ + nwmcat_index + ].Shape_Length + crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'Shape_Area'] = input_nwmcat.iloc[ + nwmcat_index + ].Shape_Area + + crosswalk = crosswalk.filter(items=['HydroID', 'feature_id']) + crosswalk = crosswalk.merge(input_nwmflows[['feature_id', 'order_']], on='feature_id') + + if len(crosswalk) < 1: + print('No relevant streams within HUC boundaries.') + sys.exit(0) + else: + crosswalk.to_csv(crosswalk_fileName, index=False) + + if input_catchments.HydroID.dtype != 'int': + input_catchments.HydroID = input_catchments.HydroID.astype(int) + output_catchments = input_catchments.merge(crosswalk, on='HydroID') + + if input_flows.HydroID.dtype != 'int': + input_flows.HydroID = input_flows.HydroID.astype(int) + output_flows = input_flows.merge(crosswalk, on='HydroID') + + # added for GMS. Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh + if 'areasqkm' not in output_catchments.columns: + output_catchments['areasqkm'] = output_catchments.geometry.area / (1000**2) + + output_flows = output_flows.merge( + output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID' + ) + + # write out + output_catchments.to_file( + output_catchments_fileName, driver=getDriver(output_catchments_fileName), index=False + ) + + if output_flows.NextDownID.dtype != 'int': + output_flows.NextDownID = output_flows.NextDownID.astype(int) + + output_flows = verify_crosswalk(output_flows, input_nwmflows) + + output_flows.to_file(output_flows_fileName, index=False) + + +def verify_crosswalk(flows, nwm_streams): + # Crosswalk check + # fh.vprint('Checking for crosswalks between NWM and DEM-derived flowlines', verbose) + + # Compute the number of intersections between the NWM and DEM-derived flowlines + streams = nwm_streams + n_intersects = 0 + xwalks = [] + intersects = flows.sjoin(streams) + for idx in intersects.index: + if type(intersects.loc[idx, 'HydroID']) == np.int64: + flows_idx = intersects.loc[idx, 'HydroID'] + else: + flows_idx = int(intersects.loc[idx, 'HydroID'].unique()) + + if type(intersects.loc[idx, 'feature_id_right']) == np.int64: + streams_idxs = [intersects.loc[idx, 'feature_id_right']] + else: + streams_idxs = intersects.loc[idx, 'feature_id_right'].unique() + + for streams_idx in streams_idxs: + intersect = gpd.overlay( + flows[flows['HydroID'] == flows_idx], + nwm_streams[nwm_streams['feature_id'] == streams_idx], + keep_geom_type=False, + ) + + if intersect.geometry[0].geom_type == 'Point': + intersect_points = 1 + else: + intersect_points = len(intersect.geometry[0].geoms) + + if intersect_points > n_intersects: + n_intersects = intersect_points + + if n_intersects > 0: + xwalks.append((flows_idx, streams_idx, intersect_points)) + + print(f'Found {intersect_points} intersections for {flows_idx} and {streams_idx}') + + # Get the maximum number of intersections for each flowline + xwalks = np.array(xwalks) + + return flows + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='Crosswalk for MS/FR/GMS networks; calculate synthetic rating curves; update short rating curves' + ) + parser.add_argument('-d', '--input-catchments-fileName', help='DEM derived catchments', required=True) + parser.add_argument('-a', '--input-flows-fileName', help='DEM derived streams', required=True) + parser.add_argument('-w', '--input-huc-fileName', help='HUC8 boundary', required=True) + parser.add_argument('-b', '--input-nwmflows-fileName', help='Subset NWM burnlines', required=True) + parser.add_argument('-y', '--input-nwmcatras-fileName', help='NWM catchment raster', required=False) + parser.add_argument('-z', '--input-nwmcat-fileName', help='NWM catchment polygon', required=True) + parser.add_argument('-c', '--crosswalk-fileName', help='Crosswalk table filename', required=True) + parser.add_argument( + '-l', '--output-catchments-fileName', help='Subset crosswalked catchments', required=True + ) + parser.add_argument('-p', '--extent', help='GMS only for now', default='GMS', required=False) + parser.add_argument('-e', '--min-catchment-area', help='Minimum catchment area', required=True) + parser.add_argument('-g', '--min-stream-length', help='Minimum stream length', required=True) + parser.add_argument('-f', '--output-flows-fileName', help='Subset crosswalked streams', required=True) + + args = vars(parser.parse_args()) + + add_and_verify_crosswalk(**args) From 2c8588b212a5f1a4b421ca31bd2b40d3a9e5d079 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= Date: Fri, 22 Sep 2023 13:41:48 -0400 Subject: [PATCH 02/18] Compute comparisons --- src/add_and_verify_crosswalk.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/add_and_verify_crosswalk.py b/src/add_and_verify_crosswalk.py index 5d74bdde9..1dd5f4500 100644 --- a/src/add_and_verify_crosswalk.py +++ b/src/add_and_verify_crosswalk.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import numpy as np +import pandas as pd import geopandas as gpd from rasterstats import zonal_stats import argparse @@ -181,7 +182,6 @@ def verify_crosswalk(flows, nwm_streams): # Compute the number of intersections between the NWM and DEM-derived flowlines streams = nwm_streams - n_intersects = 0 xwalks = [] intersects = flows.sjoin(streams) for idx in intersects.index: @@ -207,16 +207,28 @@ def verify_crosswalk(flows, nwm_streams): else: intersect_points = len(intersect.geometry[0].geoms) - if intersect_points > n_intersects: - n_intersects = intersect_points - - if n_intersects > 0: - xwalks.append((flows_idx, streams_idx, intersect_points)) + xwalks.append( + [ + flows_idx, + int(flows.loc[flows['HydroID'] == flows_idx, 'feature_id'].iloc[0]), + streams_idx, + intersect_points, + ] + ) - print(f'Found {intersect_points} intersections for {flows_idx} and {streams_idx}') + print(f'Found {intersect_points} intersections for {flows_idx} and {streams_idx}') # Get the maximum number of intersections for each flowline - xwalks = np.array(xwalks) + xwalks = pd.DataFrame(xwalks, columns=['HydroID', 'feature_id', 'feature_id_right', 'intersect_points']) + xwalks = xwalks.drop_duplicates() + xwalks['match'] = xwalks[1] == xwalks[2] + + xwalks_groupby = xwalks[[0, 3]].groupby(0).max() + + xwalks = xwalks.merge(xwalks_groupby, on=0) + xwalks['max'] = xwalks['3_x'] == xwalks['3_y'] + + xwalks['crosswalk'] = np.where(xwalks['match'] == xwalks['max'], True, False) return flows From a0f30f770baa8f891fd1f6123b7391d62fc4cd95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= Date: Thu, 12 Oct 2023 15:58:00 -0400 Subject: [PATCH 03/18] isort tools/verify_crosswalk.py --- src/add_and_verify_crosswalk.py | 257 -------------------------------- tools/verify_crosswalk.py | 96 ++++++++++++ 2 files changed, 96 insertions(+), 257 deletions(-) delete mode 100644 src/add_and_verify_crosswalk.py create mode 100644 tools/verify_crosswalk.py diff --git a/src/add_and_verify_crosswalk.py b/src/add_and_verify_crosswalk.py deleted file mode 100644 index 1dd5f4500..000000000 --- a/src/add_and_verify_crosswalk.py +++ /dev/null @@ -1,257 +0,0 @@ -#!/usr/bin/env python3 - -import numpy as np -import pandas as pd -import geopandas as gpd -from rasterstats import zonal_stats -import argparse -import sys -from utils.shared_functions import mem_profile, getDriver - -# Feb 17, 2023 -# We want to explore using FR methodology as branch zero - - -@mem_profile -def add_and_verify_crosswalk( - input_catchments_fileName, - input_flows_fileName, - input_huc_fileName, - input_nwmflows_fileName, - input_nwmcatras_fileName, - input_nwmcat_fileName, - crosswalk_fileName, - output_catchments_fileName, - output_flows_fileName, - extent, - min_catchment_area, - min_stream_length, -): - input_catchments = gpd.read_file(input_catchments_fileName) - input_flows = gpd.read_file(input_flows_fileName) - input_huc = gpd.read_file(input_huc_fileName) - input_nwmflows = gpd.read_file(input_nwmflows_fileName) - min_catchment_area = float(min_catchment_area) # 0.25# - min_stream_length = float(min_stream_length) # 0.5# - - if extent == 'FR': - ## crosswalk using majority catchment method - - # calculate majority catchments - majority_calc = zonal_stats( - input_catchments, input_nwmcatras_fileName, stats=['majority'], geojson_out=True - ) - input_majorities = gpd.GeoDataFrame.from_features(majority_calc) - input_majorities = input_majorities.rename(columns={'majority': 'feature_id'}) - - input_majorities = input_majorities[:][input_majorities['feature_id'].notna()] - if input_majorities.feature_id.dtype != 'int': - input_majorities.feature_id = input_majorities.feature_id.astype(int) - if input_majorities.HydroID.dtype != 'int': - input_majorities.HydroID = input_majorities.HydroID.astype(int) - - if len(input_majorities) < 1: - print('No relevant streams within HUC boundaries.') - sys.exit(0) - else: - input_majorities.to_file(crosswalk_fileName, index=False) - - input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'}) - if input_nwmflows.feature_id.dtype != 'int': - input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int) - relevant_input_nwmflows = input_nwmflows[ - input_nwmflows['feature_id'].isin(input_majorities['feature_id']) - ] - relevant_input_nwmflows = relevant_input_nwmflows.filter(items=['feature_id', 'order_']) - - if input_catchments.HydroID.dtype != 'int': - input_catchments.HydroID = input_catchments.HydroID.astype(int) - output_catchments = input_catchments.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID') - output_catchments = output_catchments.merge( - relevant_input_nwmflows[['order_', 'feature_id']], on='feature_id' - ) - - if input_flows.HydroID.dtype != 'int': - input_flows.HydroID = input_flows.HydroID.astype(int) - output_flows = input_flows.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID') - if output_flows.HydroID.dtype != 'int': - output_flows.HydroID = output_flows.HydroID.astype(int) - output_flows = output_flows.merge(relevant_input_nwmflows[['order_', 'feature_id']], on='feature_id') - output_flows = output_flows.merge( - output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID' - ) - - elif (extent == 'MS') | (extent == 'GMS'): - ## crosswalk using stream segment midpoint method - input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc) - - # only reduce nwm catchments to mainstems if running mainstems - if extent == 'MS': - input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem == 1] - - input_nwmcat = input_nwmcat.rename(columns={'ID': 'feature_id'}) - if input_nwmcat.feature_id.dtype != 'int': - input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int) - input_nwmcat = input_nwmcat.set_index('feature_id') - - input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'}) - if input_nwmflows.feature_id.dtype != 'int': - input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int) - - # Get stream midpoint - stream_midpoint = [] - hydroID = [] - for i, lineString in enumerate(input_flows.geometry): - hydroID = hydroID + [input_flows.loc[i, 'HydroID']] - stream_midpoint = stream_midpoint + [lineString.interpolate(0.5, normalized=True)] - - input_flows_midpoint = gpd.GeoDataFrame( - {'HydroID': hydroID, 'geometry': stream_midpoint}, crs=input_flows.crs, geometry='geometry' - ) - input_flows_midpoint = input_flows_midpoint.set_index('HydroID') - - # Create crosswalk - crosswalk = gpd.sjoin( - input_flows_midpoint, input_nwmcat, how='left', predicate='within' - ).reset_index() - crosswalk = crosswalk.rename(columns={'index_right': 'feature_id'}) - - # fill in missing ms - crosswalk_missing = crosswalk.loc[crosswalk.feature_id.isna()] - for index, stream in crosswalk_missing.iterrows(): - # find closest nwm catchment by distance - distances = [stream.geometry.distance(poly) for poly in input_nwmcat.geometry] - min_dist = min(distances) - nwmcat_index = distances.index(min_dist) - - # update crosswalk - crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'feature_id'] = input_nwmcat.iloc[ - nwmcat_index - ].name - crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'AreaSqKM'] = input_nwmcat.iloc[ - nwmcat_index - ].AreaSqKM - crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'Shape_Length'] = input_nwmcat.iloc[ - nwmcat_index - ].Shape_Length - crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'Shape_Area'] = input_nwmcat.iloc[ - nwmcat_index - ].Shape_Area - - crosswalk = crosswalk.filter(items=['HydroID', 'feature_id']) - crosswalk = crosswalk.merge(input_nwmflows[['feature_id', 'order_']], on='feature_id') - - if len(crosswalk) < 1: - print('No relevant streams within HUC boundaries.') - sys.exit(0) - else: - crosswalk.to_csv(crosswalk_fileName, index=False) - - if input_catchments.HydroID.dtype != 'int': - input_catchments.HydroID = input_catchments.HydroID.astype(int) - output_catchments = input_catchments.merge(crosswalk, on='HydroID') - - if input_flows.HydroID.dtype != 'int': - input_flows.HydroID = input_flows.HydroID.astype(int) - output_flows = input_flows.merge(crosswalk, on='HydroID') - - # added for GMS. Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh - if 'areasqkm' not in output_catchments.columns: - output_catchments['areasqkm'] = output_catchments.geometry.area / (1000**2) - - output_flows = output_flows.merge( - output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID' - ) - - # write out - output_catchments.to_file( - output_catchments_fileName, driver=getDriver(output_catchments_fileName), index=False - ) - - if output_flows.NextDownID.dtype != 'int': - output_flows.NextDownID = output_flows.NextDownID.astype(int) - - output_flows = verify_crosswalk(output_flows, input_nwmflows) - - output_flows.to_file(output_flows_fileName, index=False) - - -def verify_crosswalk(flows, nwm_streams): - # Crosswalk check - # fh.vprint('Checking for crosswalks between NWM and DEM-derived flowlines', verbose) - - # Compute the number of intersections between the NWM and DEM-derived flowlines - streams = nwm_streams - xwalks = [] - intersects = flows.sjoin(streams) - for idx in intersects.index: - if type(intersects.loc[idx, 'HydroID']) == np.int64: - flows_idx = intersects.loc[idx, 'HydroID'] - else: - flows_idx = int(intersects.loc[idx, 'HydroID'].unique()) - - if type(intersects.loc[idx, 'feature_id_right']) == np.int64: - streams_idxs = [intersects.loc[idx, 'feature_id_right']] - else: - streams_idxs = intersects.loc[idx, 'feature_id_right'].unique() - - for streams_idx in streams_idxs: - intersect = gpd.overlay( - flows[flows['HydroID'] == flows_idx], - nwm_streams[nwm_streams['feature_id'] == streams_idx], - keep_geom_type=False, - ) - - if intersect.geometry[0].geom_type == 'Point': - intersect_points = 1 - else: - intersect_points = len(intersect.geometry[0].geoms) - - xwalks.append( - [ - flows_idx, - int(flows.loc[flows['HydroID'] == flows_idx, 'feature_id'].iloc[0]), - streams_idx, - intersect_points, - ] - ) - - print(f'Found {intersect_points} intersections for {flows_idx} and {streams_idx}') - - # Get the maximum number of intersections for each flowline - xwalks = pd.DataFrame(xwalks, columns=['HydroID', 'feature_id', 'feature_id_right', 'intersect_points']) - xwalks = xwalks.drop_duplicates() - xwalks['match'] = xwalks[1] == xwalks[2] - - xwalks_groupby = xwalks[[0, 3]].groupby(0).max() - - xwalks = xwalks.merge(xwalks_groupby, on=0) - xwalks['max'] = xwalks['3_x'] == xwalks['3_y'] - - xwalks['crosswalk'] = np.where(xwalks['match'] == xwalks['max'], True, False) - - return flows - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='Crosswalk for MS/FR/GMS networks; calculate synthetic rating curves; update short rating curves' - ) - parser.add_argument('-d', '--input-catchments-fileName', help='DEM derived catchments', required=True) - parser.add_argument('-a', '--input-flows-fileName', help='DEM derived streams', required=True) - parser.add_argument('-w', '--input-huc-fileName', help='HUC8 boundary', required=True) - parser.add_argument('-b', '--input-nwmflows-fileName', help='Subset NWM burnlines', required=True) - parser.add_argument('-y', '--input-nwmcatras-fileName', help='NWM catchment raster', required=False) - parser.add_argument('-z', '--input-nwmcat-fileName', help='NWM catchment polygon', required=True) - parser.add_argument('-c', '--crosswalk-fileName', help='Crosswalk table filename', required=True) - parser.add_argument( - '-l', '--output-catchments-fileName', help='Subset crosswalked catchments', required=True - ) - parser.add_argument('-p', '--extent', help='GMS only for now', default='GMS', required=False) - parser.add_argument('-e', '--min-catchment-area', help='Minimum catchment area', required=True) - parser.add_argument('-g', '--min-stream-length', help='Minimum stream length', required=True) - parser.add_argument('-f', '--output-flows-fileName', help='Subset crosswalked streams', required=True) - - args = vars(parser.parse_args()) - - add_and_verify_crosswalk(**args) diff --git a/tools/verify_crosswalk.py b/tools/verify_crosswalk.py new file mode 100644 index 000000000..e42b4a241 --- /dev/null +++ b/tools/verify_crosswalk.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +import argparse + +import geopandas as gpd +import numpy as np +import pandas as pd + + +def verify_crosswalk(input_flows_fileName, input_nwmflows_fileName, output_table_fileName): + """ + Verify the crosswalk between the NWM and DEM-derived flowlines. + + Parameters + ---------- + input_flows_fileName : str + DEM-derived flowlines + input_nwmflows_fileName : str + NWM flowlines + crosswalk_fileName : str + Crosswalk table filename + """ + + nwm_streams = gpd.read_file(input_nwmflows_fileName) + flows = gpd.read_file(input_flows_fileName) + + # Crosswalk check + # fh.vprint('Checking for crosswalks between NWM and DEM-derived flowlines', verbose) + + # Compute the number of intersections between the NWM and DEM-derived flowlines + streams = nwm_streams + xwalks = [] + intersects = flows.sjoin(streams) + for idx in intersects.index: + if type(intersects.loc[idx, 'HydroID']) == np.int64: + flows_idx = intersects.loc[idx, 'HydroID'] + else: + flows_idx = int(intersects.loc[idx, 'HydroID'].unique()) + + if type(intersects.loc[idx, 'feature_id_right']) == np.int64: + streams_idxs = [intersects.loc[idx, 'feature_id_right']] + else: + streams_idxs = intersects.loc[idx, 'feature_id_right'].unique() + + for streams_idx in streams_idxs: + intersect = gpd.overlay( + flows[flows['HydroID'] == flows_idx], + nwm_streams[nwm_streams['feature_id'] == streams_idx], + keep_geom_type=False, + ) + + if intersect.geometry[0].geom_type == 'Point': + intersect_points = 1 + else: + intersect_points = len(intersect.geometry[0].geoms) + + xwalks.append( + [ + flows_idx, + int(flows.loc[flows['HydroID'] == flows_idx, 'feature_id'].iloc[0]), + streams_idx, + intersect_points, + ] + ) + + print(f'Found {intersect_points} intersections for {flows_idx} and {streams_idx}') + + # Get the maximum number of intersections for each flowline + xwalks = pd.DataFrame(xwalks, columns=['HydroID', 'feature_id', 'feature_id_right', 'intersect_points']) + xwalks = xwalks.drop_duplicates() + xwalks['match'] = xwalks[1] == xwalks[2] + + xwalks_groupby = xwalks[[0, 3]].groupby(0).max() + + xwalks = xwalks.merge(xwalks_groupby, on=0) + xwalks['max'] = xwalks['3_x'] == xwalks['3_y'] + + xwalks['crosswalk'] = np.where(xwalks['match'] == xwalks['max'], True, False) + + # Save the crosswalk table + xwalks.to_csv(output_table_fileName, index=False) + + return xwalks + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='Crosswalk for MS/FR/GMS networks; calculate synthetic rating curves; update short rating curves' + ) + parser.add_argument('-a', '--input-flows-fileName', help='DEM derived streams', required=True) + parser.add_argument('-b', '--input-nwmflows-fileName', help='Subset NWM burnlines', required=True) + parser.add_argument('-c', '--output-table-fileName', help='Output table filename', required=True) + + args = vars(parser.parse_args()) + + verify_crosswalk(**args) From e7797b13188d8c9eaa83d92572832b547e5e7e05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= Date: Thu, 12 Oct 2023 16:51:46 -0400 Subject: [PATCH 04/18] Debug verify_crosswalk --- tools/verify_crosswalk.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tools/verify_crosswalk.py b/tools/verify_crosswalk.py index e42b4a241..d1212c470 100644 --- a/tools/verify_crosswalk.py +++ b/tools/verify_crosswalk.py @@ -31,16 +31,13 @@ def verify_crosswalk(input_flows_fileName, input_nwmflows_fileName, output_table streams = nwm_streams xwalks = [] intersects = flows.sjoin(streams) + intersects['HydroID'] = intersects['HydroID'].astype(int) + intersects['feature_id_right'] = intersects['feature_id_right'].astype(int) + for idx in intersects.index: - if type(intersects.loc[idx, 'HydroID']) == np.int64: - flows_idx = intersects.loc[idx, 'HydroID'] - else: - flows_idx = int(intersects.loc[idx, 'HydroID'].unique()) - - if type(intersects.loc[idx, 'feature_id_right']) == np.int64: - streams_idxs = [intersects.loc[idx, 'feature_id_right']] - else: - streams_idxs = intersects.loc[idx, 'feature_id_right'].unique() + flows_idx = int(intersects.loc[idx, 'HydroID'].unique()) + + streams_idxs = intersects.loc[idx, 'feature_id_right'].unique() for streams_idx in streams_idxs: intersect = gpd.overlay( From b8d8ba5bac72d01ab20f8f1b48d505d982ea1690 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= Date: Thu, 12 Oct 2023 17:34:31 -0400 Subject: [PATCH 05/18] Minor edits --- tools/verify_crosswalk.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/verify_crosswalk.py b/tools/verify_crosswalk.py index d1212c470..94d295c68 100644 --- a/tools/verify_crosswalk.py +++ b/tools/verify_crosswalk.py @@ -32,17 +32,16 @@ def verify_crosswalk(input_flows_fileName, input_nwmflows_fileName, output_table xwalks = [] intersects = flows.sjoin(streams) intersects['HydroID'] = intersects['HydroID'].astype(int) - intersects['feature_id_right'] = intersects['feature_id_right'].astype(int) for idx in intersects.index: - flows_idx = int(intersects.loc[idx, 'HydroID'].unique()) + flows_idx = intersects.loc[idx, 'HydroID'].unique() - streams_idxs = intersects.loc[idx, 'feature_id_right'].unique() + streams_idxs = intersects.loc[idx, 'feature_id'].unique() for streams_idx in streams_idxs: intersect = gpd.overlay( flows[flows['HydroID'] == flows_idx], - nwm_streams[nwm_streams['feature_id'] == streams_idx], + nwm_streams[nwm_streams['ID'] == streams_idx], keep_geom_type=False, ) From 0ab05adf3e972291c9e882e92d016a8c55c924b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= Date: Mon, 16 Oct 2023 16:00:40 -0400 Subject: [PATCH 06/18] Count segment intersections --- tools/verify_crosswalk.py | 92 +++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 52 deletions(-) diff --git a/tools/verify_crosswalk.py b/tools/verify_crosswalk.py index 94d295c68..54a27957f 100644 --- a/tools/verify_crosswalk.py +++ b/tools/verify_crosswalk.py @@ -8,75 +8,63 @@ def verify_crosswalk(input_flows_fileName, input_nwmflows_fileName, output_table_fileName): - """ - Verify the crosswalk between the NWM and DEM-derived flowlines. - - Parameters - ---------- - input_flows_fileName : str - DEM-derived flowlines - input_nwmflows_fileName : str - NWM flowlines - crosswalk_fileName : str - Crosswalk table filename - """ - - nwm_streams = gpd.read_file(input_nwmflows_fileName) - flows = gpd.read_file(input_flows_fileName) - # Crosswalk check # fh.vprint('Checking for crosswalks between NWM and DEM-derived flowlines', verbose) + flows = gpd.read_file(input_flows_fileName) + nwm_streams = gpd.read_file(input_nwmflows_fileName) + # Compute the number of intersections between the NWM and DEM-derived flowlines streams = nwm_streams xwalks = [] intersects = flows.sjoin(streams) - intersects['HydroID'] = intersects['HydroID'].astype(int) for idx in intersects.index: - flows_idx = intersects.loc[idx, 'HydroID'].unique() - - streams_idxs = intersects.loc[idx, 'feature_id'].unique() - - for streams_idx in streams_idxs: - intersect = gpd.overlay( - flows[flows['HydroID'] == flows_idx], - nwm_streams[nwm_streams['ID'] == streams_idx], - keep_geom_type=False, - ) - - if intersect.geometry[0].geom_type == 'Point': - intersect_points = 1 - else: - intersect_points = len(intersect.geometry[0].geoms) - - xwalks.append( - [ - flows_idx, - int(flows.loc[flows['HydroID'] == flows_idx, 'feature_id'].iloc[0]), - streams_idx, - intersect_points, - ] - ) - - print(f'Found {intersect_points} intersections for {flows_idx} and {streams_idx}') + flows_idx = intersects.loc[intersects.index == idx, 'HydroID'].unique() + + if type(intersects.loc[idx, 'ID']) == np.int64: + streams_idxs = [intersects.loc[idx, 'ID']] + else: + streams_idxs = intersects.loc[idx, 'ID'].unique() + + for flows_id in flows_idx: + for streams_idx in streams_idxs: + intersect = gpd.overlay( + flows[flows['HydroID'] == flows_id], + nwm_streams[nwm_streams['ID'] == streams_idx], + keep_geom_type=False, + ) + + if len(intersect) == 0: + intersect_points = 0 + feature_id = flows.loc[flows['HydroID'] == flows_id, 'feature_id'] + elif intersect.geometry[0].geom_type == 'Point': + intersect_points = 1 + feature_id = flows.loc[flows['HydroID'] == flows_id, 'feature_id'] + else: + intersect_points = len(intersect.geometry[0].geoms) + feature_id = int(flows.loc[flows['HydroID'] == flows_id, 'feature_id'].iloc[0]) + + xwalks.append([flows_id, feature_id, streams_idx, intersect_points]) + + print(f'Found {intersect_points} intersections for {flows_id} and {streams_idx}') # Get the maximum number of intersections for each flowline - xwalks = pd.DataFrame(xwalks, columns=['HydroID', 'feature_id', 'feature_id_right', 'intersect_points']) - xwalks = xwalks.drop_duplicates() - xwalks['match'] = xwalks[1] == xwalks[2] + xwalks = pd.DataFrame(xwalks, columns=['HydroID', 'feature_id', 'ID', 'intersect_points']) + xwalks['feature_id'] = xwalks['feature_id'].astype(int) + + xwalks['match'] = xwalks['feature_id'] == xwalks['ID'] - xwalks_groupby = xwalks[[0, 3]].groupby(0).max() + xwalks_groupby = xwalks[['HydroID', 'intersect_points']].groupby('HydroID').max() - xwalks = xwalks.merge(xwalks_groupby, on=0) - xwalks['max'] = xwalks['3_x'] == xwalks['3_y'] + xwalks = xwalks.merge(xwalks_groupby, on='HydroID', how='left') + xwalks['max'] = xwalks['intersect_points_x'] == xwalks['intersect_points_y'] - xwalks['crosswalk'] = np.where(xwalks['match'] == xwalks['max'], True, False) + xwalks['crosswalk'] = xwalks['match'] == xwalks['max'] - # Save the crosswalk table xwalks.to_csv(output_table_fileName, index=False) - return xwalks + return flows if __name__ == '__main__': From d8ac9b0cb75b9aca5984ee9b979ee8afeada618c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= Date: Mon, 16 Oct 2023 17:38:28 -0400 Subject: [PATCH 07/18] Minor edits --- tools/verify_crosswalk.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/verify_crosswalk.py b/tools/verify_crosswalk.py index 54a27957f..5fdec6855 100644 --- a/tools/verify_crosswalk.py +++ b/tools/verify_crosswalk.py @@ -60,7 +60,11 @@ def verify_crosswalk(input_flows_fileName, input_nwmflows_fileName, output_table xwalks = xwalks.merge(xwalks_groupby, on='HydroID', how='left') xwalks['max'] = xwalks['intersect_points_x'] == xwalks['intersect_points_y'] - xwalks['crosswalk'] = xwalks['match'] == xwalks['max'] + xwalks['crosswalk'] = (xwalks['match'] is True) & (xwalks['max'] is True) + + xwalks = xwalks.drop_duplicates() + + xwalks = xwalks.sort_values(by=['HydroID', 'intersect_points_x', 'ID'], ascending=False) xwalks.to_csv(output_table_fileName, index=False) From e171c8f738f42e0ee3857e654bc1d0b61386dead Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= Date: Tue, 17 Oct 2023 20:53:26 -0400 Subject: [PATCH 08/18] isort --- src/map_headwaters_to_reaches.py | 36 ++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 src/map_headwaters_to_reaches.py diff --git a/src/map_headwaters_to_reaches.py b/src/map_headwaters_to_reaches.py new file mode 100644 index 000000000..3a4b1cf6b --- /dev/null +++ b/src/map_headwaters_to_reaches.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 + +import os + +import geopandas as gpd + + +fim_dir = '/outputs/dev-4.4.2.3/18100201' + +headwaters = gpd.read_file(os.path.join(fim_dir, 'nwm_headwater_points_subset.gpkg')) +reaches = gpd.read_file(os.path.join(fim_dir, 'branches', '0', 'demDerived_reaches_0.shp')) + +# Snap nearest reaches where magnitude is 1 to headwaters +reaches_mag_1 = reaches[reaches['Magnitude'] == 1] +snapped_reaches = gpd.sjoin_nearest(reaches_mag_1, headwaters, how='left', distance_col='distance') +reaches_snapped = reaches.merge(snapped_reaches[['LINKNO', 'ID', 'distance']], on='LINKNO', how='left') + + +def sum_upstream(row): + """Recursive function to sum up all upstream reaches.""" + + sum = 0 + if row['USLINKNO1'] != -1: + sum += sum_upstream(row['USLINKNO1']) + else: + sum += row['ID'] + + if row['USLINKNO2'] != -1: + sum += sum_upstream(row['USLINKNO2']) + else: + sum += row['ID'] + + +# Apply stream algorithm with headwaters IDs +for i, row in reaches_snapped[reaches_snapped['DSLINKNO'] == -1].iterrows(): + sum = sum_upstream(row) From 2964b971ca8005f67614a947f98993eeb31b2b8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= Date: Thu, 26 Oct 2023 14:33:31 -0400 Subject: [PATCH 09/18] Reformat --- tools/verify_crosswalk.py | 147 ++++++++++++++++++++++++-------------- 1 file changed, 95 insertions(+), 52 deletions(-) diff --git a/tools/verify_crosswalk.py b/tools/verify_crosswalk.py index 5fdec6855..1b0135d6e 100644 --- a/tools/verify_crosswalk.py +++ b/tools/verify_crosswalk.py @@ -7,68 +7,110 @@ import pandas as pd -def verify_crosswalk(input_flows_fileName, input_nwmflows_fileName, output_table_fileName): - # Crosswalk check +def verify_crosswalk( + input_flows_fileName, input_nwmflows_fileName, input_nwm_headwaters_fileName, output_table_fileName +): + # Check for crosswalks between NWM and DEM-derived flowlines # fh.vprint('Checking for crosswalks between NWM and DEM-derived flowlines', verbose) flows = gpd.read_file(input_flows_fileName) + flows['HydroID'] = flows['HydroID'].astype(int) nwm_streams = gpd.read_file(input_nwmflows_fileName) - - # Compute the number of intersections between the NWM and DEM-derived flowlines - streams = nwm_streams - xwalks = [] - intersects = flows.sjoin(streams) - - for idx in intersects.index: - flows_idx = intersects.loc[intersects.index == idx, 'HydroID'].unique() - - if type(intersects.loc[idx, 'ID']) == np.int64: - streams_idxs = [intersects.loc[idx, 'ID']] - else: - streams_idxs = intersects.loc[idx, 'ID'].unique() - - for flows_id in flows_idx: - for streams_idx in streams_idxs: - intersect = gpd.overlay( - flows[flows['HydroID'] == flows_id], - nwm_streams[nwm_streams['ID'] == streams_idx], - keep_geom_type=False, + nwm_streams = nwm_streams.rename(columns={'ID': 'feature_id'}) + nwm_headwaters = gpd.read_file(input_nwm_headwaters_fileName) + + streams_outlets = nwm_streams.loc[~nwm_streams.to.isin(nwm_streams.feature_id), 'feature_id'] + flows_outlets = flows.loc[~flows['NextDownID'].isin(flows['HydroID']), 'HydroID'] + + nwm_streams_headwaters_list = ~nwm_streams['feature_id'].isin(nwm_streams['to']) + # flows_headwaters_list = ~flows['LINKNO'].isin(flows['DSLINKNO']) + flows_headwaters_list = ~flows['HydroID'].isin(flows['NextDownID']) + + nwm_streams_headwaters = nwm_streams[nwm_streams_headwaters_list] + flows_headwaters = flows[flows_headwaters_list] + + # Map headwater points to DEM-derived reaches + flows_headwaters = flows_headwaters.sjoin_nearest(nwm_headwaters) + flows_headwaters = flows_headwaters[['HydroID', 'ID']] + nwm_streams_headwaters = nwm_streams_headwaters.sjoin_nearest(nwm_headwaters) + nwm_streams_headwaters = nwm_streams_headwaters[['feature_id', 'ID']] + + def _hydroid_to_feature_id(df, hydroid, hydroid_attr, feature_id_attr): + return df.loc[df[hydroid_attr] == hydroid, feature_id_attr] + + def _get_upstream_data(data, data_headwaters, data_dict, hydroid, hydroid_attr, nextdownid_attr): + # Find upstream segments + data_dict[hydroid] = list(data.loc[data[nextdownid_attr] == hydroid, hydroid_attr].values) + + for hydroid in data_dict[hydroid]: + if hydroid in data_headwaters[hydroid_attr].values: + data_dict[hydroid] = data_headwaters.loc[ + data_headwaters[hydroid_attr] == hydroid, 'ID' + ].values[0] + else: + data_dict = _get_upstream_data( + data, data_headwaters, data_dict, hydroid, hydroid_attr, nextdownid_attr ) - if len(intersect) == 0: - intersect_points = 0 - feature_id = flows.loc[flows['HydroID'] == flows_id, 'feature_id'] - elif intersect.geometry[0].geom_type == 'Point': - intersect_points = 1 - feature_id = flows.loc[flows['HydroID'] == flows_id, 'feature_id'] + return data_dict + + flows_dict = dict() + streams_dict = dict() + + # Compare hash tables + for hydroid in flows_outlets: + flows_dict = _get_upstream_data(flows, flows_headwaters, flows_dict, hydroid, 'HydroID', 'NextDownID') + + for feature_id in streams_outlets: + streams_dict = _get_upstream_data( + nwm_streams, nwm_streams_headwaters, streams_dict, feature_id, 'feature_id', 'to' + ) + + results = [] + for flow in flows_dict: + fid = _hydroid_to_feature_id(flows, flow, 'HydroID', 'feature_id').iloc[0] + upstream_fid = flows_dict[flow] + + upstream_fids = [] + nwm_fids = streams_dict[fid] + out_list = [flow, fid, upstream_fids, nwm_fids] + + if type(upstream_fid) != np.int64: + if len(upstream_fid) > 0: + for i in upstream_fid: + # Find upstream feature_id(s) + temp_ids = streams_dict[ + int(_hydroid_to_feature_id(flows, i, 'HydroID', 'feature_id').iloc[0]) + ] + if type(temp_ids) == list: + upstream_fids.append(temp_ids[0]) + else: + upstream_fids.append(temp_ids) + + out_list = [flow, fid, upstream_fids, nwm_fids] + if type(nwm_fids) == np.int64: + nwm_fids = [nwm_fids] + if upstream_fids == nwm_fids: + # 0: Crosswalk is correct + out_list.append(0) else: - intersect_points = len(intersect.geometry[0].geoms) - feature_id = int(flows.loc[flows['HydroID'] == flows_id, 'feature_id'].iloc[0]) - - xwalks.append([flows_id, feature_id, streams_idx, intersect_points]) - - print(f'Found {intersect_points} intersections for {flows_id} and {streams_idx}') - - # Get the maximum number of intersections for each flowline - xwalks = pd.DataFrame(xwalks, columns=['HydroID', 'feature_id', 'ID', 'intersect_points']) - xwalks['feature_id'] = xwalks['feature_id'].astype(int) - - xwalks['match'] = xwalks['feature_id'] == xwalks['ID'] - - xwalks_groupby = xwalks[['HydroID', 'intersect_points']].groupby('HydroID').max() - - xwalks = xwalks.merge(xwalks_groupby, on='HydroID', how='left') - xwalks['max'] = xwalks['intersect_points_x'] == xwalks['intersect_points_y'] - - xwalks['crosswalk'] = (xwalks['match'] is True) & (xwalks['max'] is True) - - xwalks = xwalks.drop_duplicates() + # 1: Crosswalk is incorrect + out_list.append(1) + else: + # 2: Crosswalk is empty + out_list.append(2) + else: + # 3: if upstream is a headwater point + out_list.append(3) - xwalks = xwalks.sort_values(by=['HydroID', 'intersect_points_x', 'ID'], ascending=False) + results.append(out_list) - xwalks.to_csv(output_table_fileName, index=False) + results = pd.DataFrame( + results, columns=['HydroID', 'feature_id', 'upstream_fids', 'upstream_nwm_fids', 'status'] + ) + results.to_csv(output_table_fileName, index=False) - return flows + return results if __name__ == '__main__': @@ -77,6 +119,7 @@ def verify_crosswalk(input_flows_fileName, input_nwmflows_fileName, output_table ) parser.add_argument('-a', '--input-flows-fileName', help='DEM derived streams', required=True) parser.add_argument('-b', '--input-nwmflows-fileName', help='Subset NWM burnlines', required=True) + parser.add_argument('-d', '--input-nwm-headwaters-fileName', help='Subset NWM headwaters', required=True) parser.add_argument('-c', '--output-table-fileName', help='Output table filename', required=True) args = vars(parser.parse_args()) From c019fc17ac70cf6267df265a0ff8e23d416bf1f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= Date: Fri, 27 Oct 2023 15:11:32 -0400 Subject: [PATCH 10/18] Ignore multiple feature_ids --- tools/verify_crosswalk.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/tools/verify_crosswalk.py b/tools/verify_crosswalk.py index 1b0135d6e..11a66c714 100644 --- a/tools/verify_crosswalk.py +++ b/tools/verify_crosswalk.py @@ -69,28 +69,31 @@ def _get_upstream_data(data, data_headwaters, data_dict, hydroid, hydroid_attr, results = [] for flow in flows_dict: fid = _hydroid_to_feature_id(flows, flow, 'HydroID', 'feature_id').iloc[0] - upstream_fid = flows_dict[flow] + upstream_hid = flows_dict[flow] upstream_fids = [] nwm_fids = streams_dict[fid] out_list = [flow, fid, upstream_fids, nwm_fids] - if type(upstream_fid) != np.int64: - if len(upstream_fid) > 0: - for i in upstream_fid: + if type(upstream_hid) != np.int64: + if len(upstream_hid) > 0: + for i in upstream_hid: # Find upstream feature_id(s) - temp_ids = streams_dict[ - int(_hydroid_to_feature_id(flows, i, 'HydroID', 'feature_id').iloc[0]) - ] - if type(temp_ids) == list: - upstream_fids.append(temp_ids[0]) + temp_fid = int(_hydroid_to_feature_id(flows, i, 'HydroID', 'feature_id').iloc[0]) + + if type(temp_fid) == list: + upstream_fids.append(temp_fid[0]) else: - upstream_fids.append(temp_ids) + upstream_fids.append(temp_fid) out_list = [flow, fid, upstream_fids, nwm_fids] if type(nwm_fids) == np.int64: nwm_fids = [nwm_fids] - if upstream_fids == nwm_fids: + + if fid in upstream_fids: + # Skip duplicate feature_ids + out_list.append(-1) + elif set(upstream_fids) == set(nwm_fids): # 0: Crosswalk is correct out_list.append(0) else: @@ -106,7 +109,7 @@ def _get_upstream_data(data, data_headwaters, data_dict, hydroid, hydroid_attr, results.append(out_list) results = pd.DataFrame( - results, columns=['HydroID', 'feature_id', 'upstream_fids', 'upstream_nwm_fids', 'status'] + data=results, columns=['HydroID', 'feature_id', 'upstream_fids', 'upstream_nwm_fids', 'status'] ) results.to_csv(output_table_fileName, index=False) From 838bd7df68555466d2f55292af1022485bba4dd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= Date: Mon, 30 Oct 2023 09:26:15 -0400 Subject: [PATCH 11/18] Update documentation --- tools/verify_crosswalk.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/tools/verify_crosswalk.py b/tools/verify_crosswalk.py index 11a66c714..35b975911 100644 --- a/tools/verify_crosswalk.py +++ b/tools/verify_crosswalk.py @@ -10,6 +10,28 @@ def verify_crosswalk( input_flows_fileName, input_nwmflows_fileName, input_nwm_headwaters_fileName, output_table_fileName ): + """ + Tool to check the accuracy of crosswalked attributes + + Parameters + ---------- + input_flows_fileName : str + Path to DEM derived streams + input_nwmflows_fileName : str + Path to subset NWM burnlines + input_nwm_headwaters_fileName : str + Path to subset NWM headwaters + output_table_fileName : str + Path to output table filename + + Returns + ------- + results : pandas.DataFrame + + Usage + ----- + python verify_crosswalk.py -a -b -d -c + """ # Check for crosswalks between NWM and DEM-derived flowlines # fh.vprint('Checking for crosswalks between NWM and DEM-derived flowlines', verbose) @@ -117,9 +139,7 @@ def _get_upstream_data(data, data_headwaters, data_dict, hydroid, hydroid_attr, if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='Crosswalk for MS/FR/GMS networks; calculate synthetic rating curves; update short rating curves' - ) + parser = argparse.ArgumentParser(description='Tool to check crosswalk accuracy') parser.add_argument('-a', '--input-flows-fileName', help='DEM derived streams', required=True) parser.add_argument('-b', '--input-nwmflows-fileName', help='Subset NWM burnlines', required=True) parser.add_argument('-d', '--input-nwm-headwaters-fileName', help='Subset NWM headwaters', required=True) From 70647259772150bb36dcb1a598389386e6739ea0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= Date: Wed, 3 Jan 2024 14:04:41 -0500 Subject: [PATCH 12/18] Combine intersections and network --- tools/verify_crosswalk.py | 108 +++++++++++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 1 deletion(-) diff --git a/tools/verify_crosswalk.py b/tools/verify_crosswalk.py index 35b975911..652fa7281 100644 --- a/tools/verify_crosswalk.py +++ b/tools/verify_crosswalk.py @@ -28,6 +28,113 @@ def verify_crosswalk( ------- results : pandas.DataFrame + Usage + ----- + python verify_crosswalk.py -a -b -d -c + """ + + intersections_results = _verify_crosswalk_intersections(input_flows_fileName, input_nwmflows_fileName) + + intersections_correct = intersections_results['crosswalk'].sum() + intersections_total = len(intersections_results) + intersections_summary = intersections_correct * 100.0 / intersections_total + + network_results = _verify_crosswalk_network( + input_flows_fileName, input_nwmflows_fileName, input_nwm_headwaters_fileName + ) + + network_results = network_results[network_results['status'] >= 0] + network_correct = len(network_results[network_results['status'] == 0]) + network_total = len(network_results) + network_summary = network_correct * 100.0 / network_total + + results = pd.DataFrame( + data={ + 'type': ['intersections', 'network'], + 'correct': [intersections_correct, network_correct], + 'total': [intersections_total, network_total], + 'percent': [intersections_summary, network_summary], + } + ) + + results.to_csv(output_table_fileName, index=False) + + +def _verify_crosswalk_intersections(input_flows_fileName, input_nwmflows_fileName): + # Crosswalk check + # fh.vprint('Checking for crosswalks between NWM and DEM-derived flowlines', verbose) + + flows = gpd.read_file(input_flows_fileName) + nwm_streams = gpd.read_file(input_nwmflows_fileName) + + # Compute the number of intersections between the NWM and DEM-derived flowlines + streams = nwm_streams + xwalks = [] + intersects = flows.sjoin(streams) + + for idx in intersects.index: + flows_idx = intersects.loc[intersects.index == idx, 'HydroID'].unique() + + if type(intersects.loc[idx, 'ID']) == np.int64: + streams_idxs = [intersects.loc[idx, 'ID']] + else: + streams_idxs = intersects.loc[idx, 'ID'].unique() + + for flows_id in flows_idx: + for streams_idx in streams_idxs: + intersect = gpd.overlay( + flows[flows['HydroID'] == flows_id], + nwm_streams[nwm_streams['ID'] == streams_idx], + keep_geom_type=False, + ) + + if len(intersect) == 0: + intersect_points = 0 + feature_id = flows.loc[flows['HydroID'] == flows_id, 'feature_id'] + elif intersect.geometry[0].geom_type == 'Point': + intersect_points = 1 + feature_id = flows.loc[flows['HydroID'] == flows_id, 'feature_id'] + else: + intersect_points = len(intersect.geometry[0].geoms) + feature_id = int(flows.loc[flows['HydroID'] == flows_id, 'feature_id'].iloc[0]) + + xwalks.append([flows_id, feature_id, streams_idx, intersect_points]) + + # Get the maximum number of intersections for each flowline + xwalks = pd.DataFrame(xwalks, columns=['HydroID', 'feature_id', 'ID', 'intersect_points']) + xwalks['feature_id'] = xwalks['feature_id'].astype(int) + + xwalks['match'] = xwalks['feature_id'] == xwalks['ID'] + + xwalks_groupby = xwalks[['HydroID', 'intersect_points']].groupby('HydroID').max() + + xwalks = xwalks.merge(xwalks_groupby, on='HydroID', how='left') + xwalks['max'] = xwalks['intersect_points_x'] == xwalks['intersect_points_y'] + + xwalks['crosswalk'] = xwalks['match'] == xwalks['max'] + + return xwalks + + +def _verify_crosswalk_network(input_flows_fileName, input_nwmflows_fileName, input_nwm_headwaters_fileName): + """ + Tool to check the accuracy of crosswalked attributes + + Parameters + ---------- + input_flows_fileName : str + Path to DEM derived streams + input_nwmflows_fileName : str + Path to subset NWM burnlines + input_nwm_headwaters_fileName : str + Path to subset NWM headwaters + output_table_fileName : str + Path to output table filename + + Returns + ------- + results : pandas.DataFrame + Usage ----- python verify_crosswalk.py -a -b -d -c @@ -133,7 +240,6 @@ def _get_upstream_data(data, data_headwaters, data_dict, hydroid, hydroid_attr, results = pd.DataFrame( data=results, columns=['HydroID', 'feature_id', 'upstream_fids', 'upstream_nwm_fids', 'status'] ) - results.to_csv(output_table_fileName, index=False) return results From 3637f5a5dd1969d0965beaea19b8661765c0f92d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= Date: Thu, 4 Jan 2024 19:38:40 -0500 Subject: [PATCH 13/18] Add switch to run tool in fim_pipeline --- Dockerfile | 1 + fim_pipeline.sh | 1 + fim_post_processing.sh | 2 +- fim_pre_processing.sh | 5 +++++ src/delineate_hydros_and_produce_HAND.sh | 13 +++++++++++++ 5 files changed, 21 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 512e726dd..dd579c65e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -54,6 +54,7 @@ ARG depDir=/dependencies ENV inputsDir=$dataDir/inputs ENV outputsDir=/outputs ENV srcDir=$projectDir/src +ENV toolsDir=$projectDir/tools ENV workDir=/fim_temp ENV taudemDir=$depDir/taudem/bin ENV taudemDir2=$depDir/taudem_accelerated_flowDirections/taudem/build/bin diff --git a/fim_pipeline.sh b/fim_pipeline.sh index 8523d21ef..4e1e76159 100755 --- a/fim_pipeline.sh +++ b/fim_pipeline.sh @@ -48,6 +48,7 @@ usage() will be skipped. -isaws : If this param is included, AWS objects will be used where possible - Note: This feature is not yet implemented. + -x : If this param is included, the crosswalk will be evaluated. Running 'fim_pipeline.sh' is a quicker process than running all three scripts independently; however, diff --git a/fim_post_processing.sh b/fim_post_processing.sh index f8266c94d..4ebb7b5fa 100755 --- a/fim_post_processing.sh +++ b/fim_post_processing.sh @@ -210,7 +210,7 @@ echo echo -e $startDiv"Combining crosswalk tables" # aggregate outputs Tstart -python3 /foss_fim/tools/combine_crosswalk_tables.py \ +python3 $toolsDir/combine_crosswalk_tables.py \ -d $outputDestDir \ -o $outputDestDir/crosswalk_table.csv Tcount diff --git a/fim_pre_processing.sh b/fim_pre_processing.sh index 61eb3ef1a..cff84fa23 100755 --- a/fim_pre_processing.sh +++ b/fim_pre_processing.sh @@ -101,6 +101,9 @@ in -isaws) isAWS=1 ;; + -x) + evaluateCrosswalk=1 + ;; *) ;; esac shift @@ -129,6 +132,7 @@ if [ "$jobBranchLimit" = "" ]; then jobBranchLimit=1; fi if [ -z "$overwrite" ]; then overwrite=0; fi if [ -z "$skipcal" ]; then skipcal=0; fi if [ -z "$isAWS" ]; then isAWS=0; fi +if [ -z "$evaluateCrosswalk" ]; then evaluateCrosswalk=0; fi # validate and set defaults for the deny lists if [ "$deny_unit_list" = "" ] @@ -234,6 +238,7 @@ echo "export deny_branch_zero_list=$deny_branch_zero_list" >> $args_file echo "export has_deny_branch_zero_override=$has_deny_branch_zero_override" >> $args_file echo "export isAWS=$isAWS" >> $args_file echo "export skipcal=$skipcal" >> $args_file +echo "export evaluateCrosswalk=$evaluateCrosswalk" >> $args_file echo "--- Pre-processing is complete" diff --git a/src/delineate_hydros_and_produce_HAND.sh b/src/delineate_hydros_and_produce_HAND.sh index 2b04da494..2ec982323 100755 --- a/src/delineate_hydros_and_produce_HAND.sh +++ b/src/delineate_hydros_and_produce_HAND.sh @@ -302,3 +302,16 @@ python3 $srcDir/add_crosswalk.py \ -e $min_catchment_area \ -g $min_stream_length Tcount + +## EVALUATE CROSSWALK ## +if [ "$evaluateCrosswalk" = "True" ]; then + echo -e $startDiv"Evaluate crosswalk $hucNumber $current_branch_id" + date -u + Tstart + python3 $toolsDir/verify_crosswalk.py \ + -a $tempCurrentBranchDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg \ + -b $b_arg \ + -c $tempCurrentBranchDataDir/verify_crosswalk_$current_branch_id.csv \ + -d $tempHucDataDir/nwm_headwater_points_subset.gpkg + Tcount +fi From 1db7855a98b29788e3eb342a3a841fa8d49efb36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= Date: Fri, 5 Jan 2024 09:23:01 -0500 Subject: [PATCH 14/18] Delete unnecessary file --- src/map_headwaters_to_reaches.py | 36 -------------------------------- 1 file changed, 36 deletions(-) delete mode 100644 src/map_headwaters_to_reaches.py diff --git a/src/map_headwaters_to_reaches.py b/src/map_headwaters_to_reaches.py deleted file mode 100644 index 3a4b1cf6b..000000000 --- a/src/map_headwaters_to_reaches.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python3 - -import os - -import geopandas as gpd - - -fim_dir = '/outputs/dev-4.4.2.3/18100201' - -headwaters = gpd.read_file(os.path.join(fim_dir, 'nwm_headwater_points_subset.gpkg')) -reaches = gpd.read_file(os.path.join(fim_dir, 'branches', '0', 'demDerived_reaches_0.shp')) - -# Snap nearest reaches where magnitude is 1 to headwaters -reaches_mag_1 = reaches[reaches['Magnitude'] == 1] -snapped_reaches = gpd.sjoin_nearest(reaches_mag_1, headwaters, how='left', distance_col='distance') -reaches_snapped = reaches.merge(snapped_reaches[['LINKNO', 'ID', 'distance']], on='LINKNO', how='left') - - -def sum_upstream(row): - """Recursive function to sum up all upstream reaches.""" - - sum = 0 - if row['USLINKNO1'] != -1: - sum += sum_upstream(row['USLINKNO1']) - else: - sum += row['ID'] - - if row['USLINKNO2'] != -1: - sum += sum_upstream(row['USLINKNO2']) - else: - sum += row['ID'] - - -# Apply stream algorithm with headwaters IDs -for i, row in reaches_snapped[reaches_snapped['DSLINKNO'] == -1].iterrows(): - sum = sum_upstream(row) From 7c1df169c0b823a81527acab5c68db7e4127f8d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= Date: Fri, 5 Jan 2024 09:53:52 -0500 Subject: [PATCH 15/18] Rename file --- src/delineate_hydros_and_produce_HAND.sh | 8 ++++---- tools/{verify_crosswalk.py => evaluate_crosswalk.py} | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) rename tools/{verify_crosswalk.py => evaluate_crosswalk.py} (97%) diff --git a/src/delineate_hydros_and_produce_HAND.sh b/src/delineate_hydros_and_produce_HAND.sh index 2ec982323..d732e888e 100755 --- a/src/delineate_hydros_and_produce_HAND.sh +++ b/src/delineate_hydros_and_produce_HAND.sh @@ -17,7 +17,7 @@ T_total_start ## MASK LEVEE-PROTECTED AREAS FROM DEM ## if [ "$mask_leveed_area_toggle" = "True" ] && [ -f $tempHucDataDir/LeveeProtectedAreas_subset.gpkg ]; then - echo -e $startDiv"Mask levee-protected areas from DEM (*Overwrite dem_meters.tif output) $hucNumber $branch_zero_id" + echo -e $startDiv"Mask levee-protected areas from DEM (*Overwrite dem_meters.tif output) $hucNumber $current_branch_id" date -u Tstart python3 $srcDir/mask_dem.py \ @@ -304,14 +304,14 @@ python3 $srcDir/add_crosswalk.py \ Tcount ## EVALUATE CROSSWALK ## -if [ "$evaluateCrosswalk" = "True" ]; then +if [ "$current_branch_id" = "$branch_zero_id" ] && [ "$evaluateCrosswalk" = "1" ] ; then echo -e $startDiv"Evaluate crosswalk $hucNumber $current_branch_id" date -u Tstart - python3 $toolsDir/verify_crosswalk.py \ + python3 $toolsDir/evaluate_crosswalk.py \ -a $tempCurrentBranchDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg \ -b $b_arg \ - -c $tempCurrentBranchDataDir/verify_crosswalk_$current_branch_id.csv \ + -c $tempHucDataDir/crosswalk_evaluation_$current_branch_id.csv \ -d $tempHucDataDir/nwm_headwater_points_subset.gpkg Tcount fi diff --git a/tools/verify_crosswalk.py b/tools/evaluate_crosswalk.py similarity index 97% rename from tools/verify_crosswalk.py rename to tools/evaluate_crosswalk.py index 652fa7281..16bdd7063 100644 --- a/tools/verify_crosswalk.py +++ b/tools/evaluate_crosswalk.py @@ -75,7 +75,7 @@ def _verify_crosswalk_intersections(input_flows_fileName, input_nwmflows_fileNam for idx in intersects.index: flows_idx = intersects.loc[intersects.index == idx, 'HydroID'].unique() - if type(intersects.loc[idx, 'ID']) == np.int64: + if isinstance(intersects.loc[idx, 'ID'], np.int64): streams_idxs = [intersects.loc[idx, 'ID']] else: streams_idxs = intersects.loc[idx, 'ID'].unique() @@ -204,19 +204,19 @@ def _get_upstream_data(data, data_headwaters, data_dict, hydroid, hydroid_attr, nwm_fids = streams_dict[fid] out_list = [flow, fid, upstream_fids, nwm_fids] - if type(upstream_hid) != np.int64: + if not isinstance(upstream_hid, np.int64): if len(upstream_hid) > 0: for i in upstream_hid: # Find upstream feature_id(s) temp_fid = int(_hydroid_to_feature_id(flows, i, 'HydroID', 'feature_id').iloc[0]) - if type(temp_fid) == list: + if isinstance(temp_fid, list): upstream_fids.append(temp_fid[0]) else: upstream_fids.append(temp_fid) out_list = [flow, fid, upstream_fids, nwm_fids] - if type(nwm_fids) == np.int64: + if isinstance(nwm_fids, np.int64): nwm_fids = [nwm_fids] if fid in upstream_fids: From c26df17714876a70e7bf9b1d425ba6d31215ff1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= Date: Fri, 5 Jan 2024 16:15:18 -0500 Subject: [PATCH 16/18] Create output file --- src/add_crosswalk.py | 6 +- src/delineate_hydros_and_produce_HAND.sh | 4 +- tools/evaluate_crosswalk.py | 81 +++++++++++++++++------- 3 files changed, 66 insertions(+), 25 deletions(-) diff --git a/src/add_crosswalk.py b/src/add_crosswalk.py index fd7c8c450..b10453f61 100755 --- a/src/add_crosswalk.py +++ b/src/add_crosswalk.py @@ -258,7 +258,11 @@ def add_crosswalk( else: update_id = output_flows.loc[output_flows.HydroID == short_id]['HydroID'].item() - str_order = output_flows.loc[output_flows.HydroID == short_id]['order_'].item() + output_order = output_flows.loc[output_flows.HydroID == short_id]['order_'] + if len(output_order) == 1: + str_order = output_order.item() + else: + str_order = output_order.max() sml_segs = pd.concat( [ sml_segs, diff --git a/src/delineate_hydros_and_produce_HAND.sh b/src/delineate_hydros_and_produce_HAND.sh index d732e888e..d76632cbe 100755 --- a/src/delineate_hydros_and_produce_HAND.sh +++ b/src/delineate_hydros_and_produce_HAND.sh @@ -312,6 +312,8 @@ if [ "$current_branch_id" = "$branch_zero_id" ] && [ "$evaluateCrosswalk" = "1" -a $tempCurrentBranchDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg \ -b $b_arg \ -c $tempHucDataDir/crosswalk_evaluation_$current_branch_id.csv \ - -d $tempHucDataDir/nwm_headwater_points_subset.gpkg + -d $tempHucDataDir/nwm_headwater_points_subset.gpkg \ + -u $hucNumber \ + -z $current_branch_id Tcount fi diff --git a/tools/evaluate_crosswalk.py b/tools/evaluate_crosswalk.py index 16bdd7063..0051c3600 100644 --- a/tools/evaluate_crosswalk.py +++ b/tools/evaluate_crosswalk.py @@ -7,11 +7,16 @@ import pandas as pd -def verify_crosswalk( - input_flows_fileName, input_nwmflows_fileName, input_nwm_headwaters_fileName, output_table_fileName +def evaluate_crosswalk( + input_flows_fileName: str, + input_nwmflows_fileName: str, + input_nwm_headwaters_fileName: str, + output_table_fileName: str, + huc: str, + branch: str, ): """ - Tool to check the accuracy of crosswalked attributes + Tool to check the accuracy of crosswalked attributes using two methods: counting the number of intersections between two stream representations and network, which checks the upstream and downstream connectivity of each stream segment. Parameters ---------- @@ -23,6 +28,10 @@ def verify_crosswalk( Path to subset NWM headwaters output_table_fileName : str Path to output table filename + huc : str + HUC ID + branch : str + Branch ID Returns ------- @@ -30,37 +39,56 @@ def verify_crosswalk( Usage ----- - python verify_crosswalk.py -a -b -d -c + python evaluate_crosswalk.py -a -b -d -c -u -z """ - intersections_results = _verify_crosswalk_intersections(input_flows_fileName, input_nwmflows_fileName) + intersections_results = _evaluate_crosswalk_intersections(input_flows_fileName, input_nwmflows_fileName) intersections_correct = intersections_results['crosswalk'].sum() intersections_total = len(intersections_results) - intersections_summary = intersections_correct * 100.0 / intersections_total + intersections_summary = intersections_correct / intersections_total - network_results = _verify_crosswalk_network( + network_results = _evaluate_crosswalk_network( input_flows_fileName, input_nwmflows_fileName, input_nwm_headwaters_fileName ) network_results = network_results[network_results['status'] >= 0] network_correct = len(network_results[network_results['status'] == 0]) network_total = len(network_results) - network_summary = network_correct * 100.0 / network_total + network_summary = network_correct / network_total results = pd.DataFrame( data={ - 'type': ['intersections', 'network'], + 'huc': [huc, huc], + 'branch': [branch, branch], + 'method': ['intersections', 'network'], 'correct': [intersections_correct, network_correct], 'total': [intersections_total, network_total], - 'percent': [intersections_summary, network_summary], + 'proportion': [intersections_summary, network_summary], } ) results.to_csv(output_table_fileName, index=False) + return results + + +def _evaluate_crosswalk_intersections(input_flows_fileName: str, input_nwmflows_fileName: str): + """ + Computes the number of intersections between the NWM and DEM-derived flowlines + + Parameters + ---------- + input_flows_fileName : str + Path to DEM derived streams + input_nwmflows_fileName : str + Path to subset NWM burnlines + + Returns + ------- + pandas.DataFrame + """ -def _verify_crosswalk_intersections(input_flows_fileName, input_nwmflows_fileName): # Crosswalk check # fh.vprint('Checking for crosswalks between NWM and DEM-derived flowlines', verbose) @@ -116,9 +144,11 @@ def _verify_crosswalk_intersections(input_flows_fileName, input_nwmflows_fileNam return xwalks -def _verify_crosswalk_network(input_flows_fileName, input_nwmflows_fileName, input_nwm_headwaters_fileName): +def _evaluate_crosswalk_network( + input_flows_fileName: str, input_nwmflows_fileName: str, input_nwm_headwaters_fileName: str +): """ - Tool to check the accuracy of crosswalked attributes + Compares the upstream and downstream connectivity of each stream segment Parameters ---------- @@ -133,12 +163,9 @@ def _verify_crosswalk_network(input_flows_fileName, input_nwmflows_fileName, inp Returns ------- - results : pandas.DataFrame - - Usage - ----- - python verify_crosswalk.py -a -b -d -c + pandas.DataFrame """ + # Check for crosswalks between NWM and DEM-derived flowlines # fh.vprint('Checking for crosswalks between NWM and DEM-derived flowlines', verbose) @@ -246,11 +273,19 @@ def _get_upstream_data(data, data_headwaters, data_dict, hydroid, hydroid_attr, if __name__ == '__main__': parser = argparse.ArgumentParser(description='Tool to check crosswalk accuracy') - parser.add_argument('-a', '--input-flows-fileName', help='DEM derived streams', required=True) - parser.add_argument('-b', '--input-nwmflows-fileName', help='Subset NWM burnlines', required=True) - parser.add_argument('-d', '--input-nwm-headwaters-fileName', help='Subset NWM headwaters', required=True) - parser.add_argument('-c', '--output-table-fileName', help='Output table filename', required=True) + parser.add_argument('-a', '--input-flows-fileName', help='DEM derived streams', type=str, required=True) + parser.add_argument( + '-b', '--input-nwmflows-fileName', help='Subset NWM burnlines', type=str, required=True + ) + parser.add_argument( + '-d', '--input-nwm-headwaters-fileName', help='Subset NWM headwaters', type=str, required=True + ) + parser.add_argument( + '-c', '--output-table-fileName', help='Output table filename', type=str, required=True + ) + parser.add_argument('-u', '--huc', help='HUC ID', type=str, required=True) + parser.add_argument('-z', '--branch', help='Branch ID', type=str, required=True) args = vars(parser.parse_args()) - verify_crosswalk(**args) + evaluate_crosswalk(**args) From acb4ef50f727e3c416de1ad303bed3dd87d65d32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= Date: Fri, 5 Jan 2024 16:44:06 -0500 Subject: [PATCH 17/18] Update CHANGELOG.md --- docs/CHANGELOG.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 7fb3bd956..26d8bd14d 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -1,6 +1,28 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v4.4.x.x - 2024-01-05 - [PR#1061](https://github.com/NOAA-OWP/inundation-mapping/pull/1061) + +Adds a post-processing tool to compare crosswalked (conflated) `feature_id`s between NWM stream network to DEM-derived reaches. The tool is run if the `-x` flag is added to `fim_pipeline.sh`. Results are computed for branch 0 and saved in a summary file in the HUC output folder. + +### Additions + +- `tools/evaluate_crosswalk.py`: evaluates crosswalk accuracy using two methods: + - intersections: the number of intersections between streamlines + - network (or tree): compares the feature_ids of the immediate upstream segments + +### Changes + +- `Dockerfile`: added `toolsDir` environment variable +- `fim_pipeline.sh`: added `-x` flag to run crosswalk evaluation tool +- `fim_post_processing.sh`: changed hardcoded `/foss_fim/tools` to `toolsDir` environment variable +- `fim_pre_processing.sh`: added `evaluateCrosswalk` environment variable +- `src/` + - `add_crosswalk.py`: fix bug + - `delineate_hydros_and_produce_HAND.sh`: added a call to `verify_crosswalk.py` if evaluateCrosswalk is True. + +

+ ## v4.4.8.3 - 2024-01-05 - [PR#1059](https://github.com/NOAA-OWP/inundation-mapping/pull/1059) Fixes erroneous branch inundation in levee-protected areas. From 4ee8ef3b0f96381497f03dd7c596dfa951b350bb Mon Sep 17 00:00:00 2001 From: Rob Hanna - NOAA <90854818+RobHanna-NOAA@users.noreply.github.com> Date: Fri, 12 Jan 2024 10:42:39 -0700 Subject: [PATCH 18/18] Update CHANGELOG.md --- docs/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 26d8bd14d..e514eb23c 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -1,7 +1,7 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. -## v4.4.x.x - 2024-01-05 - [PR#1061](https://github.com/NOAA-OWP/inundation-mapping/pull/1061) +## v4.4.8.4 - 2024-01-12 - [PR#1061](https://github.com/NOAA-OWP/inundation-mapping/pull/1061) Adds a post-processing tool to compare crosswalked (conflated) `feature_id`s between NWM stream network to DEM-derived reaches. The tool is run if the `-x` flag is added to `fim_pipeline.sh`. Results are computed for branch 0 and saved in a summary file in the HUC output folder.