From 7b693a87bf82afdf0e6aaeb9b2e696220670f499 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= <matt.luck@noaa.gov>
Date: Thu, 21 Sep 2023 07:13:44 -0400
Subject: [PATCH 01/18] Copy add_and_verify_crosswalk.py from
 dev-address-crosswalk-issues

---
 src/add_and_verify_crosswalk.py | 245 ++++++++++++++++++++++++++++++++
 1 file changed, 245 insertions(+)
 create mode 100644 src/add_and_verify_crosswalk.py

diff --git a/src/add_and_verify_crosswalk.py b/src/add_and_verify_crosswalk.py
new file mode 100644
index 000000000..5d74bdde9
--- /dev/null
+++ b/src/add_and_verify_crosswalk.py
@@ -0,0 +1,245 @@
+#!/usr/bin/env python3
+
+import numpy as np
+import geopandas as gpd
+from rasterstats import zonal_stats
+import argparse
+import sys
+from utils.shared_functions import mem_profile, getDriver
+
+# Feb 17, 2023
+# We want to explore using FR methodology as branch zero
+
+
+@mem_profile
+def add_and_verify_crosswalk(
+    input_catchments_fileName,
+    input_flows_fileName,
+    input_huc_fileName,
+    input_nwmflows_fileName,
+    input_nwmcatras_fileName,
+    input_nwmcat_fileName,
+    crosswalk_fileName,
+    output_catchments_fileName,
+    output_flows_fileName,
+    extent,
+    min_catchment_area,
+    min_stream_length,
+):
+    input_catchments = gpd.read_file(input_catchments_fileName)
+    input_flows = gpd.read_file(input_flows_fileName)
+    input_huc = gpd.read_file(input_huc_fileName)
+    input_nwmflows = gpd.read_file(input_nwmflows_fileName)
+    min_catchment_area = float(min_catchment_area)  # 0.25#
+    min_stream_length = float(min_stream_length)  # 0.5#
+
+    if extent == 'FR':
+        ## crosswalk using majority catchment method
+
+        # calculate majority catchments
+        majority_calc = zonal_stats(
+            input_catchments, input_nwmcatras_fileName, stats=['majority'], geojson_out=True
+        )
+        input_majorities = gpd.GeoDataFrame.from_features(majority_calc)
+        input_majorities = input_majorities.rename(columns={'majority': 'feature_id'})
+
+        input_majorities = input_majorities[:][input_majorities['feature_id'].notna()]
+        if input_majorities.feature_id.dtype != 'int':
+            input_majorities.feature_id = input_majorities.feature_id.astype(int)
+        if input_majorities.HydroID.dtype != 'int':
+            input_majorities.HydroID = input_majorities.HydroID.astype(int)
+
+        if len(input_majorities) < 1:
+            print('No relevant streams within HUC boundaries.')
+            sys.exit(0)
+        else:
+            input_majorities.to_file(crosswalk_fileName, index=False)
+
+        input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
+        if input_nwmflows.feature_id.dtype != 'int':
+            input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int)
+        relevant_input_nwmflows = input_nwmflows[
+            input_nwmflows['feature_id'].isin(input_majorities['feature_id'])
+        ]
+        relevant_input_nwmflows = relevant_input_nwmflows.filter(items=['feature_id', 'order_'])
+
+        if input_catchments.HydroID.dtype != 'int':
+            input_catchments.HydroID = input_catchments.HydroID.astype(int)
+        output_catchments = input_catchments.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID')
+        output_catchments = output_catchments.merge(
+            relevant_input_nwmflows[['order_', 'feature_id']], on='feature_id'
+        )
+
+        if input_flows.HydroID.dtype != 'int':
+            input_flows.HydroID = input_flows.HydroID.astype(int)
+        output_flows = input_flows.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID')
+        if output_flows.HydroID.dtype != 'int':
+            output_flows.HydroID = output_flows.HydroID.astype(int)
+        output_flows = output_flows.merge(relevant_input_nwmflows[['order_', 'feature_id']], on='feature_id')
+        output_flows = output_flows.merge(
+            output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID'
+        )
+
+    elif (extent == 'MS') | (extent == 'GMS'):
+        ## crosswalk using stream segment midpoint method
+        input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc)
+
+        # only reduce nwm catchments to mainstems if running mainstems
+        if extent == 'MS':
+            input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem == 1]
+
+        input_nwmcat = input_nwmcat.rename(columns={'ID': 'feature_id'})
+        if input_nwmcat.feature_id.dtype != 'int':
+            input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int)
+        input_nwmcat = input_nwmcat.set_index('feature_id')
+
+        input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
+        if input_nwmflows.feature_id.dtype != 'int':
+            input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int)
+
+        # Get stream midpoint
+        stream_midpoint = []
+        hydroID = []
+        for i, lineString in enumerate(input_flows.geometry):
+            hydroID = hydroID + [input_flows.loc[i, 'HydroID']]
+            stream_midpoint = stream_midpoint + [lineString.interpolate(0.5, normalized=True)]
+
+        input_flows_midpoint = gpd.GeoDataFrame(
+            {'HydroID': hydroID, 'geometry': stream_midpoint}, crs=input_flows.crs, geometry='geometry'
+        )
+        input_flows_midpoint = input_flows_midpoint.set_index('HydroID')
+
+        # Create crosswalk
+        crosswalk = gpd.sjoin(
+            input_flows_midpoint, input_nwmcat, how='left', predicate='within'
+        ).reset_index()
+        crosswalk = crosswalk.rename(columns={'index_right': 'feature_id'})
+
+        # fill in missing ms
+        crosswalk_missing = crosswalk.loc[crosswalk.feature_id.isna()]
+        for index, stream in crosswalk_missing.iterrows():
+            # find closest nwm catchment by distance
+            distances = [stream.geometry.distance(poly) for poly in input_nwmcat.geometry]
+            min_dist = min(distances)
+            nwmcat_index = distances.index(min_dist)
+
+            # update crosswalk
+            crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'feature_id'] = input_nwmcat.iloc[
+                nwmcat_index
+            ].name
+            crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'AreaSqKM'] = input_nwmcat.iloc[
+                nwmcat_index
+            ].AreaSqKM
+            crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'Shape_Length'] = input_nwmcat.iloc[
+                nwmcat_index
+            ].Shape_Length
+            crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'Shape_Area'] = input_nwmcat.iloc[
+                nwmcat_index
+            ].Shape_Area
+
+        crosswalk = crosswalk.filter(items=['HydroID', 'feature_id'])
+        crosswalk = crosswalk.merge(input_nwmflows[['feature_id', 'order_']], on='feature_id')
+
+        if len(crosswalk) < 1:
+            print('No relevant streams within HUC boundaries.')
+            sys.exit(0)
+        else:
+            crosswalk.to_csv(crosswalk_fileName, index=False)
+
+        if input_catchments.HydroID.dtype != 'int':
+            input_catchments.HydroID = input_catchments.HydroID.astype(int)
+        output_catchments = input_catchments.merge(crosswalk, on='HydroID')
+
+        if input_flows.HydroID.dtype != 'int':
+            input_flows.HydroID = input_flows.HydroID.astype(int)
+        output_flows = input_flows.merge(crosswalk, on='HydroID')
+
+        # added for GMS. Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh
+        if 'areasqkm' not in output_catchments.columns:
+            output_catchments['areasqkm'] = output_catchments.geometry.area / (1000**2)
+
+        output_flows = output_flows.merge(
+            output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID'
+        )
+
+        # write out
+        output_catchments.to_file(
+            output_catchments_fileName, driver=getDriver(output_catchments_fileName), index=False
+        )
+
+    if output_flows.NextDownID.dtype != 'int':
+        output_flows.NextDownID = output_flows.NextDownID.astype(int)
+
+    output_flows = verify_crosswalk(output_flows, input_nwmflows)
+
+    output_flows.to_file(output_flows_fileName, index=False)
+
+
+def verify_crosswalk(flows, nwm_streams):
+    # Crosswalk check
+    # fh.vprint('Checking for crosswalks between NWM and DEM-derived flowlines', verbose)
+
+    # Compute the number of intersections between the NWM and DEM-derived flowlines
+    streams = nwm_streams
+    n_intersects = 0
+    xwalks = []
+    intersects = flows.sjoin(streams)
+    for idx in intersects.index:
+        if type(intersects.loc[idx, 'HydroID']) == np.int64:
+            flows_idx = intersects.loc[idx, 'HydroID']
+        else:
+            flows_idx = int(intersects.loc[idx, 'HydroID'].unique())
+
+        if type(intersects.loc[idx, 'feature_id_right']) == np.int64:
+            streams_idxs = [intersects.loc[idx, 'feature_id_right']]
+        else:
+            streams_idxs = intersects.loc[idx, 'feature_id_right'].unique()
+
+        for streams_idx in streams_idxs:
+            intersect = gpd.overlay(
+                flows[flows['HydroID'] == flows_idx],
+                nwm_streams[nwm_streams['feature_id'] == streams_idx],
+                keep_geom_type=False,
+            )
+
+            if intersect.geometry[0].geom_type == 'Point':
+                intersect_points = 1
+            else:
+                intersect_points = len(intersect.geometry[0].geoms)
+
+            if intersect_points > n_intersects:
+                n_intersects = intersect_points
+
+            if n_intersects > 0:
+                xwalks.append((flows_idx, streams_idx, intersect_points))
+
+                print(f'Found {intersect_points} intersections for {flows_idx} and {streams_idx}')
+
+    # Get the maximum number of intersections for each flowline
+    xwalks = np.array(xwalks)
+
+    return flows
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description='Crosswalk for MS/FR/GMS networks; calculate synthetic rating curves; update short rating curves'
+    )
+    parser.add_argument('-d', '--input-catchments-fileName', help='DEM derived catchments', required=True)
+    parser.add_argument('-a', '--input-flows-fileName', help='DEM derived streams', required=True)
+    parser.add_argument('-w', '--input-huc-fileName', help='HUC8 boundary', required=True)
+    parser.add_argument('-b', '--input-nwmflows-fileName', help='Subset NWM burnlines', required=True)
+    parser.add_argument('-y', '--input-nwmcatras-fileName', help='NWM catchment raster', required=False)
+    parser.add_argument('-z', '--input-nwmcat-fileName', help='NWM catchment polygon', required=True)
+    parser.add_argument('-c', '--crosswalk-fileName', help='Crosswalk table filename', required=True)
+    parser.add_argument(
+        '-l', '--output-catchments-fileName', help='Subset crosswalked catchments', required=True
+    )
+    parser.add_argument('-p', '--extent', help='GMS only for now', default='GMS', required=False)
+    parser.add_argument('-e', '--min-catchment-area', help='Minimum catchment area', required=True)
+    parser.add_argument('-g', '--min-stream-length', help='Minimum stream length', required=True)
+    parser.add_argument('-f', '--output-flows-fileName', help='Subset crosswalked streams', required=True)
+
+    args = vars(parser.parse_args())
+
+    add_and_verify_crosswalk(**args)

From 2c8588b212a5f1a4b421ca31bd2b40d3a9e5d079 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= <matt.luck@noaa.gov>
Date: Fri, 22 Sep 2023 13:41:48 -0400
Subject: [PATCH 02/18] Compute comparisons

---
 src/add_and_verify_crosswalk.py | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/src/add_and_verify_crosswalk.py b/src/add_and_verify_crosswalk.py
index 5d74bdde9..1dd5f4500 100644
--- a/src/add_and_verify_crosswalk.py
+++ b/src/add_and_verify_crosswalk.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 
 import numpy as np
+import pandas as pd
 import geopandas as gpd
 from rasterstats import zonal_stats
 import argparse
@@ -181,7 +182,6 @@ def verify_crosswalk(flows, nwm_streams):
 
     # Compute the number of intersections between the NWM and DEM-derived flowlines
     streams = nwm_streams
-    n_intersects = 0
     xwalks = []
     intersects = flows.sjoin(streams)
     for idx in intersects.index:
@@ -207,16 +207,28 @@ def verify_crosswalk(flows, nwm_streams):
             else:
                 intersect_points = len(intersect.geometry[0].geoms)
 
-            if intersect_points > n_intersects:
-                n_intersects = intersect_points
-
-            if n_intersects > 0:
-                xwalks.append((flows_idx, streams_idx, intersect_points))
+            xwalks.append(
+                [
+                    flows_idx,
+                    int(flows.loc[flows['HydroID'] == flows_idx, 'feature_id'].iloc[0]),
+                    streams_idx,
+                    intersect_points,
+                ]
+            )
 
-                print(f'Found {intersect_points} intersections for {flows_idx} and {streams_idx}')
+            print(f'Found {intersect_points} intersections for {flows_idx} and {streams_idx}')
 
     # Get the maximum number of intersections for each flowline
-    xwalks = np.array(xwalks)
+    xwalks = pd.DataFrame(xwalks, columns=['HydroID', 'feature_id', 'feature_id_right', 'intersect_points'])
+    xwalks = xwalks.drop_duplicates()
+    xwalks['match'] = xwalks[1] == xwalks[2]
+
+    xwalks_groupby = xwalks[[0, 3]].groupby(0).max()
+
+    xwalks = xwalks.merge(xwalks_groupby, on=0)
+    xwalks['max'] = xwalks['3_x'] == xwalks['3_y']
+
+    xwalks['crosswalk'] = np.where(xwalks['match'] == xwalks['max'], True, False)
 
     return flows
 

From a0f30f770baa8f891fd1f6123b7391d62fc4cd95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= <matt.luck@noaa.gov>
Date: Thu, 12 Oct 2023 15:58:00 -0400
Subject: [PATCH 03/18] isort tools/verify_crosswalk.py

---
 src/add_and_verify_crosswalk.py | 257 --------------------------------
 tools/verify_crosswalk.py       |  96 ++++++++++++
 2 files changed, 96 insertions(+), 257 deletions(-)
 delete mode 100644 src/add_and_verify_crosswalk.py
 create mode 100644 tools/verify_crosswalk.py

diff --git a/src/add_and_verify_crosswalk.py b/src/add_and_verify_crosswalk.py
deleted file mode 100644
index 1dd5f4500..000000000
--- a/src/add_and_verify_crosswalk.py
+++ /dev/null
@@ -1,257 +0,0 @@
-#!/usr/bin/env python3
-
-import numpy as np
-import pandas as pd
-import geopandas as gpd
-from rasterstats import zonal_stats
-import argparse
-import sys
-from utils.shared_functions import mem_profile, getDriver
-
-# Feb 17, 2023
-# We want to explore using FR methodology as branch zero
-
-
-@mem_profile
-def add_and_verify_crosswalk(
-    input_catchments_fileName,
-    input_flows_fileName,
-    input_huc_fileName,
-    input_nwmflows_fileName,
-    input_nwmcatras_fileName,
-    input_nwmcat_fileName,
-    crosswalk_fileName,
-    output_catchments_fileName,
-    output_flows_fileName,
-    extent,
-    min_catchment_area,
-    min_stream_length,
-):
-    input_catchments = gpd.read_file(input_catchments_fileName)
-    input_flows = gpd.read_file(input_flows_fileName)
-    input_huc = gpd.read_file(input_huc_fileName)
-    input_nwmflows = gpd.read_file(input_nwmflows_fileName)
-    min_catchment_area = float(min_catchment_area)  # 0.25#
-    min_stream_length = float(min_stream_length)  # 0.5#
-
-    if extent == 'FR':
-        ## crosswalk using majority catchment method
-
-        # calculate majority catchments
-        majority_calc = zonal_stats(
-            input_catchments, input_nwmcatras_fileName, stats=['majority'], geojson_out=True
-        )
-        input_majorities = gpd.GeoDataFrame.from_features(majority_calc)
-        input_majorities = input_majorities.rename(columns={'majority': 'feature_id'})
-
-        input_majorities = input_majorities[:][input_majorities['feature_id'].notna()]
-        if input_majorities.feature_id.dtype != 'int':
-            input_majorities.feature_id = input_majorities.feature_id.astype(int)
-        if input_majorities.HydroID.dtype != 'int':
-            input_majorities.HydroID = input_majorities.HydroID.astype(int)
-
-        if len(input_majorities) < 1:
-            print('No relevant streams within HUC boundaries.')
-            sys.exit(0)
-        else:
-            input_majorities.to_file(crosswalk_fileName, index=False)
-
-        input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
-        if input_nwmflows.feature_id.dtype != 'int':
-            input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int)
-        relevant_input_nwmflows = input_nwmflows[
-            input_nwmflows['feature_id'].isin(input_majorities['feature_id'])
-        ]
-        relevant_input_nwmflows = relevant_input_nwmflows.filter(items=['feature_id', 'order_'])
-
-        if input_catchments.HydroID.dtype != 'int':
-            input_catchments.HydroID = input_catchments.HydroID.astype(int)
-        output_catchments = input_catchments.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID')
-        output_catchments = output_catchments.merge(
-            relevant_input_nwmflows[['order_', 'feature_id']], on='feature_id'
-        )
-
-        if input_flows.HydroID.dtype != 'int':
-            input_flows.HydroID = input_flows.HydroID.astype(int)
-        output_flows = input_flows.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID')
-        if output_flows.HydroID.dtype != 'int':
-            output_flows.HydroID = output_flows.HydroID.astype(int)
-        output_flows = output_flows.merge(relevant_input_nwmflows[['order_', 'feature_id']], on='feature_id')
-        output_flows = output_flows.merge(
-            output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID'
-        )
-
-    elif (extent == 'MS') | (extent == 'GMS'):
-        ## crosswalk using stream segment midpoint method
-        input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc)
-
-        # only reduce nwm catchments to mainstems if running mainstems
-        if extent == 'MS':
-            input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem == 1]
-
-        input_nwmcat = input_nwmcat.rename(columns={'ID': 'feature_id'})
-        if input_nwmcat.feature_id.dtype != 'int':
-            input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int)
-        input_nwmcat = input_nwmcat.set_index('feature_id')
-
-        input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
-        if input_nwmflows.feature_id.dtype != 'int':
-            input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int)
-
-        # Get stream midpoint
-        stream_midpoint = []
-        hydroID = []
-        for i, lineString in enumerate(input_flows.geometry):
-            hydroID = hydroID + [input_flows.loc[i, 'HydroID']]
-            stream_midpoint = stream_midpoint + [lineString.interpolate(0.5, normalized=True)]
-
-        input_flows_midpoint = gpd.GeoDataFrame(
-            {'HydroID': hydroID, 'geometry': stream_midpoint}, crs=input_flows.crs, geometry='geometry'
-        )
-        input_flows_midpoint = input_flows_midpoint.set_index('HydroID')
-
-        # Create crosswalk
-        crosswalk = gpd.sjoin(
-            input_flows_midpoint, input_nwmcat, how='left', predicate='within'
-        ).reset_index()
-        crosswalk = crosswalk.rename(columns={'index_right': 'feature_id'})
-
-        # fill in missing ms
-        crosswalk_missing = crosswalk.loc[crosswalk.feature_id.isna()]
-        for index, stream in crosswalk_missing.iterrows():
-            # find closest nwm catchment by distance
-            distances = [stream.geometry.distance(poly) for poly in input_nwmcat.geometry]
-            min_dist = min(distances)
-            nwmcat_index = distances.index(min_dist)
-
-            # update crosswalk
-            crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'feature_id'] = input_nwmcat.iloc[
-                nwmcat_index
-            ].name
-            crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'AreaSqKM'] = input_nwmcat.iloc[
-                nwmcat_index
-            ].AreaSqKM
-            crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'Shape_Length'] = input_nwmcat.iloc[
-                nwmcat_index
-            ].Shape_Length
-            crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'Shape_Area'] = input_nwmcat.iloc[
-                nwmcat_index
-            ].Shape_Area
-
-        crosswalk = crosswalk.filter(items=['HydroID', 'feature_id'])
-        crosswalk = crosswalk.merge(input_nwmflows[['feature_id', 'order_']], on='feature_id')
-
-        if len(crosswalk) < 1:
-            print('No relevant streams within HUC boundaries.')
-            sys.exit(0)
-        else:
-            crosswalk.to_csv(crosswalk_fileName, index=False)
-
-        if input_catchments.HydroID.dtype != 'int':
-            input_catchments.HydroID = input_catchments.HydroID.astype(int)
-        output_catchments = input_catchments.merge(crosswalk, on='HydroID')
-
-        if input_flows.HydroID.dtype != 'int':
-            input_flows.HydroID = input_flows.HydroID.astype(int)
-        output_flows = input_flows.merge(crosswalk, on='HydroID')
-
-        # added for GMS. Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh
-        if 'areasqkm' not in output_catchments.columns:
-            output_catchments['areasqkm'] = output_catchments.geometry.area / (1000**2)
-
-        output_flows = output_flows.merge(
-            output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID'
-        )
-
-        # write out
-        output_catchments.to_file(
-            output_catchments_fileName, driver=getDriver(output_catchments_fileName), index=False
-        )
-
-    if output_flows.NextDownID.dtype != 'int':
-        output_flows.NextDownID = output_flows.NextDownID.astype(int)
-
-    output_flows = verify_crosswalk(output_flows, input_nwmflows)
-
-    output_flows.to_file(output_flows_fileName, index=False)
-
-
-def verify_crosswalk(flows, nwm_streams):
-    # Crosswalk check
-    # fh.vprint('Checking for crosswalks between NWM and DEM-derived flowlines', verbose)
-
-    # Compute the number of intersections between the NWM and DEM-derived flowlines
-    streams = nwm_streams
-    xwalks = []
-    intersects = flows.sjoin(streams)
-    for idx in intersects.index:
-        if type(intersects.loc[idx, 'HydroID']) == np.int64:
-            flows_idx = intersects.loc[idx, 'HydroID']
-        else:
-            flows_idx = int(intersects.loc[idx, 'HydroID'].unique())
-
-        if type(intersects.loc[idx, 'feature_id_right']) == np.int64:
-            streams_idxs = [intersects.loc[idx, 'feature_id_right']]
-        else:
-            streams_idxs = intersects.loc[idx, 'feature_id_right'].unique()
-
-        for streams_idx in streams_idxs:
-            intersect = gpd.overlay(
-                flows[flows['HydroID'] == flows_idx],
-                nwm_streams[nwm_streams['feature_id'] == streams_idx],
-                keep_geom_type=False,
-            )
-
-            if intersect.geometry[0].geom_type == 'Point':
-                intersect_points = 1
-            else:
-                intersect_points = len(intersect.geometry[0].geoms)
-
-            xwalks.append(
-                [
-                    flows_idx,
-                    int(flows.loc[flows['HydroID'] == flows_idx, 'feature_id'].iloc[0]),
-                    streams_idx,
-                    intersect_points,
-                ]
-            )
-
-            print(f'Found {intersect_points} intersections for {flows_idx} and {streams_idx}')
-
-    # Get the maximum number of intersections for each flowline
-    xwalks = pd.DataFrame(xwalks, columns=['HydroID', 'feature_id', 'feature_id_right', 'intersect_points'])
-    xwalks = xwalks.drop_duplicates()
-    xwalks['match'] = xwalks[1] == xwalks[2]
-
-    xwalks_groupby = xwalks[[0, 3]].groupby(0).max()
-
-    xwalks = xwalks.merge(xwalks_groupby, on=0)
-    xwalks['max'] = xwalks['3_x'] == xwalks['3_y']
-
-    xwalks['crosswalk'] = np.where(xwalks['match'] == xwalks['max'], True, False)
-
-    return flows
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(
-        description='Crosswalk for MS/FR/GMS networks; calculate synthetic rating curves; update short rating curves'
-    )
-    parser.add_argument('-d', '--input-catchments-fileName', help='DEM derived catchments', required=True)
-    parser.add_argument('-a', '--input-flows-fileName', help='DEM derived streams', required=True)
-    parser.add_argument('-w', '--input-huc-fileName', help='HUC8 boundary', required=True)
-    parser.add_argument('-b', '--input-nwmflows-fileName', help='Subset NWM burnlines', required=True)
-    parser.add_argument('-y', '--input-nwmcatras-fileName', help='NWM catchment raster', required=False)
-    parser.add_argument('-z', '--input-nwmcat-fileName', help='NWM catchment polygon', required=True)
-    parser.add_argument('-c', '--crosswalk-fileName', help='Crosswalk table filename', required=True)
-    parser.add_argument(
-        '-l', '--output-catchments-fileName', help='Subset crosswalked catchments', required=True
-    )
-    parser.add_argument('-p', '--extent', help='GMS only for now', default='GMS', required=False)
-    parser.add_argument('-e', '--min-catchment-area', help='Minimum catchment area', required=True)
-    parser.add_argument('-g', '--min-stream-length', help='Minimum stream length', required=True)
-    parser.add_argument('-f', '--output-flows-fileName', help='Subset crosswalked streams', required=True)
-
-    args = vars(parser.parse_args())
-
-    add_and_verify_crosswalk(**args)
diff --git a/tools/verify_crosswalk.py b/tools/verify_crosswalk.py
new file mode 100644
index 000000000..e42b4a241
--- /dev/null
+++ b/tools/verify_crosswalk.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+
+import argparse
+
+import geopandas as gpd
+import numpy as np
+import pandas as pd
+
+
+def verify_crosswalk(input_flows_fileName, input_nwmflows_fileName, output_table_fileName):
+    """
+    Verify the crosswalk between the NWM and DEM-derived flowlines.
+
+    Parameters
+    ----------
+    input_flows_fileName : str
+        DEM-derived flowlines
+    input_nwmflows_fileName : str
+        NWM flowlines
+    crosswalk_fileName : str
+        Crosswalk table filename
+    """
+
+    nwm_streams = gpd.read_file(input_nwmflows_fileName)
+    flows = gpd.read_file(input_flows_fileName)
+
+    # Crosswalk check
+    # fh.vprint('Checking for crosswalks between NWM and DEM-derived flowlines', verbose)
+
+    # Compute the number of intersections between the NWM and DEM-derived flowlines
+    streams = nwm_streams
+    xwalks = []
+    intersects = flows.sjoin(streams)
+    for idx in intersects.index:
+        if type(intersects.loc[idx, 'HydroID']) == np.int64:
+            flows_idx = intersects.loc[idx, 'HydroID']
+        else:
+            flows_idx = int(intersects.loc[idx, 'HydroID'].unique())
+
+        if type(intersects.loc[idx, 'feature_id_right']) == np.int64:
+            streams_idxs = [intersects.loc[idx, 'feature_id_right']]
+        else:
+            streams_idxs = intersects.loc[idx, 'feature_id_right'].unique()
+
+        for streams_idx in streams_idxs:
+            intersect = gpd.overlay(
+                flows[flows['HydroID'] == flows_idx],
+                nwm_streams[nwm_streams['feature_id'] == streams_idx],
+                keep_geom_type=False,
+            )
+
+            if intersect.geometry[0].geom_type == 'Point':
+                intersect_points = 1
+            else:
+                intersect_points = len(intersect.geometry[0].geoms)
+
+            xwalks.append(
+                [
+                    flows_idx,
+                    int(flows.loc[flows['HydroID'] == flows_idx, 'feature_id'].iloc[0]),
+                    streams_idx,
+                    intersect_points,
+                ]
+            )
+
+            print(f'Found {intersect_points} intersections for {flows_idx} and {streams_idx}')
+
+    # Get the maximum number of intersections for each flowline
+    xwalks = pd.DataFrame(xwalks, columns=['HydroID', 'feature_id', 'feature_id_right', 'intersect_points'])
+    xwalks = xwalks.drop_duplicates()
+    xwalks['match'] = xwalks[1] == xwalks[2]
+
+    xwalks_groupby = xwalks[[0, 3]].groupby(0).max()
+
+    xwalks = xwalks.merge(xwalks_groupby, on=0)
+    xwalks['max'] = xwalks['3_x'] == xwalks['3_y']
+
+    xwalks['crosswalk'] = np.where(xwalks['match'] == xwalks['max'], True, False)
+
+    # Save the crosswalk table
+    xwalks.to_csv(output_table_fileName, index=False)
+
+    return xwalks
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description='Crosswalk for MS/FR/GMS networks; calculate synthetic rating curves; update short rating curves'
+    )
+    parser.add_argument('-a', '--input-flows-fileName', help='DEM derived streams', required=True)
+    parser.add_argument('-b', '--input-nwmflows-fileName', help='Subset NWM burnlines', required=True)
+    parser.add_argument('-c', '--output-table-fileName', help='Output table filename', required=True)
+
+    args = vars(parser.parse_args())
+
+    verify_crosswalk(**args)

From e7797b13188d8c9eaa83d92572832b547e5e7e05 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= <matt.luck@noaa.gov>
Date: Thu, 12 Oct 2023 16:51:46 -0400
Subject: [PATCH 04/18] Debug verify_crosswalk

---
 tools/verify_crosswalk.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/tools/verify_crosswalk.py b/tools/verify_crosswalk.py
index e42b4a241..d1212c470 100644
--- a/tools/verify_crosswalk.py
+++ b/tools/verify_crosswalk.py
@@ -31,16 +31,13 @@ def verify_crosswalk(input_flows_fileName, input_nwmflows_fileName, output_table
     streams = nwm_streams
     xwalks = []
     intersects = flows.sjoin(streams)
+    intersects['HydroID'] = intersects['HydroID'].astype(int)
+    intersects['feature_id_right'] = intersects['feature_id_right'].astype(int)
+
     for idx in intersects.index:
-        if type(intersects.loc[idx, 'HydroID']) == np.int64:
-            flows_idx = intersects.loc[idx, 'HydroID']
-        else:
-            flows_idx = int(intersects.loc[idx, 'HydroID'].unique())
-
-        if type(intersects.loc[idx, 'feature_id_right']) == np.int64:
-            streams_idxs = [intersects.loc[idx, 'feature_id_right']]
-        else:
-            streams_idxs = intersects.loc[idx, 'feature_id_right'].unique()
+        flows_idx = int(intersects.loc[idx, 'HydroID'].unique())
+
+        streams_idxs = intersects.loc[idx, 'feature_id_right'].unique()
 
         for streams_idx in streams_idxs:
             intersect = gpd.overlay(

From b8d8ba5bac72d01ab20f8f1b48d505d982ea1690 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= <matt.luck@noaa.gov>
Date: Thu, 12 Oct 2023 17:34:31 -0400
Subject: [PATCH 05/18] Minor edits

---
 tools/verify_crosswalk.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tools/verify_crosswalk.py b/tools/verify_crosswalk.py
index d1212c470..94d295c68 100644
--- a/tools/verify_crosswalk.py
+++ b/tools/verify_crosswalk.py
@@ -32,17 +32,16 @@ def verify_crosswalk(input_flows_fileName, input_nwmflows_fileName, output_table
     xwalks = []
     intersects = flows.sjoin(streams)
     intersects['HydroID'] = intersects['HydroID'].astype(int)
-    intersects['feature_id_right'] = intersects['feature_id_right'].astype(int)
 
     for idx in intersects.index:
-        flows_idx = int(intersects.loc[idx, 'HydroID'].unique())
+        flows_idx = intersects.loc[idx, 'HydroID'].unique()
 
-        streams_idxs = intersects.loc[idx, 'feature_id_right'].unique()
+        streams_idxs = intersects.loc[idx, 'feature_id'].unique()
 
         for streams_idx in streams_idxs:
             intersect = gpd.overlay(
                 flows[flows['HydroID'] == flows_idx],
-                nwm_streams[nwm_streams['feature_id'] == streams_idx],
+                nwm_streams[nwm_streams['ID'] == streams_idx],
                 keep_geom_type=False,
             )
 

From 0ab05adf3e972291c9e882e92d016a8c55c924b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= <matt.luck@noaa.gov>
Date: Mon, 16 Oct 2023 16:00:40 -0400
Subject: [PATCH 06/18] Count segment intersections

---
 tools/verify_crosswalk.py | 92 +++++++++++++++++----------------------
 1 file changed, 40 insertions(+), 52 deletions(-)

diff --git a/tools/verify_crosswalk.py b/tools/verify_crosswalk.py
index 94d295c68..54a27957f 100644
--- a/tools/verify_crosswalk.py
+++ b/tools/verify_crosswalk.py
@@ -8,75 +8,63 @@
 
 
 def verify_crosswalk(input_flows_fileName, input_nwmflows_fileName, output_table_fileName):
-    """
-    Verify the crosswalk between the NWM and DEM-derived flowlines.
-
-    Parameters
-    ----------
-    input_flows_fileName : str
-        DEM-derived flowlines
-    input_nwmflows_fileName : str
-        NWM flowlines
-    crosswalk_fileName : str
-        Crosswalk table filename
-    """
-
-    nwm_streams = gpd.read_file(input_nwmflows_fileName)
-    flows = gpd.read_file(input_flows_fileName)
-
     # Crosswalk check
     # fh.vprint('Checking for crosswalks between NWM and DEM-derived flowlines', verbose)
 
+    flows = gpd.read_file(input_flows_fileName)
+    nwm_streams = gpd.read_file(input_nwmflows_fileName)
+
     # Compute the number of intersections between the NWM and DEM-derived flowlines
     streams = nwm_streams
     xwalks = []
     intersects = flows.sjoin(streams)
-    intersects['HydroID'] = intersects['HydroID'].astype(int)
 
     for idx in intersects.index:
-        flows_idx = intersects.loc[idx, 'HydroID'].unique()
-
-        streams_idxs = intersects.loc[idx, 'feature_id'].unique()
-
-        for streams_idx in streams_idxs:
-            intersect = gpd.overlay(
-                flows[flows['HydroID'] == flows_idx],
-                nwm_streams[nwm_streams['ID'] == streams_idx],
-                keep_geom_type=False,
-            )
-
-            if intersect.geometry[0].geom_type == 'Point':
-                intersect_points = 1
-            else:
-                intersect_points = len(intersect.geometry[0].geoms)
-
-            xwalks.append(
-                [
-                    flows_idx,
-                    int(flows.loc[flows['HydroID'] == flows_idx, 'feature_id'].iloc[0]),
-                    streams_idx,
-                    intersect_points,
-                ]
-            )
-
-            print(f'Found {intersect_points} intersections for {flows_idx} and {streams_idx}')
+        flows_idx = intersects.loc[intersects.index == idx, 'HydroID'].unique()
+
+        if type(intersects.loc[idx, 'ID']) == np.int64:
+            streams_idxs = [intersects.loc[idx, 'ID']]
+        else:
+            streams_idxs = intersects.loc[idx, 'ID'].unique()
+
+        for flows_id in flows_idx:
+            for streams_idx in streams_idxs:
+                intersect = gpd.overlay(
+                    flows[flows['HydroID'] == flows_id],
+                    nwm_streams[nwm_streams['ID'] == streams_idx],
+                    keep_geom_type=False,
+                )
+
+                if len(intersect) == 0:
+                    intersect_points = 0
+                    feature_id = flows.loc[flows['HydroID'] == flows_id, 'feature_id']
+                elif intersect.geometry[0].geom_type == 'Point':
+                    intersect_points = 1
+                    feature_id = flows.loc[flows['HydroID'] == flows_id, 'feature_id']
+                else:
+                    intersect_points = len(intersect.geometry[0].geoms)
+                    feature_id = int(flows.loc[flows['HydroID'] == flows_id, 'feature_id'].iloc[0])
+
+                xwalks.append([flows_id, feature_id, streams_idx, intersect_points])
+
+                print(f'Found {intersect_points} intersections for {flows_id} and {streams_idx}')
 
     # Get the maximum number of intersections for each flowline
-    xwalks = pd.DataFrame(xwalks, columns=['HydroID', 'feature_id', 'feature_id_right', 'intersect_points'])
-    xwalks = xwalks.drop_duplicates()
-    xwalks['match'] = xwalks[1] == xwalks[2]
+    xwalks = pd.DataFrame(xwalks, columns=['HydroID', 'feature_id', 'ID', 'intersect_points'])
+    xwalks['feature_id'] = xwalks['feature_id'].astype(int)
+
+    xwalks['match'] = xwalks['feature_id'] == xwalks['ID']
 
-    xwalks_groupby = xwalks[[0, 3]].groupby(0).max()
+    xwalks_groupby = xwalks[['HydroID', 'intersect_points']].groupby('HydroID').max()
 
-    xwalks = xwalks.merge(xwalks_groupby, on=0)
-    xwalks['max'] = xwalks['3_x'] == xwalks['3_y']
+    xwalks = xwalks.merge(xwalks_groupby, on='HydroID', how='left')
+    xwalks['max'] = xwalks['intersect_points_x'] == xwalks['intersect_points_y']
 
-    xwalks['crosswalk'] = np.where(xwalks['match'] == xwalks['max'], True, False)
+    xwalks['crosswalk'] = xwalks['match'] == xwalks['max']
 
-    # Save the crosswalk table
     xwalks.to_csv(output_table_fileName, index=False)
 
-    return xwalks
+    return flows
 
 
 if __name__ == '__main__':

From d8ac9b0cb75b9aca5984ee9b979ee8afeada618c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= <matt.luck@noaa.gov>
Date: Mon, 16 Oct 2023 17:38:28 -0400
Subject: [PATCH 07/18] Minor edits

---
 tools/verify_crosswalk.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/verify_crosswalk.py b/tools/verify_crosswalk.py
index 54a27957f..5fdec6855 100644
--- a/tools/verify_crosswalk.py
+++ b/tools/verify_crosswalk.py
@@ -60,7 +60,11 @@ def verify_crosswalk(input_flows_fileName, input_nwmflows_fileName, output_table
     xwalks = xwalks.merge(xwalks_groupby, on='HydroID', how='left')
     xwalks['max'] = xwalks['intersect_points_x'] == xwalks['intersect_points_y']
 
-    xwalks['crosswalk'] = xwalks['match'] == xwalks['max']
+    xwalks['crosswalk'] = (xwalks['match'] is True) & (xwalks['max'] is True)
+
+    xwalks = xwalks.drop_duplicates()
+
+    xwalks = xwalks.sort_values(by=['HydroID', 'intersect_points_x', 'ID'], ascending=False)
 
     xwalks.to_csv(output_table_fileName, index=False)
 

From e171c8f738f42e0ee3857e654bc1d0b61386dead Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= <matt.luck@noaa.gov>
Date: Tue, 17 Oct 2023 20:53:26 -0400
Subject: [PATCH 08/18] isort

---
 src/map_headwaters_to_reaches.py | 36 ++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 src/map_headwaters_to_reaches.py

diff --git a/src/map_headwaters_to_reaches.py b/src/map_headwaters_to_reaches.py
new file mode 100644
index 000000000..3a4b1cf6b
--- /dev/null
+++ b/src/map_headwaters_to_reaches.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+
+import os
+
+import geopandas as gpd
+
+
+fim_dir = '/outputs/dev-4.4.2.3/18100201'
+
+headwaters = gpd.read_file(os.path.join(fim_dir, 'nwm_headwater_points_subset.gpkg'))
+reaches = gpd.read_file(os.path.join(fim_dir, 'branches', '0', 'demDerived_reaches_0.shp'))
+
+# Snap nearest reaches where magnitude is 1 to headwaters
+reaches_mag_1 = reaches[reaches['Magnitude'] == 1]
+snapped_reaches = gpd.sjoin_nearest(reaches_mag_1, headwaters, how='left', distance_col='distance')
+reaches_snapped = reaches.merge(snapped_reaches[['LINKNO', 'ID', 'distance']], on='LINKNO', how='left')
+
+
+def sum_upstream(row):
+    """Recursive function to sum up all upstream reaches."""
+
+    sum = 0
+    if row['USLINKNO1'] != -1:
+        sum += sum_upstream(row['USLINKNO1'])
+    else:
+        sum += row['ID']
+
+    if row['USLINKNO2'] != -1:
+        sum += sum_upstream(row['USLINKNO2'])
+    else:
+        sum += row['ID']
+
+
+# Apply stream algorithm with headwaters IDs
+for i, row in reaches_snapped[reaches_snapped['DSLINKNO'] == -1].iterrows():
+    sum = sum_upstream(row)

From 2964b971ca8005f67614a947f98993eeb31b2b8a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= <matt.luck@noaa.gov>
Date: Thu, 26 Oct 2023 14:33:31 -0400
Subject: [PATCH 09/18] Reformat

---
 tools/verify_crosswalk.py | 147 ++++++++++++++++++++++++--------------
 1 file changed, 95 insertions(+), 52 deletions(-)

diff --git a/tools/verify_crosswalk.py b/tools/verify_crosswalk.py
index 5fdec6855..1b0135d6e 100644
--- a/tools/verify_crosswalk.py
+++ b/tools/verify_crosswalk.py
@@ -7,68 +7,110 @@
 import pandas as pd
 
 
-def verify_crosswalk(input_flows_fileName, input_nwmflows_fileName, output_table_fileName):
-    # Crosswalk check
+def verify_crosswalk(
+    input_flows_fileName, input_nwmflows_fileName, input_nwm_headwaters_fileName, output_table_fileName
+):
+    # Check for crosswalks between NWM and DEM-derived flowlines
     # fh.vprint('Checking for crosswalks between NWM and DEM-derived flowlines', verbose)
 
     flows = gpd.read_file(input_flows_fileName)
+    flows['HydroID'] = flows['HydroID'].astype(int)
     nwm_streams = gpd.read_file(input_nwmflows_fileName)
-
-    # Compute the number of intersections between the NWM and DEM-derived flowlines
-    streams = nwm_streams
-    xwalks = []
-    intersects = flows.sjoin(streams)
-
-    for idx in intersects.index:
-        flows_idx = intersects.loc[intersects.index == idx, 'HydroID'].unique()
-
-        if type(intersects.loc[idx, 'ID']) == np.int64:
-            streams_idxs = [intersects.loc[idx, 'ID']]
-        else:
-            streams_idxs = intersects.loc[idx, 'ID'].unique()
-
-        for flows_id in flows_idx:
-            for streams_idx in streams_idxs:
-                intersect = gpd.overlay(
-                    flows[flows['HydroID'] == flows_id],
-                    nwm_streams[nwm_streams['ID'] == streams_idx],
-                    keep_geom_type=False,
+    nwm_streams = nwm_streams.rename(columns={'ID': 'feature_id'})
+    nwm_headwaters = gpd.read_file(input_nwm_headwaters_fileName)
+
+    streams_outlets = nwm_streams.loc[~nwm_streams.to.isin(nwm_streams.feature_id), 'feature_id']
+    flows_outlets = flows.loc[~flows['NextDownID'].isin(flows['HydroID']), 'HydroID']
+
+    nwm_streams_headwaters_list = ~nwm_streams['feature_id'].isin(nwm_streams['to'])
+    # flows_headwaters_list = ~flows['LINKNO'].isin(flows['DSLINKNO'])
+    flows_headwaters_list = ~flows['HydroID'].isin(flows['NextDownID'])
+
+    nwm_streams_headwaters = nwm_streams[nwm_streams_headwaters_list]
+    flows_headwaters = flows[flows_headwaters_list]
+
+    # Map headwater points to DEM-derived reaches
+    flows_headwaters = flows_headwaters.sjoin_nearest(nwm_headwaters)
+    flows_headwaters = flows_headwaters[['HydroID', 'ID']]
+    nwm_streams_headwaters = nwm_streams_headwaters.sjoin_nearest(nwm_headwaters)
+    nwm_streams_headwaters = nwm_streams_headwaters[['feature_id', 'ID']]
+
+    def _hydroid_to_feature_id(df, hydroid, hydroid_attr, feature_id_attr):
+        return df.loc[df[hydroid_attr] == hydroid, feature_id_attr]
+
+    def _get_upstream_data(data, data_headwaters, data_dict, hydroid, hydroid_attr, nextdownid_attr):
+        # Find upstream segments
+        data_dict[hydroid] = list(data.loc[data[nextdownid_attr] == hydroid, hydroid_attr].values)
+
+        for hydroid in data_dict[hydroid]:
+            if hydroid in data_headwaters[hydroid_attr].values:
+                data_dict[hydroid] = data_headwaters.loc[
+                    data_headwaters[hydroid_attr] == hydroid, 'ID'
+                ].values[0]
+            else:
+                data_dict = _get_upstream_data(
+                    data, data_headwaters, data_dict, hydroid, hydroid_attr, nextdownid_attr
                 )
 
-                if len(intersect) == 0:
-                    intersect_points = 0
-                    feature_id = flows.loc[flows['HydroID'] == flows_id, 'feature_id']
-                elif intersect.geometry[0].geom_type == 'Point':
-                    intersect_points = 1
-                    feature_id = flows.loc[flows['HydroID'] == flows_id, 'feature_id']
+        return data_dict
+
+    flows_dict = dict()
+    streams_dict = dict()
+
+    # Compare hash tables
+    for hydroid in flows_outlets:
+        flows_dict = _get_upstream_data(flows, flows_headwaters, flows_dict, hydroid, 'HydroID', 'NextDownID')
+
+    for feature_id in streams_outlets:
+        streams_dict = _get_upstream_data(
+            nwm_streams, nwm_streams_headwaters, streams_dict, feature_id, 'feature_id', 'to'
+        )
+
+    results = []
+    for flow in flows_dict:
+        fid = _hydroid_to_feature_id(flows, flow, 'HydroID', 'feature_id').iloc[0]
+        upstream_fid = flows_dict[flow]
+
+        upstream_fids = []
+        nwm_fids = streams_dict[fid]
+        out_list = [flow, fid, upstream_fids, nwm_fids]
+
+        if type(upstream_fid) != np.int64:
+            if len(upstream_fid) > 0:
+                for i in upstream_fid:
+                    # Find upstream feature_id(s)
+                    temp_ids = streams_dict[
+                        int(_hydroid_to_feature_id(flows, i, 'HydroID', 'feature_id').iloc[0])
+                    ]
+                    if type(temp_ids) == list:
+                        upstream_fids.append(temp_ids[0])
+                    else:
+                        upstream_fids.append(temp_ids)
+
+                out_list = [flow, fid, upstream_fids, nwm_fids]
+                if type(nwm_fids) == np.int64:
+                    nwm_fids = [nwm_fids]
+                if upstream_fids == nwm_fids:
+                    # 0: Crosswalk is correct
+                    out_list.append(0)
                 else:
-                    intersect_points = len(intersect.geometry[0].geoms)
-                    feature_id = int(flows.loc[flows['HydroID'] == flows_id, 'feature_id'].iloc[0])
-
-                xwalks.append([flows_id, feature_id, streams_idx, intersect_points])
-
-                print(f'Found {intersect_points} intersections for {flows_id} and {streams_idx}')
-
-    # Get the maximum number of intersections for each flowline
-    xwalks = pd.DataFrame(xwalks, columns=['HydroID', 'feature_id', 'ID', 'intersect_points'])
-    xwalks['feature_id'] = xwalks['feature_id'].astype(int)
-
-    xwalks['match'] = xwalks['feature_id'] == xwalks['ID']
-
-    xwalks_groupby = xwalks[['HydroID', 'intersect_points']].groupby('HydroID').max()
-
-    xwalks = xwalks.merge(xwalks_groupby, on='HydroID', how='left')
-    xwalks['max'] = xwalks['intersect_points_x'] == xwalks['intersect_points_y']
-
-    xwalks['crosswalk'] = (xwalks['match'] is True) & (xwalks['max'] is True)
-
-    xwalks = xwalks.drop_duplicates()
+                    # 1: Crosswalk is incorrect
+                    out_list.append(1)
+            else:
+                # 2: Crosswalk is empty
+                out_list.append(2)
+        else:
+            # 3: if upstream is a headwater point
+            out_list.append(3)
 
-    xwalks = xwalks.sort_values(by=['HydroID', 'intersect_points_x', 'ID'], ascending=False)
+        results.append(out_list)
 
-    xwalks.to_csv(output_table_fileName, index=False)
+    results = pd.DataFrame(
+        results, columns=['HydroID', 'feature_id', 'upstream_fids', 'upstream_nwm_fids', 'status']
+    )
+    results.to_csv(output_table_fileName, index=False)
 
-    return flows
+    return results
 
 
 if __name__ == '__main__':
@@ -77,6 +119,7 @@ def verify_crosswalk(input_flows_fileName, input_nwmflows_fileName, output_table
     )
     parser.add_argument('-a', '--input-flows-fileName', help='DEM derived streams', required=True)
     parser.add_argument('-b', '--input-nwmflows-fileName', help='Subset NWM burnlines', required=True)
+    parser.add_argument('-d', '--input-nwm-headwaters-fileName', help='Subset NWM headwaters', required=True)
     parser.add_argument('-c', '--output-table-fileName', help='Output table filename', required=True)
 
     args = vars(parser.parse_args())

From c019fc17ac70cf6267df265a0ff8e23d416bf1f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= <matt.luck@noaa.gov>
Date: Fri, 27 Oct 2023 15:11:32 -0400
Subject: [PATCH 10/18] Ignore multiple feature_ids

---
 tools/verify_crosswalk.py | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/tools/verify_crosswalk.py b/tools/verify_crosswalk.py
index 1b0135d6e..11a66c714 100644
--- a/tools/verify_crosswalk.py
+++ b/tools/verify_crosswalk.py
@@ -69,28 +69,31 @@ def _get_upstream_data(data, data_headwaters, data_dict, hydroid, hydroid_attr,
     results = []
     for flow in flows_dict:
         fid = _hydroid_to_feature_id(flows, flow, 'HydroID', 'feature_id').iloc[0]
-        upstream_fid = flows_dict[flow]
+        upstream_hid = flows_dict[flow]
 
         upstream_fids = []
         nwm_fids = streams_dict[fid]
         out_list = [flow, fid, upstream_fids, nwm_fids]
 
-        if type(upstream_fid) != np.int64:
-            if len(upstream_fid) > 0:
-                for i in upstream_fid:
+        if type(upstream_hid) != np.int64:
+            if len(upstream_hid) > 0:
+                for i in upstream_hid:
                     # Find upstream feature_id(s)
-                    temp_ids = streams_dict[
-                        int(_hydroid_to_feature_id(flows, i, 'HydroID', 'feature_id').iloc[0])
-                    ]
-                    if type(temp_ids) == list:
-                        upstream_fids.append(temp_ids[0])
+                    temp_fid = int(_hydroid_to_feature_id(flows, i, 'HydroID', 'feature_id').iloc[0])
+
+                    if type(temp_fid) == list:
+                        upstream_fids.append(temp_fid[0])
                     else:
-                        upstream_fids.append(temp_ids)
+                        upstream_fids.append(temp_fid)
 
                 out_list = [flow, fid, upstream_fids, nwm_fids]
                 if type(nwm_fids) == np.int64:
                     nwm_fids = [nwm_fids]
-                if upstream_fids == nwm_fids:
+
+                if fid in upstream_fids:
+                    # Skip duplicate feature_ids
+                    out_list.append(-1)
+                elif set(upstream_fids) == set(nwm_fids):
                     # 0: Crosswalk is correct
                     out_list.append(0)
                 else:
@@ -106,7 +109,7 @@ def _get_upstream_data(data, data_headwaters, data_dict, hydroid, hydroid_attr,
         results.append(out_list)
 
     results = pd.DataFrame(
-        results, columns=['HydroID', 'feature_id', 'upstream_fids', 'upstream_nwm_fids', 'status']
+        data=results, columns=['HydroID', 'feature_id', 'upstream_fids', 'upstream_nwm_fids', 'status']
     )
     results.to_csv(output_table_fileName, index=False)
 

From 838bd7df68555466d2f55292af1022485bba4dd5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= <matt.luck@noaa.gov>
Date: Mon, 30 Oct 2023 09:26:15 -0400
Subject: [PATCH 11/18] Update documentation

---
 tools/verify_crosswalk.py | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/tools/verify_crosswalk.py b/tools/verify_crosswalk.py
index 11a66c714..35b975911 100644
--- a/tools/verify_crosswalk.py
+++ b/tools/verify_crosswalk.py
@@ -10,6 +10,28 @@
 def verify_crosswalk(
     input_flows_fileName, input_nwmflows_fileName, input_nwm_headwaters_fileName, output_table_fileName
 ):
+    """
+    Tool to check the accuracy of crosswalked attributes
+
+    Parameters
+    ----------
+    input_flows_fileName : str
+        Path to DEM derived streams
+    input_nwmflows_fileName : str
+        Path to subset NWM burnlines
+    input_nwm_headwaters_fileName : str
+        Path to subset NWM headwaters
+    output_table_fileName : str
+        Path to output table filename
+
+    Returns
+    -------
+    results : pandas.DataFrame
+
+    Usage
+    -----
+    python verify_crosswalk.py -a <input_flows_fileName> -b <input_nwmflows_fileName> -d <input_nwm_headwaters_fileName> -c <output_table_fileName>
+    """
     # Check for crosswalks between NWM and DEM-derived flowlines
     # fh.vprint('Checking for crosswalks between NWM and DEM-derived flowlines', verbose)
 
@@ -117,9 +139,7 @@ def _get_upstream_data(data, data_headwaters, data_dict, hydroid, hydroid_attr,
 
 
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(
-        description='Crosswalk for MS/FR/GMS networks; calculate synthetic rating curves; update short rating curves'
-    )
+    parser = argparse.ArgumentParser(description='Tool to check crosswalk accuracy')
     parser.add_argument('-a', '--input-flows-fileName', help='DEM derived streams', required=True)
     parser.add_argument('-b', '--input-nwmflows-fileName', help='Subset NWM burnlines', required=True)
     parser.add_argument('-d', '--input-nwm-headwaters-fileName', help='Subset NWM headwaters', required=True)

From 70647259772150bb36dcb1a598389386e6739ea0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= <matt.luck@noaa.gov>
Date: Wed, 3 Jan 2024 14:04:41 -0500
Subject: [PATCH 12/18] Combine intersections and network

---
 tools/verify_crosswalk.py | 108 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 107 insertions(+), 1 deletion(-)

diff --git a/tools/verify_crosswalk.py b/tools/verify_crosswalk.py
index 35b975911..652fa7281 100644
--- a/tools/verify_crosswalk.py
+++ b/tools/verify_crosswalk.py
@@ -28,6 +28,113 @@ def verify_crosswalk(
     -------
     results : pandas.DataFrame
 
+    Usage
+    -----
+    python verify_crosswalk.py -a <input_flows_fileName> -b <input_nwmflows_fileName> -d <input_nwm_headwaters_fileName> -c <output_table_fileName>
+    """
+
+    intersections_results = _verify_crosswalk_intersections(input_flows_fileName, input_nwmflows_fileName)
+
+    intersections_correct = intersections_results['crosswalk'].sum()
+    intersections_total = len(intersections_results)
+    intersections_summary = intersections_correct * 100.0 / intersections_total
+
+    network_results = _verify_crosswalk_network(
+        input_flows_fileName, input_nwmflows_fileName, input_nwm_headwaters_fileName
+    )
+
+    network_results = network_results[network_results['status'] >= 0]
+    network_correct = len(network_results[network_results['status'] == 0])
+    network_total = len(network_results)
+    network_summary = network_correct * 100.0 / network_total
+
+    results = pd.DataFrame(
+        data={
+            'type': ['intersections', 'network'],
+            'correct': [intersections_correct, network_correct],
+            'total': [intersections_total, network_total],
+            'percent': [intersections_summary, network_summary],
+        }
+    )
+
+    results.to_csv(output_table_fileName, index=False)
+
+
+def _verify_crosswalk_intersections(input_flows_fileName, input_nwmflows_fileName):
+    # Crosswalk check
+    # fh.vprint('Checking for crosswalks between NWM and DEM-derived flowlines', verbose)
+
+    flows = gpd.read_file(input_flows_fileName)
+    nwm_streams = gpd.read_file(input_nwmflows_fileName)
+
+    # Compute the number of intersections between the NWM and DEM-derived flowlines
+    streams = nwm_streams
+    xwalks = []
+    intersects = flows.sjoin(streams)
+
+    for idx in intersects.index:
+        flows_idx = intersects.loc[intersects.index == idx, 'HydroID'].unique()
+
+        if type(intersects.loc[idx, 'ID']) == np.int64:
+            streams_idxs = [intersects.loc[idx, 'ID']]
+        else:
+            streams_idxs = intersects.loc[idx, 'ID'].unique()
+
+        for flows_id in flows_idx:
+            for streams_idx in streams_idxs:
+                intersect = gpd.overlay(
+                    flows[flows['HydroID'] == flows_id],
+                    nwm_streams[nwm_streams['ID'] == streams_idx],
+                    keep_geom_type=False,
+                )
+
+                if len(intersect) == 0:
+                    intersect_points = 0
+                    feature_id = flows.loc[flows['HydroID'] == flows_id, 'feature_id']
+                elif intersect.geometry[0].geom_type == 'Point':
+                    intersect_points = 1
+                    feature_id = flows.loc[flows['HydroID'] == flows_id, 'feature_id']
+                else:
+                    intersect_points = len(intersect.geometry[0].geoms)
+                    feature_id = int(flows.loc[flows['HydroID'] == flows_id, 'feature_id'].iloc[0])
+
+                xwalks.append([flows_id, feature_id, streams_idx, intersect_points])
+
+    # Get the maximum number of intersections for each flowline
+    xwalks = pd.DataFrame(xwalks, columns=['HydroID', 'feature_id', 'ID', 'intersect_points'])
+    xwalks['feature_id'] = xwalks['feature_id'].astype(int)
+
+    xwalks['match'] = xwalks['feature_id'] == xwalks['ID']
+
+    xwalks_groupby = xwalks[['HydroID', 'intersect_points']].groupby('HydroID').max()
+
+    xwalks = xwalks.merge(xwalks_groupby, on='HydroID', how='left')
+    xwalks['max'] = xwalks['intersect_points_x'] == xwalks['intersect_points_y']
+
+    xwalks['crosswalk'] = xwalks['match'] == xwalks['max']
+
+    return xwalks
+
+
+def _verify_crosswalk_network(input_flows_fileName, input_nwmflows_fileName, input_nwm_headwaters_fileName):
+    """
+    Tool to check the accuracy of crosswalked attributes
+
+    Parameters
+    ----------
+    input_flows_fileName : str
+        Path to DEM derived streams
+    input_nwmflows_fileName : str
+        Path to subset NWM burnlines
+    input_nwm_headwaters_fileName : str
+        Path to subset NWM headwaters
+    output_table_fileName : str
+        Path to output table filename
+
+    Returns
+    -------
+    results : pandas.DataFrame
+
     Usage
     -----
     python verify_crosswalk.py -a <input_flows_fileName> -b <input_nwmflows_fileName> -d <input_nwm_headwaters_fileName> -c <output_table_fileName>
@@ -133,7 +240,6 @@ def _get_upstream_data(data, data_headwaters, data_dict, hydroid, hydroid_attr,
     results = pd.DataFrame(
         data=results, columns=['HydroID', 'feature_id', 'upstream_fids', 'upstream_nwm_fids', 'status']
     )
-    results.to_csv(output_table_fileName, index=False)
 
     return results
 

From 3637f5a5dd1969d0965beaea19b8661765c0f92d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= <matt.luck@noaa.gov>
Date: Thu, 4 Jan 2024 19:38:40 -0500
Subject: [PATCH 13/18] Add switch to run tool in fim_pipeline

---
 Dockerfile                               |  1 +
 fim_pipeline.sh                          |  1 +
 fim_post_processing.sh                   |  2 +-
 fim_pre_processing.sh                    |  5 +++++
 src/delineate_hydros_and_produce_HAND.sh | 13 +++++++++++++
 5 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 512e726dd..dd579c65e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -54,6 +54,7 @@ ARG depDir=/dependencies
 ENV inputsDir=$dataDir/inputs
 ENV outputsDir=/outputs
 ENV srcDir=$projectDir/src
+ENV toolsDir=$projectDir/tools
 ENV workDir=/fim_temp
 ENV taudemDir=$depDir/taudem/bin
 ENV taudemDir2=$depDir/taudem_accelerated_flowDirections/taudem/build/bin
diff --git a/fim_pipeline.sh b/fim_pipeline.sh
index 8523d21ef..4e1e76159 100755
--- a/fim_pipeline.sh
+++ b/fim_pipeline.sh
@@ -48,6 +48,7 @@ usage()
                             will be skipped.
       -isaws            : If this param is included, AWS objects will be used where possible
                         - Note: This feature is not yet implemented.
+      -x                : If this param is included, the crosswalk will be evaluated.
 
 
     Running 'fim_pipeline.sh' is a quicker process than running all three scripts independently; however,
diff --git a/fim_post_processing.sh b/fim_post_processing.sh
index f8266c94d..4ebb7b5fa 100755
--- a/fim_post_processing.sh
+++ b/fim_post_processing.sh
@@ -210,7 +210,7 @@ echo
 echo -e $startDiv"Combining crosswalk tables"
 # aggregate outputs
 Tstart
-python3 /foss_fim/tools/combine_crosswalk_tables.py \
+python3 $toolsDir/combine_crosswalk_tables.py \
     -d $outputDestDir \
     -o $outputDestDir/crosswalk_table.csv
 Tcount
diff --git a/fim_pre_processing.sh b/fim_pre_processing.sh
index 61eb3ef1a..cff84fa23 100755
--- a/fim_pre_processing.sh
+++ b/fim_pre_processing.sh
@@ -101,6 +101,9 @@ in
     -isaws)
         isAWS=1
         ;;
+    -x)
+        evaluateCrosswalk=1
+        ;;
     *) ;;
     esac
     shift
@@ -129,6 +132,7 @@ if [ "$jobBranchLimit" = "" ]; then jobBranchLimit=1; fi
 if [ -z "$overwrite" ]; then overwrite=0; fi
 if [ -z "$skipcal" ]; then skipcal=0; fi
 if [ -z "$isAWS" ]; then isAWS=0; fi
+if [ -z "$evaluateCrosswalk" ]; then evaluateCrosswalk=0; fi
 
 # validate and set defaults for the deny lists
 if [ "$deny_unit_list" = "" ]
@@ -234,6 +238,7 @@ echo "export deny_branch_zero_list=$deny_branch_zero_list" >> $args_file
 echo "export has_deny_branch_zero_override=$has_deny_branch_zero_override" >> $args_file
 echo "export isAWS=$isAWS" >> $args_file
 echo "export skipcal=$skipcal" >> $args_file
+echo "export evaluateCrosswalk=$evaluateCrosswalk" >> $args_file
 
 echo "--- Pre-processing is complete"
 
diff --git a/src/delineate_hydros_and_produce_HAND.sh b/src/delineate_hydros_and_produce_HAND.sh
index 2b04da494..2ec982323 100755
--- a/src/delineate_hydros_and_produce_HAND.sh
+++ b/src/delineate_hydros_and_produce_HAND.sh
@@ -302,3 +302,16 @@ python3 $srcDir/add_crosswalk.py \
     -e $min_catchment_area \
     -g $min_stream_length
 Tcount
+
+## EVALUATE CROSSWALK ##
+if [ "$evaluateCrosswalk" = "True" ]; then
+    echo -e $startDiv"Evaluate crosswalk $hucNumber $current_branch_id"
+    date -u
+    Tstart
+    python3 $toolsDir/verify_crosswalk.py \
+        -a $tempCurrentBranchDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg \
+        -b $b_arg \
+        -c $tempCurrentBranchDataDir/verify_crosswalk_$current_branch_id.csv \
+        -d $tempHucDataDir/nwm_headwater_points_subset.gpkg
+    Tcount
+fi

From 1db7855a98b29788e3eb342a3a841fa8d49efb36 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= <matt.luck@noaa.gov>
Date: Fri, 5 Jan 2024 09:23:01 -0500
Subject: [PATCH 14/18] Delete unnecessary file

---
 src/map_headwaters_to_reaches.py | 36 --------------------------------
 1 file changed, 36 deletions(-)
 delete mode 100644 src/map_headwaters_to_reaches.py

diff --git a/src/map_headwaters_to_reaches.py b/src/map_headwaters_to_reaches.py
deleted file mode 100644
index 3a4b1cf6b..000000000
--- a/src/map_headwaters_to_reaches.py
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-
-import geopandas as gpd
-
-
-fim_dir = '/outputs/dev-4.4.2.3/18100201'
-
-headwaters = gpd.read_file(os.path.join(fim_dir, 'nwm_headwater_points_subset.gpkg'))
-reaches = gpd.read_file(os.path.join(fim_dir, 'branches', '0', 'demDerived_reaches_0.shp'))
-
-# Snap nearest reaches where magnitude is 1 to headwaters
-reaches_mag_1 = reaches[reaches['Magnitude'] == 1]
-snapped_reaches = gpd.sjoin_nearest(reaches_mag_1, headwaters, how='left', distance_col='distance')
-reaches_snapped = reaches.merge(snapped_reaches[['LINKNO', 'ID', 'distance']], on='LINKNO', how='left')
-
-
-def sum_upstream(row):
-    """Recursive function to sum up all upstream reaches."""
-
-    sum = 0
-    if row['USLINKNO1'] != -1:
-        sum += sum_upstream(row['USLINKNO1'])
-    else:
-        sum += row['ID']
-
-    if row['USLINKNO2'] != -1:
-        sum += sum_upstream(row['USLINKNO2'])
-    else:
-        sum += row['ID']
-
-
-# Apply stream algorithm with headwaters IDs
-for i, row in reaches_snapped[reaches_snapped['DSLINKNO'] == -1].iterrows():
-    sum = sum_upstream(row)

From 7c1df169c0b823a81527acab5c68db7e4127f8d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= <matt.luck@noaa.gov>
Date: Fri, 5 Jan 2024 09:53:52 -0500
Subject: [PATCH 15/18] Rename file

---
 src/delineate_hydros_and_produce_HAND.sh             | 8 ++++----
 tools/{verify_crosswalk.py => evaluate_crosswalk.py} | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)
 rename tools/{verify_crosswalk.py => evaluate_crosswalk.py} (97%)

diff --git a/src/delineate_hydros_and_produce_HAND.sh b/src/delineate_hydros_and_produce_HAND.sh
index 2ec982323..d732e888e 100755
--- a/src/delineate_hydros_and_produce_HAND.sh
+++ b/src/delineate_hydros_and_produce_HAND.sh
@@ -17,7 +17,7 @@ T_total_start
 
 ## MASK LEVEE-PROTECTED AREAS FROM DEM ##
 if [ "$mask_leveed_area_toggle" = "True" ] && [ -f $tempHucDataDir/LeveeProtectedAreas_subset.gpkg ]; then
-    echo -e $startDiv"Mask levee-protected areas from DEM (*Overwrite dem_meters.tif output) $hucNumber $branch_zero_id"
+    echo -e $startDiv"Mask levee-protected areas from DEM (*Overwrite dem_meters.tif output) $hucNumber $current_branch_id"
     date -u
     Tstart
     python3 $srcDir/mask_dem.py \
@@ -304,14 +304,14 @@ python3 $srcDir/add_crosswalk.py \
 Tcount
 
 ## EVALUATE CROSSWALK ##
-if [ "$evaluateCrosswalk" = "True" ]; then
+if [ "$current_branch_id" = "$branch_zero_id" ] && [ "$evaluateCrosswalk" = "1" ] ; then
     echo -e $startDiv"Evaluate crosswalk $hucNumber $current_branch_id"
     date -u
     Tstart
-    python3 $toolsDir/verify_crosswalk.py \
+    python3 $toolsDir/evaluate_crosswalk.py \
         -a $tempCurrentBranchDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg \
         -b $b_arg \
-        -c $tempCurrentBranchDataDir/verify_crosswalk_$current_branch_id.csv \
+        -c $tempHucDataDir/crosswalk_evaluation_$current_branch_id.csv \
         -d $tempHucDataDir/nwm_headwater_points_subset.gpkg
     Tcount
 fi
diff --git a/tools/verify_crosswalk.py b/tools/evaluate_crosswalk.py
similarity index 97%
rename from tools/verify_crosswalk.py
rename to tools/evaluate_crosswalk.py
index 652fa7281..16bdd7063 100644
--- a/tools/verify_crosswalk.py
+++ b/tools/evaluate_crosswalk.py
@@ -75,7 +75,7 @@ def _verify_crosswalk_intersections(input_flows_fileName, input_nwmflows_fileNam
     for idx in intersects.index:
         flows_idx = intersects.loc[intersects.index == idx, 'HydroID'].unique()
 
-        if type(intersects.loc[idx, 'ID']) == np.int64:
+        if isinstance(intersects.loc[idx, 'ID'], np.int64):
             streams_idxs = [intersects.loc[idx, 'ID']]
         else:
             streams_idxs = intersects.loc[idx, 'ID'].unique()
@@ -204,19 +204,19 @@ def _get_upstream_data(data, data_headwaters, data_dict, hydroid, hydroid_attr,
         nwm_fids = streams_dict[fid]
         out_list = [flow, fid, upstream_fids, nwm_fids]
 
-        if type(upstream_hid) != np.int64:
+        if not isinstance(upstream_hid, np.int64):
             if len(upstream_hid) > 0:
                 for i in upstream_hid:
                     # Find upstream feature_id(s)
                     temp_fid = int(_hydroid_to_feature_id(flows, i, 'HydroID', 'feature_id').iloc[0])
 
-                    if type(temp_fid) == list:
+                    if isinstance(temp_fid, list):
                         upstream_fids.append(temp_fid[0])
                     else:
                         upstream_fids.append(temp_fid)
 
                 out_list = [flow, fid, upstream_fids, nwm_fids]
-                if type(nwm_fids) == np.int64:
+                if isinstance(nwm_fids, np.int64):
                     nwm_fids = [nwm_fids]
 
                 if fid in upstream_fids:

From c26df17714876a70e7bf9b1d425ba6d31215ff1d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= <matt.luck@noaa.gov>
Date: Fri, 5 Jan 2024 16:15:18 -0500
Subject: [PATCH 16/18] Create output file

---
 src/add_crosswalk.py                     |  6 +-
 src/delineate_hydros_and_produce_HAND.sh |  4 +-
 tools/evaluate_crosswalk.py              | 81 +++++++++++++++++-------
 3 files changed, 66 insertions(+), 25 deletions(-)

diff --git a/src/add_crosswalk.py b/src/add_crosswalk.py
index fd7c8c450..b10453f61 100755
--- a/src/add_crosswalk.py
+++ b/src/add_crosswalk.py
@@ -258,7 +258,11 @@ def add_crosswalk(
             else:
                 update_id = output_flows.loc[output_flows.HydroID == short_id]['HydroID'].item()
 
-            str_order = output_flows.loc[output_flows.HydroID == short_id]['order_'].item()
+            output_order = output_flows.loc[output_flows.HydroID == short_id]['order_']
+            if len(output_order) == 1:
+                str_order = output_order.item()
+            else:
+                str_order = output_order.max()
             sml_segs = pd.concat(
                 [
                     sml_segs,
diff --git a/src/delineate_hydros_and_produce_HAND.sh b/src/delineate_hydros_and_produce_HAND.sh
index d732e888e..d76632cbe 100755
--- a/src/delineate_hydros_and_produce_HAND.sh
+++ b/src/delineate_hydros_and_produce_HAND.sh
@@ -312,6 +312,8 @@ if [ "$current_branch_id" = "$branch_zero_id" ] && [ "$evaluateCrosswalk" = "1"
         -a $tempCurrentBranchDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg \
         -b $b_arg \
         -c $tempHucDataDir/crosswalk_evaluation_$current_branch_id.csv \
-        -d $tempHucDataDir/nwm_headwater_points_subset.gpkg
+        -d $tempHucDataDir/nwm_headwater_points_subset.gpkg \
+        -u $hucNumber \
+        -z $current_branch_id
     Tcount
 fi
diff --git a/tools/evaluate_crosswalk.py b/tools/evaluate_crosswalk.py
index 16bdd7063..0051c3600 100644
--- a/tools/evaluate_crosswalk.py
+++ b/tools/evaluate_crosswalk.py
@@ -7,11 +7,16 @@
 import pandas as pd
 
 
-def verify_crosswalk(
-    input_flows_fileName, input_nwmflows_fileName, input_nwm_headwaters_fileName, output_table_fileName
+def evaluate_crosswalk(
+    input_flows_fileName: str,
+    input_nwmflows_fileName: str,
+    input_nwm_headwaters_fileName: str,
+    output_table_fileName: str,
+    huc: str,
+    branch: str,
 ):
     """
-    Tool to check the accuracy of crosswalked attributes
+    Tool to check the accuracy of crosswalked attributes using two methods: counting the number of intersections between two stream representations and network, which checks the upstream and downstream connectivity of each stream segment.
 
     Parameters
     ----------
@@ -23,6 +28,10 @@ def verify_crosswalk(
         Path to subset NWM headwaters
     output_table_fileName : str
         Path to output table filename
+    huc : str
+        HUC ID
+    branch : str
+        Branch ID
 
     Returns
     -------
@@ -30,37 +39,56 @@ def verify_crosswalk(
 
     Usage
     -----
-    python verify_crosswalk.py -a <input_flows_fileName> -b <input_nwmflows_fileName> -d <input_nwm_headwaters_fileName> -c <output_table_fileName>
+    python evaluate_crosswalk.py -a <input_flows_fileName> -b <input_nwmflows_fileName> -d <input_nwm_headwaters_fileName> -c <output_table_fileName> -u <huc> -z <branch>
     """
 
-    intersections_results = _verify_crosswalk_intersections(input_flows_fileName, input_nwmflows_fileName)
+    intersections_results = _evaluate_crosswalk_intersections(input_flows_fileName, input_nwmflows_fileName)
 
     intersections_correct = intersections_results['crosswalk'].sum()
     intersections_total = len(intersections_results)
-    intersections_summary = intersections_correct * 100.0 / intersections_total
+    intersections_summary = intersections_correct / intersections_total
 
-    network_results = _verify_crosswalk_network(
+    network_results = _evaluate_crosswalk_network(
         input_flows_fileName, input_nwmflows_fileName, input_nwm_headwaters_fileName
     )
 
     network_results = network_results[network_results['status'] >= 0]
     network_correct = len(network_results[network_results['status'] == 0])
     network_total = len(network_results)
-    network_summary = network_correct * 100.0 / network_total
+    network_summary = network_correct / network_total
 
     results = pd.DataFrame(
         data={
-            'type': ['intersections', 'network'],
+            'huc': [huc, huc],
+            'branch': [branch, branch],
+            'method': ['intersections', 'network'],
             'correct': [intersections_correct, network_correct],
             'total': [intersections_total, network_total],
-            'percent': [intersections_summary, network_summary],
+            'proportion': [intersections_summary, network_summary],
         }
     )
 
     results.to_csv(output_table_fileName, index=False)
 
+    return results
+
+
+def _evaluate_crosswalk_intersections(input_flows_fileName: str, input_nwmflows_fileName: str):
+    """
+    Computes the number of intersections between the NWM and DEM-derived flowlines
+
+    Parameters
+    ----------
+    input_flows_fileName : str
+        Path to DEM derived streams
+    input_nwmflows_fileName : str
+        Path to subset NWM burnlines
+
+    Returns
+    -------
+    pandas.DataFrame
+    """
 
-def _verify_crosswalk_intersections(input_flows_fileName, input_nwmflows_fileName):
     # Crosswalk check
     # fh.vprint('Checking for crosswalks between NWM and DEM-derived flowlines', verbose)
 
@@ -116,9 +144,11 @@ def _verify_crosswalk_intersections(input_flows_fileName, input_nwmflows_fileNam
     return xwalks
 
 
-def _verify_crosswalk_network(input_flows_fileName, input_nwmflows_fileName, input_nwm_headwaters_fileName):
+def _evaluate_crosswalk_network(
+    input_flows_fileName: str, input_nwmflows_fileName: str, input_nwm_headwaters_fileName: str
+):
     """
-    Tool to check the accuracy of crosswalked attributes
+    Compares the upstream and downstream connectivity of each stream segment
 
     Parameters
     ----------
@@ -133,12 +163,9 @@ def _verify_crosswalk_network(input_flows_fileName, input_nwmflows_fileName, inp
 
     Returns
     -------
-    results : pandas.DataFrame
-
-    Usage
-    -----
-    python verify_crosswalk.py -a <input_flows_fileName> -b <input_nwmflows_fileName> -d <input_nwm_headwaters_fileName> -c <output_table_fileName>
+    pandas.DataFrame
     """
+
     # Check for crosswalks between NWM and DEM-derived flowlines
     # fh.vprint('Checking for crosswalks between NWM and DEM-derived flowlines', verbose)
 
@@ -246,11 +273,19 @@ def _get_upstream_data(data, data_headwaters, data_dict, hydroid, hydroid_attr,
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Tool to check crosswalk accuracy')
-    parser.add_argument('-a', '--input-flows-fileName', help='DEM derived streams', required=True)
-    parser.add_argument('-b', '--input-nwmflows-fileName', help='Subset NWM burnlines', required=True)
-    parser.add_argument('-d', '--input-nwm-headwaters-fileName', help='Subset NWM headwaters', required=True)
-    parser.add_argument('-c', '--output-table-fileName', help='Output table filename', required=True)
+    parser.add_argument('-a', '--input-flows-fileName', help='DEM derived streams', type=str, required=True)
+    parser.add_argument(
+        '-b', '--input-nwmflows-fileName', help='Subset NWM burnlines', type=str, required=True
+    )
+    parser.add_argument(
+        '-d', '--input-nwm-headwaters-fileName', help='Subset NWM headwaters', type=str, required=True
+    )
+    parser.add_argument(
+        '-c', '--output-table-fileName', help='Output table filename', type=str, required=True
+    )
+    parser.add_argument('-u', '--huc', help='HUC ID', type=str, required=True)
+    parser.add_argument('-z', '--branch', help='Branch ID', type=str, required=True)
 
     args = vars(parser.parse_args())
 
-    verify_crosswalk(**args)
+    evaluate_crosswalk(**args)

From acb4ef50f727e3c416de1ad303bed3dd87d65d32 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CMattLuck-NOAA=E2=80=9D?= <matt.luck@noaa.gov>
Date: Fri, 5 Jan 2024 16:44:06 -0500
Subject: [PATCH 17/18] Update CHANGELOG.md

---
 docs/CHANGELOG.md | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 7fb3bd956..26d8bd14d 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -1,6 +1,28 @@
 All notable changes to this project will be documented in this file.
 We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
 
+## v4.4.x.x - 2024-01-05 - [PR#1061](https://github.com/NOAA-OWP/inundation-mapping/pull/1061)
+
+Adds a post-processing tool to compare crosswalked (conflated) `feature_id`s between NWM stream network to DEM-derived reaches. The tool is run if the `-x` flag is added to `fim_pipeline.sh`. Results are computed for branch 0 and saved in a summary file in the HUC output folder.
+
+### Additions
+
+- `tools/evaluate_crosswalk.py`: evaluates crosswalk accuracy using two methods:
+    - intersections: the number of intersections between streamlines
+    - network (or tree): compares the feature_ids of the immediate upstream segments
+
+### Changes
+
+- `Dockerfile`: added `toolsDir` environment variable
+- `fim_pipeline.sh`: added `-x` flag to run crosswalk evaluation tool
+- `fim_post_processing.sh`: changed hardcoded `/foss_fim/tools` to `toolsDir` environment variable
+- `fim_pre_processing.sh`: added `evaluateCrosswalk` environment variable
+- `src/`
+    - `add_crosswalk.py`: fix bug
+    - `delineate_hydros_and_produce_HAND.sh`: added a call to `verify_crosswalk.py` if evaluateCrosswalk is True.
+
+<br/><br/>
+
 ## v4.4.8.3 - 2024-01-05 - [PR#1059](https://github.com/NOAA-OWP/inundation-mapping/pull/1059)
 
 Fixes erroneous branch inundation in levee-protected areas.

From 4ee8ef3b0f96381497f03dd7c596dfa951b350bb Mon Sep 17 00:00:00 2001
From: Rob Hanna - NOAA <90854818+RobHanna-NOAA@users.noreply.github.com>
Date: Fri, 12 Jan 2024 10:42:39 -0700
Subject: [PATCH 18/18] Update CHANGELOG.md

---
 docs/CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 26d8bd14d..e514eb23c 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -1,7 +1,7 @@
 All notable changes to this project will be documented in this file.
 We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
 
-## v4.4.x.x - 2024-01-05 - [PR#1061](https://github.com/NOAA-OWP/inundation-mapping/pull/1061)
+## v4.4.8.4 - 2024-01-12 - [PR#1061](https://github.com/NOAA-OWP/inundation-mapping/pull/1061)
 
 Adds a post-processing tool to compare crosswalked (conflated) `feature_id`s between NWM stream network to DEM-derived reaches. The tool is run if the `-x` flag is added to `fim_pipeline.sh`. Results are computed for branch 0 and saved in a summary file in the HUC output folder.