Skip to content

Commit 199c80b

Browse files
committed
Updated find_cresis_xy to actually get all nearby data
1 parent 8a83307 commit 199c80b

9 files changed

+389
-26
lines changed

CReSIS_Sectors_forDataSearch_Ant.mat

935 KB
Binary file not shown.

CReSIS_Sectors_forDataSearch_Gre.mat

453 KB
Binary file not shown.

cresis_dataaggregator.py

+145
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
################ This is the import statement required to reference scripts within the package
import os, sys, glob

# Candidate locations for the NDH_Tools package, depending on which machine this runs on.
ndh_tools_path_opts = [
    '/mnt/data01/Code/',
    '/home/common/HolschuhLab/Code/'
]
# BUGFIX: these entries are directories (note the trailing '/'), so the original
# os.path.isfile() check was always False and nothing was ever added to sys.path.
# os.path.isdir() is the correct test for appending package directories.
for i in ndh_tools_path_opts:
    if os.path.isdir(i): sys.path.append(i)
################################################################################################
10+
11+
from collections.abc import Iterable
12+
import numpy as np
13+
import NDH_Tools as ndh
14+
15+
16+
def cresis_dataaggregator(filelist,remove_totaldata=0,savename='',depthcap=0,at_samples=[],at_samples_type=0):
    """
    % (C) Nick Holschuh - Amherst College -- 2022 ([email protected])
    %
    % This function can take a list of CReSIS Radar files and extract the relevant bits
    %
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % The inputs are:
    %    filelist - list of filenames to read
    %    remove_totaldata - flag [0 or 1] which dictates whether or not to preserve the radargrams
    %    savename - a string for the name of the .mat file to write ('' -> nothing written)
    %    depthcap - an index for the maximum depth sample to include (0 -> keep all samples)
    %    at_samples - list of lists, including the start and end along-track sample to use, or an
    %                 outline to act as bounds for the aggregated data
    %    at_samples_type - 0 is defined indices, 1 is an outline
    %
    %%%%%%%%%%%%%%%
    % The outputs are:
    %    save_dict - dictionary containing:
    %         Data_Vals: The array with coordinate and profile information
    %         DV_Info: Metadata describing the columns in Data_Vals
    %         start_indecies: The index within the larger data array when each new file starts
    %         filenames: The original filenames included in the aggregation
    %         Aggregated_Data: per-file [Data, distance, Time] triplets
    %                          (empty when remove_totaldata == 1)
    %
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    """
    print_flag = 0
    start_flag = 1
    filenames = []
    Aggregated_Data = []
    start_indecies = [0]
    Data_Vals = []

    ###################### We loop through the list of input files
    for f_ind,fn in enumerate(filelist):

        radar_data = ndh.loadmat(fn)
        final_varlist = []

        ################## We create Surface/Bottom objects if needed
        # (np.nan rather than np.NaN -- the capitalized alias was removed in NumPy 2.0)
        if 'Surface' not in radar_data.keys():
            radar_data['Surface'] = radar_data['Latitude'].copy()*np.nan
        if 'Bottom' not in radar_data.keys():
            radar_data['Bottom'] = radar_data['Surface'].copy()*np.nan

        ################## Calculate Polarstereo coordinates
        xy = ndh.polarstereo_fwd(radar_data['Latitude'], radar_data['Longitude'])
        radar_data['x'] = xy['x']
        radar_data['y'] = xy['y']
        # NOTE(review): this feeds already-projected x/y back through polarstereo_fwd,
        # which re-projects projected coordinates. An along-track distance (e.g.
        # ndh.distance_vector(x, y)) was likely intended -- confirm against the ndh API
        # and downstream readers of 'distance' before changing.
        radar_data['distance'] = ndh.polarstereo_fwd(radar_data['x'],radar_data['y'])
        final_varlist.append('x')
        final_varlist.append('y')

        ################## We apply the depthcap, if prescribed
        if depthcap > 0:
            radar_data['Data'] = radar_data['Data'][:depthcap,:]
            radar_data['Time'] = radar_data['Time'][:depthcap]

        ################## This object is used to subset along track, if requested
        if len(at_samples) == 0:
            trace_index = np.arange(0,len(radar_data['Latitude']))
        else:
            if at_samples_type == 0:
                # Explicit [start, end] (inclusive) sample pairs, one per file
                trace_index = np.arange(at_samples[f_ind][0],at_samples[f_ind][1]+1)
            else:
                # Keep only traces whose (x, y) fall inside the supplied outline
                trace_index = np.where(ndh.within(np.stack([radar_data['x'],radar_data['y']]).T,at_samples))[0]
        radar_data['Data'] = radar_data['Data'][:,trace_index]

        ################## All objects that are the same shape as latitude get subset
        orig_len = len(radar_data['Latitude'])
        for key_opt in radar_data.keys():
            if isinstance(radar_data[key_opt],type(radar_data['Latitude'])):
                shape_array = np.array(radar_data[key_opt].shape)
                if len(shape_array) > 0:
                    # np.any reads more clearly than np.max(...) == 1 for "any axis matches"
                    if np.any(shape_array == orig_len):
                        final_varlist.append(key_opt)

        for kk in final_varlist:
            if len(radar_data[kk]) == orig_len:
                radar_data[kk] = radar_data[kk][trace_index]

        ################## Here we extract date info from the filename
        # Expected basename pattern: <prefix>_YYYYMMDD_<segment>_<frame>.mat
        fn_parts = fn.split('/')[-1].split('.')[0].split('_')
        year = int(fn_parts[1][0:4])
        month = int(fn_parts[1][4:6])
        day = int(fn_parts[1][6:8])
        seg = int(fn_parts[2])
        frm = int(fn_parts[3])
        file_ind = f_ind

        ################## Then, we try and construct the object and concatenate everything
        # One constant-valued column per metadata field, broadcast to the trace count
        ones = np.ones(radar_data['Latitude'].shape)
        temp_Data_Vals = np.vstack((radar_data['x'],radar_data['y'],radar_data['Latitude'],radar_data['Longitude'],radar_data['Elevation'],
                                    radar_data['Surface'],radar_data['Bottom'],radar_data['GPS_time'],trace_index,
                                    ones*year, ones*month, ones*day,
                                    ones*seg, ones*frm, ones*file_ind)).T

        ################## The initial file starts the objects, subsequent files add to them
        if start_flag > 1:
            Data_Vals = np.concatenate((Data_Vals, temp_Data_Vals), axis=0)
            if print_flag == 1:
                print('Completed File '+str(f_ind)+' - '+fn)
        else:
            # First file: seed the array. (The original also re-set start_indecies = [0]
            # here, but it is still [0] at this point, so that assignment was redundant.)
            Data_Vals = temp_Data_Vals
            start_flag = start_flag+1
            if print_flag == 1:
                print('Started with file '+str(f_ind)+' - '+fn)

        ################## The start index for each new file is logged, along with the file name
        start_indecies.append(start_indecies[-1] + len(trace_index))
        filenames.append(fn)

        ################## Finally, the radargrams are appended if desired
        if remove_totaldata == 0:
            Aggregated_Data.append([radar_data['Data'],radar_data['distance'],radar_data['Time']])

    # Drop the trailing entry so start_indecies has exactly one start index per file
    start_indecies = start_indecies[:-1]
    DV_Info = ['X Coordinate (ps)','Y Coordinate (ps)','Latitude','Longitude','Flight Elevation','Surface Pick','Bottom Pick','GPS Time','Trace Index','Year','Month','Day','Segment','Frame','File Index']

    save_dict = {'Data_Vals':Data_Vals,'DV_Info':DV_Info,'start_indecies':start_indecies,'filenames':filenames,'Aggregated_Data':Aggregated_Data}
    if len(savename) > 0:
        ndh.savemat(save_dict,savename)

    return save_dict
144+
145+

cresis_season.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -72,18 +72,18 @@ def cresis_season(y,m=0,d=0,ant1_gre2=1):
7272

7373
match_ind = np.where(full_dates[:,0] == target_date)[0]
7474
exact_flag = 1;
75-
75+
76+
if len(match_ind) == 0:
77+
match_ind = ndh.find_nearest(full_dates[:,0],target_date);
78+
match_ind = match_ind['index'][0]
79+
exact_flag = 0;
80+
7681
if ant1_gre2 == 1:
7782
if len(match_ind) > 1:
7883
match_ind = match_ind[0]
7984
else:
8085
if len(match_ind) > 1:
81-
match_ind = match_ind[1]
82-
83-
if len(match_ind) == 0:
84-
match_ind = ndh.find_nearest(full_dates[:,0],target_date);
85-
match_ind = match_ind['index'][0]
86-
exact_flag = 0;
86+
match_ind = match_ind[1]
8787

8888
############### You have to subtract one from the match ind to deal with matlabs indexing
8989
if full_dates[match_ind,2] == 1:

crossovers.py

+14-9
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,22 @@ def crossovers(line1,line2):
1919
% 2: The position of the true crossover coordinates
2020
%
2121
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
22-
%%
22+
%% Adapted from: https://stackoverflow.com/questions/17928452/find-all-intersections-of-xy-data-point-graph-with-numpy
2323
"""
2424
import numpy.core.umath_tests as ut
25-
26-
x_down = line1[:,0]
27-
y_down = line1[:,1]
28-
x_up = line2[:,0]
29-
y_up = line2[:,1]
30-
31-
p = np.column_stack((x_down, y_down))
32-
q = np.column_stack((x_up, y_up))
25+
26+
############# Pretty sure we don't need this
27+
if 0:
28+
x_down = line1[:,0]
29+
y_down = line1[:,1]
30+
x_up = line2[:,0]
31+
y_up = line2[:,1]
32+
33+
p = np.column_stack((x_down, y_down))
34+
q = np.column_stack((x_up, y_up))
35+
else:
36+
p = line1
37+
q = line2
3338

3439
(p0, p1, q0, q1) = p[:-1], p[1:], q[:-1], q[1:]
3540
rhs = q0 - p0[:, np.newaxis, :]

cumulativedistribution.py

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import numpy as np
2+
3+
def cumulativedistribution(input_data,bins=50):
    """
    % (C) Nick Holschuh - Amherst College -- 2024 ([email protected])
    % This function takes an input dataset and calculates a cumulative distribution function
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % The inputs are as follows:
    %
    %     input_data -- the array of values to use to calculate the cdf
    %     bins -- the number of bins to assume in calculating the cdf.
    %
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % The outputs are as follows:
    %
    %     bin_vals -- These are the x coordinates on the cumulative distribution,
    %                 that correspond to the values on the right edge of each bin
    %     cdf -- This is the cumulative distribution
    %     pdf -- [NOT STRICTLY CORRECT] This is the percentage of the distribution
    %            that falls within each bin. This changes with bin size, so it is
    %            not a true pdf (which, in principle, has infinite bins)
    %
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %%
    """
    # Histogram the data; bin_edges has one more entry than count
    count, bin_edges = np.histogram(input_data, bins=bins)
    # Fraction of samples falling in each bin (bin-size dependent; see docstring note)
    pdf = count / count.sum()
    # Running sum of the per-bin fractions gives the empirical CDF
    cdf = np.cumsum(pdf)
    # The right edge of each bin pairs with the cumulative fraction at/below it
    return bin_edges[1:], cdf, pdf

distance_separator.py

+20-10
Original file line numberDiff line numberDiff line change
@@ -29,20 +29,30 @@ def distance_separator(in_x,in_y,distance_sep):
2929
%%
3030
"""
3131

32+
if isinstance(in_x,type([])):
33+
in_x = np.array(in_x)
34+
35+
if isinstance(in_y,type([])):
36+
in_y = np.array(in_y)
3237

3338
dists = distance_vector(in_x,in_y,1)
3439
naninds = np.where(dists > distance_sep)[0]+1
40+
if np.max(naninds) > len(dists)-1:
41+
naninds = naninds[:-1]
3542

36-
in_x_sep = list_separator(in_x,naninds)
37-
in_y_sep = list_separator(in_y,naninds)
38-
39-
out_x = np.array([])
40-
out_y = np.array([])
41-
for i in in_x_sep:
42-
out_x = np.concatenate([out_x,i,np.atleast_1d(np.NaN)])
43-
for i in in_y_sep:
44-
out_y = np.concatenate([out_y,i,np.atleast_1d(np.NaN)])
43+
out_xy = np.ones([len(dists)+len(naninds),2])*np.NaN
4544

46-
return out_x, out_y
45+
ind_adjust = np.zeros(len(dists))
46+
ind_adjust[naninds] = 1
47+
ind_adjust = np.cumsum(ind_adjust)
48+
49+
orig_ind = np.arange(0,len(in_x))
50+
orig_ind = orig_ind+ind_adjust
51+
orig_ind = orig_ind.astype(int)
52+
53+
out_xy[orig_ind,0] = in_x
54+
out_xy[orig_ind,1] = in_y
55+
56+
return out_xy
4757

4858

0 commit comments

Comments
 (0)