calumbradbury
diff --git a/‎backup_trmm_stats.py
+56 b/‎backup_trmm_stats.py
+56
diff --git a/‎basin_ks_testing.py
+48 b/‎basin_ks_testing.py
+48
diff --git a/‎basin_precip_gradient.py
+124 b/‎basin_precip_gradient.py
+124
@@ -0,0 +1,56 @@
+#backup stats writer
+#recalculates the mean rainfall of each basin and merges it into _output_basin_TRMM_new.csv
+import csv
+import pandas as pd
+import sys
+
+#prefix joined onto every input/output file name below; it ends in '/', so
+#the files live inside this directory and their names start with '_output_'
+target = '/exports/csce/datastore/geos/users/s1134744/LSDTopoTools/Topographic_projects/Himalayan_front/himalaya_all/'
+
+#LSDTopoTools specific imports
+#Loading the LSDTT setup configuration
+setup_file = open('/exports/csce/datastore/geos/users/s1134744/LSDTopoTools/Git_projects/LSDAutomation/chi_analysis_automation/chi_automation.config','r')
+LSDMT_PT = setup_file.readline().rstrip()
+LSDMT_MF = setup_file.readline().rstrip()
+Iguanodon = setup_file.readline().rstrip() 
+setup_file.close()
+
+sys.path.append(LSDMT_PT)
+sys.path.append(LSDMT_MF)
+sys.path.append(Iguanodon)
+
+from LSDPlottingTools import LSDMap_MOverNPlotting as MN
+from LSDMapFigure import PlottingHelpers as Helper
+import Iguanodon31 as Ig
+
+#creating csv with rainfall averaged over basin segments, and including MN data
+with open(target+"_output_basin_TRMM_recalculated.csv","wb") as csvfile:
+  csvWriter = csv.writer(csvfile,delimiter=',')
+  csvWriter.writerow(("basin_key","mean jun/jul/aug rainfall (mm)"))
+
+  #opening source of climate data
+with open(target+'_output_MChiSegmented_nodata.csv','r') as csvfile:
+  pandasDF = pd.read_csv(csvfile,delimiter=',')
+      
+        #opening basin directory
+  with open(target+'_output_AllBasinsInfo.csv','r') as csvfile_2:
+    csvReader = csv.reader(csvfile_2,delimiter=',')
+    next(csvReader)
+        
+    for row in csvReader:
+      basin_number = int(row[5])
+      selected_DF = pandasDF.loc[pandasDF['basin_key'] == basin_number]
+      #getting burned data series for the basin
+      pandas_list = selected_DF['burned_data']
+      #print pandas_list
+      mean_rainfall = pandas_list.mean()
+        
+      with open(target+'_output_basin_TRMM_recalculated.csv', 'a') as csvfile_3:
+        csvWriter = csv.writer(csvfile_3,delimiter=',')
+        csvWriter.writerow((basin_number,mean_rainfall))
+
+with open(target+'_output_basin_TRMM.csv','r') as csvfile:
+  pandas_a = pd.read_csv(csvfile,delimiter=',')
+  with open(target+'_output_basin_TRMM_recalculated.csv','r') as csvfile_b:
+    pandas_b = pd.read_csv(csvfile_b,delimiter=',')
+    pandas_a = pandas_a.merge(pandas_b, on=["basin_key"])
+    pandas_a.to_csv(target+'_output_basin_TRMM_new.csv', mode = "w", header = True, index = False)
@@ -0,0 +1,48 @@
+#BASIN  STATS INCLUDING KS
+
+from scipy import stats
+import os
+import csv
+import pandas as pd
+
+#root folder that every entry of `files` is appended to
+directory = '/exports/csce/datastore/geos/users/s1134744/LSDTopoTools/Topographic_projects/'
+
+#summary tables compared below; by position they are loaded as
+#full_full_DF, full_GLIMS_DF, clip_full_DF and clip_GLIMS_DF
+files = ['full_himalaya/full_concavity_basins_summary.csv','full_himalaya/concavity_bootstrap_basins_summary_processed.csv',
+         'full_himalaya_5000/full_concavity_basins_summary.csv','full_himalaya_5000/concavity_bootstrap_basins_summary_processed.csv']
+         
+def returnSeries(source):
+    DF = pd.read_csv(source,delimiter=',')
+    series = DF['concavity_bootstrap']
+    return series
+
+def ks_2sample_test(pandasSeries_A,pandasSeries_B):
+    list_A = pandasSeries_A.tolist()
+    list_B = pandasSeries_B.tolist()
+    statistic,p_value = stats.ks_2samp(list_A,list_B)
+    return statistic,p_value
+
+    
+full_full_DF = returnSeries(directory+files[0]) 
+full_GLIMS_DF = returnSeries(directory+files[1]) 
+clip_full_DF = returnSeries(directory+files[2]) 
+clip_GLIMS_DF = returnSeries(directory+files[3])
+
+print "full DEM/full DEM",ks_2sample_test(full_full_DF,full_full_DF) 
+print "full DEM/FULL-GLIMS",ks_2sample_test(full_full_DF,full_GLIMS_DF) 
+print "full DEM/Clip-Full",ks_2sample_test(full_full_DF,clip_full_DF) 
+print "full DEM/Clip-GLIMS",ks_2sample_test(full_full_DF,clip_GLIMS_DF) 
+print ".............\n"
+print "full GLIMS/full DEM",ks_2sample_test(full_GLIMS_DF,full_full_DF) 
+print "full GLIMS/FULL-GLIMS",ks_2sample_test(full_GLIMS_DF,full_GLIMS_DF) 
+print "full GLIMS/Clip-Full",ks_2sample_test(full_GLIMS_DF,clip_full_DF) 
+print "full GLIMS/Clip-GLIMS",ks_2sample_test(full_GLIMS_DF,clip_GLIMS_DF) 
+print ".............\n"
+print "clip FULL/full DEM",ks_2sample_test(clip_full_DF,full_full_DF) 
+print "clip FULL/FULL-GLIMS",ks_2sample_test(clip_full_DF,full_GLIMS_DF) 
+print "clip FULL/Clip-Full",ks_2sample_test(clip_full_DF,clip_full_DF) 
+print "clip FULL/Clip-GLIMS",ks_2sample_test(clip_full_DF,clip_GLIMS_DF) 
+print ".............\n"
+print "clip GLIMS/full DEM",ks_2sample_test(clip_GLIMS_DF,full_full_DF) 
+print "clip GLIMS/FULL-GLIMS",ks_2sample_test(clip_GLIMS_DF,full_GLIMS_DF) 
+print "clip GLIMS/Clip-Full",ks_2sample_test(clip_GLIMS_DF,clip_full_DF) 
+print "clip GLIMS/Clip-GLIMS",ks_2sample_test(clip_GLIMS_DF,clip_GLIMS_DF) 
@@ -0,0 +1,124 @@
+#script to extract max/min precipitation from each basin. Used to calculate gradient
+import matplotlib
+
+matplotlib.use("Agg")
+import pandas as pd
+import csv
+import os
+from matplotlib import pyplot as plt
+
+#root directory: the tile inventory CSVs are read from here and the output
+#CSV and PNG are written here
+target = '/exports/csce/datastore/geos/users/s1134744/LSDTopoTools/Topographic_projects/Himalayan_front/'
+#name = 'himalaya_processed.csv'
+
+def getMaxMin(dataFrame,basin_key):
+  basinDataFrame = dataFrame.loc[dataFrame["basin_key"] == basin_key]
+  try:
+    precipitationSeries = basinDataFrame["precipitation"]
+  
+  except:
+    precipitationSeries = basinDataFrame["secondary_burned_data"]
+  #print precipitationSeries
+  
+  #maxPrecip = precipitationSeries.max()
+  #minPrecip = precipitationSeries.min()
+  maxPrecip = precipitationSeries.first_valid_index()
+  minPrecip = precipitationSeries.last_valid_index()
+  
+  
+  return maxPrecip, minPrecip 
+
+
+def pathCollector(path,name):
+  #returns lists of paths and names
+  with open(path+name+'.csv','r') as csvfile:
+    csvReader = csv.reader(csvfile,delimiter=',')
+    next(csvReader)
+    full_paths = []
+    dem_names = []
+    write_names = []
+    for row in csvReader:
+          max_basin = (int(row[6])/2)+int(row[5])
+          full_path = path+str(row[0])+'/'+("%.2f" %float(row[2]))+'_'+("%.2f" %float(row[3]))+'_'+str(row[0])+'_'+str(row[1])+'/'+str(row[5])+'/'
+          dem_name = str(row[0])+'_'+str(row[1])
+          write_name = str(row[1])+str(row[5])+'_'+str((int(row[6])/2)+int(row[5]))
+          full_paths.append(full_path)
+          dem_names.append(dem_name)
+          write_names.append(write_name)
+    return full_paths,dem_names,write_names  
+
+def basinLists(path):
+  with open(path+'_AllBasinsInfo.csv','r') as basincsv:
+    basinPandas = pd.read_csv(basincsv,delimiter=',')
+    basins = basinPandas["basin_key"]
+    basin_list = basins.tolist()
+    return basin_list
+
+def getMChiSegmentedPandas(path):
+  with open(path+'_MChiSegmented_burned.csv') as mChiSource:
+    pandasDF = pd.read_csv(mChiSource, delimiter=',')
+    return pandasDF
+
+def mainOperation(full_paths,dem_names,write_names,dem_record,basins,maxs,mins):
+  
+  #looping through each tile
+  for x,y,z in zip(full_paths,dem_names,write_names):
+    #getting basin list for tile
+    try:
+      basin_list = basinLists(x+z)
+      pandasDF = getMChiSegmentedPandas(x+z)  
+      for a in basin_list:
+        max_precip,min_precip = getMaxMin(pandasDF,a)
+        dem_record.append(y)
+        basins.append(a)
+        maxs.append(max_precip)
+        mins.append(min_precip)
+    except(IOError):
+      print("IOError, some info does not exist %s"%(y))
+  
+  return dem_record,basins,maxs,mins
+
+def scatterPlot(dataFrame):
+  
+  # Create a figure
+  fig = plt.figure(1, figsize=(18,9))
+
+  # Create an axes
+  ax = fig.add_subplot(111)
+  #plt.ylabel("", fontsize = 24)
+  plt.title(("precip_gradient"), fontsize = 32)
+  
+  # Create the boxplot
+  #bp = ax.boxplot(data_to_plot, labels=header_list, showfliers=False)
+  bp = dataFrame.plot.scatter(x=[2],y=[3],c='DarkBlue')
+  
+  plt.tick_params(axis='both', which='major', labelsize=18)
+  # Save the figure
+  fig.savefig(target+'precip_gradient_scatter.png', bbox_inches='tight')  
+  #required to clear the axes. Each call of this function wouldn't do that otherwise.
+  plt.cla()
+
+#getting base lists
+
+full_paths,dem_names,write_names = pathCollector(target,'himalaya_processed')
+full_paths_b,dem_names_b,write_names_b = pathCollector(target,'himalaya_b_processed')
+
+dem_record = []
+basins = []
+maxs = []
+mins = []
+
+dem_record,basins,maxs,mins = mainOperation(full_paths,dem_names,write_names,dem_record,basins,maxs,mins)
+
+dem_record,basins,maxs,mins = mainOperation(full_paths_b,dem_names_b,write_names_b,dem_record,basins,maxs,mins)
+
+
+
+demDF = pd.Series(dem_record)
+basinDF = pd.Series(basins)
+maxDF = pd.Series(maxs)
+minDF = pd.Series(mins)
+
+print demDF,basinDF,maxDF,minDF
+export_DF = pd.concat([demDF,basinDF,maxDF,minDF],axis=1)
+export_DF.to_csv(target+'precip_gradient_data_first_last.csv',mode='w',header=True,index=False)
+scatterPlot(export_DF)