-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathFigure2.py
257 lines (212 loc) · 10.4 KB
/
Figure2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# %%
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
from statsmodels.nonparametric.smoothers_lowess import lowess
import geopandas as gpd
import matplotlib
from sklearn.utils import resample
from sklearn.metrics import mean_squared_error
from matplotlib import gridspec
from scipy.optimize import curve_fit
import pickle
import os
meanprops = {
"marker": "D",
"markerfacecolor": "white",
"markeredgecolor": "black",
"markersize": "3",
"markeredgewidth": 0.25
}
flierprops = {
"marker": "^",
"markerfacecolor": "white",
"markeredgecolor": "black",
"markersize": "3",
"markeredgewidth": 0.5
}
fontsize = 7
palette = sns.color_palette("colorblind")
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
sns.set(font_scale=1.0, style="white")
df = pd.read_csv('data/figure2_data/fig2_data.csv')
trampush_csv = pd.read_csv("data/figure2_data/TrampushDataCleanProcessed.csv")
df_hist = pd.read_excel('data/manuscript_data/Supplementary Table 1.xlsx')
df_hist['normalized_distance'] = df_hist['normalized_distance'].astype(float)
n_simulations = 10000
ridge_elev_error = 0.6
floodplain_elev_error = 0.6
water_surface_elev_error = 0.25
xgb_depth_error = 1.0
superelevation_column = 'beta'
uncertainty_column = 'beta_uncertainty'
for index, row in df.iterrows():
all_superelevations = []
for _ in range(n_simulations):
i = np.random.choice([1, 2, 3])
ridge_height = row[f'are{i}_m'] - row[f'fpe{i}_m']
sampled_ridge_height = ridge_height + np.random.triangular(-ridge_elev_error, 0, ridge_elev_error)
sampled_water_surface_elevation = row[f'wse{i}_m'] + np.random.triangular(-water_surface_elev_error, 0, water_surface_elev_error)
sampled_xgb_depth = row['xgb_depth'] + np.random.triangular(-xgb_depth_error, 0, xgb_depth_error)
if row['method_used'] == 1:
superelevation = sampled_ridge_height / sampled_xgb_depth
elif row['method_used'] == 2:
superelevation = sampled_ridge_height / (row[f'are{i}_m'] - sampled_water_surface_elevation)
elif row['method_used'] == 3:
superelevation = sampled_ridge_height / (row[f'are{i}_m'] - (sampled_water_surface_elevation - sampled_xgb_depth))
all_superelevations.append(superelevation)
df.at[index, superelevation_column] = np.mean(all_superelevations)
df.at[index, uncertainty_column] = np.std(all_superelevations)
boot_df = df # Now df contains all the needed columns.
boot_df.to_csv("data/figure2_data/fig2_data_boot.csv", index=False)
import warnings
warnings.simplefilter(action='ignore', category=UserWarning)
def plot_scatter_with_lowess(ax, data, x_col, y_col, frac=1, len_boot=5000, color_data=None, palette=palette, edgecolor='black'):
# Extract data from DataFrame
x = data[x_col]
y = np.log(data[y_col]) # Log-transform y values
# If color_data is specified, use hue in scatterplot
if color_data:
sns.scatterplot(x=data[x_col], y=data[y_col], hue=data[color_data], s=20, ax=ax, palette=palette, hue_order=['Delta', 'Fan', 'Alluvial Plain'], edgecolor=edgecolor)
else:
sns.scatterplot(x=data[x_col], y=data[y_col], ax=ax)
# Fit the LOWESS model to the log-transformed data
lowess_results = lowess(y, x, frac=frac, it=5)
y_predicted = lowess_results[:, 1]
residuals = y - y_predicted
# Perform bootstrapping to estimate the confidence intervals in log space
y_bootstrapped = np.empty((len_boot, len(y)))
for i in range(len_boot):
residuals_bootstrap = resample(residuals)
y_bootstrapped[i] = y_predicted + residuals_bootstrap
# Calculate the lower and upper bounds of the confidence intervals in log space
y_ci_lower = np.percentile(y_bootstrapped, 2.5, axis=0)
y_ci_upper = np.percentile(y_bootstrapped, 97.5, axis=0)
# Transform the predicted values and confidence intervals back to linear space for plotting
y_predicted = np.exp(y_predicted)
y_ci_lower = np.exp(y_ci_lower)
y_ci_upper = np.exp(y_ci_upper)
# Plot the LOWESS fit and the confidence intervals
ax.plot(lowess_results[:, 0], y_predicted, color='red', linestyle='--', linewidth=1.)
ax.fill_between(lowess_results[:, 0], y_ci_lower, y_ci_upper, color='lightgray', alpha=0.3) # Lighter gray
# Formatting and labeling
ax.set_xlabel(x_col)
ax.set_ylabel(y_col)
ax.set_yscale('log')
ax.grid(True)
ax.legend().set_visible(False)
ax.yaxis.set_major_formatter(plt.FuncFormatter(format_func))
return ax
# Then include the custom function before plotting:
def format_func(value, tick_number):
return "{:.0f}".format(value)
def plot_boxplot(ax, x_data, y_data, order, ylabel=''):
# Plot the boxplot
sns.boxplot(data=df, x=x_data, y=y_data, hue='geomorphology', showfliers=False, flierprops=flierprops,
showmeans=True, meanprops=meanprops,
order=order, hue_order=['Delta', 'Fan', 'Alluvial Plain'], ax=ax, palette=palette, linewidth=0.5, width=0.75)
# Calculate the IQR to filter out outliers
Q1 = df.groupby(x_data)[y_data].quantile(0.25)
Q3 = df.groupby(x_data)[y_data].quantile(0.75)
IQR = Q3 - Q1
# Align the indices of Q1 and Q3 with df
filter = df.apply(lambda row: (row[y_data] >= (Q1[row[x_data]] - 1.5 * IQR[row[x_data]])) &
(row[y_data] <= (Q3[row[x_data]] + 1.5 * IQR[row[x_data]])), axis=1)
# Add points to the boxplot, excluding outliers
sns.stripplot(data=df[filter], x=x_data, y=y_data, hue='geomorphology', order=order, hue_order=['Delta', 'Fan', 'Alluvial Plain'],
dodge=False, marker='o', alpha=0.6, palette=palette, ax=ax, linewidth=0.25, size=2, edgecolor='gray', jitter=0.2)
ax.grid(axis='y', visible=True)
ax.set_ylabel(ylabel, labelpad=10, rotation=0)
labels = ax.get_xticklabels()
ax.set_xticks([0, 1, 2])
ax.set_xticklabels(labels, fontsize=5, ha='center')
ax.legend().set_visible(False) # Hide the legend to avoid duplication
#%%
import matplotlib.ticker as ticker
# Custom formatter function
def custom_formatter(x, pos):
# Check if the value is less than 1 and format it with an additional decimal place
if x < 1:
return '{:.1f}'.format(x) # 1 decimal places for values less than 1
elif x <= 0.1:
return '{:.2f}'.format(x) # 2 decimal places for values less than .1
elif x == 0:
return '{:.0f}'.format(x)
else:
return '{:.0f}'.format(x) # 0 decimal places for all other values
# Create a figure
fig_width, fig_height = 7, 3.5 # Adjusted width to accommodate the new column
fig = plt.figure(figsize=(fig_width, fig_height))
# Divide the figure into 7 rows and 3 columns
gs1 = gridspec.GridSpec(7, 3, width_ratios=[.5, .55, 1.95], height_ratios=[2.5, .5, .5, .5, .5, .5, .5], wspace=.10, hspace=.7) # Updated width ratios
ax_new_hist = fig.add_subplot(gs1[0, 2:3])
sns.histplot(data=df_hist, x='normalized_distance', kde=False, fill=True,
alpha=1, edgecolor='black', stat="percent", binwidth=.05, color='#56b4e9', linewidth=.65, ax=ax_new_hist) #'#4FA0CA'
ax_new_hist.grid(False)
ax_new_hist.set_ylim(0, 45)
ax_new_hist.set_xlabel('$X_N$', fontsize=6, labelpad=-3)
ax_new_hist.set_ylabel('% Occurrence', fontsize=6, labelpad=-.5)
ax_new_hist.tick_params(axis='both', which='major', labelsize=6, width=0, length=2)
sns.despine(ax=ax_new_hist)
ax0 = fig.add_subplot(gs1[1:4, 2])
ax0.errorbar(boot_df['spr'], boot_df['beta'], yerr=boot_df['beta_uncertainty'], capsize=3.5, capthick=0.5, ls='none', lw=.5, color='black', markeredgecolor='black', markersize=8, zorder=1)
plot_scatter_with_lowess(ax0, df, 'spr', 'beta', color_data='geomorphology', palette=palette)
ax0.yaxis.set_major_formatter(ticker.FuncFormatter(custom_formatter))
ax0.set_xlabel('')
ax0.set_ylabel('')
ax0.set_xscale('log')
ax1 = fig.add_subplot(gs1[4:7, 2])
ax1.errorbar(df['spr'], df['gamma'], yerr=[df['yerr_lower_relative'], df['yerr_upper_relative']], capsize=3.5, capthick=0.5, ls='none', lw=.5, color='black', markeredgecolor='black', markersize=8, zorder=1)
plot_scatter_with_lowess(ax1, df, 'spr', 'gamma', color_data='geomorphology', palette=palette)
ax1.set_xlabel(r'proximal and steep $\longleftarrow SPR^{\star} \longrightarrow$ distal and gentle', fontsize=fontsize, labelpad=1)
ax1.set_xscale('log')
ax1.yaxis.set_major_formatter(ticker.FuncFormatter(custom_formatter))
ax3 = fig.add_subplot(gs1[1:4, 1])
plot_boxplot(ax3, 'geomorphology', 'beta', ['Fan', 'Alluvial Plain', 'Delta'])
ax3.set_xlabel('')
ax3.set_ylabel('')
ax4 = fig.add_subplot(gs1[4:7, 1])
plot_boxplot(ax4, 'geomorphology', 'gamma', ['Fan', 'Alluvial Plain', 'Delta'])
ax4.set_xlabel('')
ax4.set_ylabel('')
ax4.set_xlabel('Geomorphology', fontsize=fontsize)
ax6 = fig.add_subplot(gs1[1:4, 0])
sns.histplot(data=df, y='beta', bins=15, edgecolor='k', color='pink', alpha=1, stat="probability", ax=ax6, zorder=3, linewidth=.5)
ax6.set_ylabel(r'$\beta$', rotation=0, fontsize=fontsize)
ax6.set_yticks([0, 1, 2, 3, 4, 5])
ax6.set_xticks([0, 0.1, 0.2, 0.3, 0.4])
ax6.grid(False)
ax6.xaxis.set_major_formatter(ticker.FuncFormatter(custom_formatter))
ax6.set_xlabel('')
ax6.axhspan(.5, 1.1, color='#BDBDBD', alpha=.6, fill=True, zorder=1)
ax6.set_ylim(0, 5.5)
median_superelevation = df['beta'].median()
ax6.axhline(y=median_superelevation, color='black', linestyle='-.', linewidth=.75, zorder=4)
ax7 = fig.add_subplot(gs1[4:7, 0])
ax7.grid(False)
sns.histplot(data=df, y='gamma', bins=15, edgecolor='k', color='pink', alpha=1, stat="probability", ax=ax7, zorder=3, linewidth=.5)
ax7.set_ylabel(r'$\gamma$', rotation=0, fontsize=fontsize)
ax7.set_xlabel('Probability', fontsize=fontsize)
ax7.axhspan(3, 10, color='#BDBDBD', alpha=0.6, zorder=1)
ax7.set_ylim(-.5, 35)
median_XS_DS = df['gamma'].median()
ax7.axhline(y=median_XS_DS, color="black", linestyle='-.', linewidth=.75, zorder=4)
ax7.set_xticks([0, 0.1, 0.2, 0.3, 0.4, 0.5])
ax7.xaxis.set_major_formatter(ticker.FuncFormatter(custom_formatter))
all_axes = [ax_new_hist, ax0, ax1, ax3, ax4, ax6, ax7]
for ax in all_axes:
for spine in ax.spines.values():
spine.set_linewidth(.5)
if ax == ax_new_hist:
ax.tick_params(axis='both', labelsize=5, pad=-.2)
else:
ax.tick_params(axis='both', labelsize=5, pad=-3.5)
plt.margins(0, 0)
plt.savefig('figure_plotting/figures/figure2.png', dpi=300)
plt.savefig('figure_plotting/figures/figure2.pdf')
plt.show()
# %%