Skip to content

Commit 739624d

Browse files
adding fixed lambda HIV
1 parent 506e6e2 commit 739624d

File tree

3 files changed

+134
-4
lines changed

3 files changed

+134
-4
lines changed

examples/HIV/fixed.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
import functools
2+
3+
import numpy as np
4+
from scipy.stats import norm as ndist
5+
6+
import regreg.api as rr
7+
8+
# load in the X matrix
9+
10+
from selection.tests.instance import HIV_NRTI
11+
X_full = HIV_NRTI(datafile="NRTI_DATA.txt", standardize=False)[0]
12+
13+
from learn_selection.utils import full_model_inference, liu_inference, pivot_plot
14+
from learn_selection.core import split_sampler, keras_fit
15+
from learn_selection.Rutils import lasso_glmnet, cv_glmnet_lam
16+
17+
boot_design = False
18+
19+
def simulate(s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=5000, seed=0):
20+
21+
# description of statistical problem
22+
23+
n, p = X_full.shape
24+
25+
if boot_design:
26+
idx = np.random.choice(np.arange(n), n, replace=True)
27+
X = X_full[idx] # bootstrap X to make it really an IID sample, i.e. don't condition on X throughout
28+
X += 0.1 * np.std(X) * np.random.standard_normal(X.shape) # to make non-degenerate
29+
else:
30+
X = X_full.copy()
31+
32+
X = X - np.mean(X, 0)[None, :]
33+
X = X / np.std(X, 0)[None, :]
34+
35+
n, p = X.shape
36+
truth = np.zeros(p)
37+
truth[:s] = np.linspace(signal[0], signal[1], s)
38+
np.random.shuffle(truth)
39+
truth /= np.sqrt(n)
40+
truth *= sigma
41+
42+
y = X.dot(truth) + sigma * np.random.standard_normal(n)
43+
44+
lam_min, lam_1se = cv_glmnet_lam(X.copy(), y.copy(), seed=seed)
45+
lam_min, lam_1se = n * lam_min, n * lam_1se
46+
47+
XTX = X.T.dot(X)
48+
XTXi = np.linalg.inv(XTX)
49+
resid = y - X.dot(XTXi.dot(X.T.dot(y)))
50+
dispersion = np.linalg.norm(resid)**2 / (n-p)
51+
52+
S = X.T.dot(y)
53+
covS = dispersion * X.T.dot(X)
54+
splitting_sampler = split_sampler(X * y[:, None], covS)
55+
56+
def meta_algorithm(XTX, XTXi, lam, sampler):
57+
58+
p = XTX.shape[0]
59+
success = np.zeros(p)
60+
61+
loss = rr.quadratic_loss((p,), Q=XTX)
62+
pen = rr.l1norm(p, lagrange=lam)
63+
64+
scale = 0.
65+
noisy_S = sampler(scale=scale)
66+
loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0)
67+
problem = rr.simple_problem(loss, pen)
68+
soln = problem.solve(max_its=100, tol=1.e-10)
69+
success += soln != 0
70+
return set(np.nonzero(success)[0])
71+
72+
lam = 4. * np.sqrt(n)
73+
selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam)
74+
75+
# run selection algorithm
76+
77+
df = full_model_inference(X,
78+
y,
79+
truth,
80+
selection_algorithm,
81+
splitting_sampler,
82+
success_params=(1, 1),
83+
B=B,
84+
fit_probability=keras_fit,
85+
fit_args={'epochs':10, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'})
86+
87+
if False: # df is not None:
88+
liu_df = liu_inference(X,
89+
y,
90+
lam,
91+
dispersion,
92+
truth,
93+
alpha=alpha)
94+
95+
return pd.merge(df, liu_df, on='variable')
96+
else:
97+
return df
98+
99+
if __name__ == "__main__":
100+
import statsmodels.api as sm
101+
import matplotlib.pyplot as plt
102+
import pandas as pd
103+
104+
U = np.linspace(0, 1, 101)
105+
plt.clf()
106+
107+
init_seed = np.fabs(np.random.standard_normal() * 500)
108+
for i in range(500):
109+
df = simulate(seed=init_seed+i)
110+
csvfile = 'HIV_fixed.csv'
111+
outbase = csvfile[:-4]
112+
113+
if df is not None and i > 0:
114+
115+
try:
116+
df = pd.concat([df, pd.read_csv(csvfile)])
117+
except FileNotFoundError:
118+
pass
119+
df.to_csv(csvfile, index=False)
120+
121+
if len(df['pivot']) > 0:
122+
pivot_ax, lengths_ax = pivot_plot(df, outbase)
123+
# liu_pivot = df['liu_pivot']
124+
# liu_pivot = liu_pivot[~np.isnan(liu_pivot)]
125+
# pivot_ax.plot(U, sm.distributions.ECDF(liu_pivot)(U), 'gray', label='Liu CV',
126+
# linewidth=3)
127+
# pivot_ax.legend()
128+
# fig = pivot_ax.figure
129+
# fig.savefig(csvfile[:-4] + '.pdf')
130+

examples/multi_target/lasso_example_multi.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from selection.tests.instance import gaussian_instance
99

1010
from learn_selection.utils import full_model_inference, pivot_plot
11-
from learn_selection.core import split_sampler, logit_fit
11+
from learn_selection.core import split_sampler, keras_fit
1212

1313
def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000):
1414

@@ -64,8 +64,8 @@ def meta_algorithm(XTX, XTXi, lam, sampler):
6464
splitting_sampler,
6565
success_params=(1, 1),
6666
B=B,
67-
fit_probability=logit_fit,
68-
fit_args={'df':20})
67+
fit_probability=keras_fit,
68+
fit_args={'epochs':10, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'})
6969

7070
if __name__ == "__main__":
7171
import statsmodels.api as sm

examples/multi_target/lasso_example_multi_CV.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from learn_selection.utils import full_model_inference, pivot_plot
1111
from learn_selection.core import split_sampler, keras_fit
12+
from learn_selection.Rutils import lasso_glmnet
1213

1314
def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000):
1415

@@ -28,7 +29,6 @@ def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000):
2829

2930
S = X.T.dot(y)
3031
covS = dispersion * X.T.dot(X)
31-
smooth_sampler = normal_sampler(S, covS)
3232
splitting_sampler = split_sampler(X * y[:, None], covS)
3333

3434
def meta_algorithm(X, XTXi, resid, sampler):

0 commit comments

Comments
 (0)