Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also . Learn more about diff comparisons here.
base repository: jonathan-taylor/selective-inference
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: master
Choose a base ref
...
head repository: snigdhagit/selective-inference
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: master
Choose a head ref
Can’t automatically merge. Don’t worry, you can still create the pull request.
  • 5 commits
  • 4 files changed
  • 1 contributor

Commits on Jul 19, 2021

  1. commit changes so far

    Snigdha Panigrahi committed Jul 19, 2021
    Copy the full SHA
    c1d7acd View commit details
  2. test: output file for mle

    Snigdha Panigrahi committed Jul 19, 2021
    Copy the full SHA
    29d2ceb View commit details
  3. made same test

    Snigdha Panigrahi committed Jul 19, 2021
    Copy the full SHA
    b575094 View commit details

Commits on Jul 20, 2021

  1. update test for comparison

    Snigdha Panigrahi committed Jul 20, 2021
    Copy the full SHA
    e224b46 View commit details
  2. cleaned up test for comparisons of mle

    Snigdha Panigrahi committed Jul 20, 2021
    Copy the full SHA
    3989729 View commit details
1 change: 1 addition & 0 deletions selectinf/randomized/exact_reference.py
Original file line number Diff line number Diff line change
@@ -76,6 +76,7 @@ def __init__(self,

self.opt_linear = query.opt_linear
self.useIP = useIP
self.inverse_info = inverse_info

def summary(self,
alternatives=None,
2 changes: 2 additions & 0 deletions selectinf/randomized/query.py
Original file line number Diff line number Diff line change
@@ -1446,6 +1446,8 @@ def selective_MLE(observed_target,
final_estimator = target_cov.dot(_prec).dot(observed_target) \
+ target_cov.dot(target_lin.T.dot(prec_opt.dot(cond_mean - soln))) + C

T11 = target_cov.dot(target_lin.T.dot(prec_opt))

unbiased_estimator = target_cov.dot(_prec).dot(observed_target) + target_cov.dot(
_P - target_lin.T.dot(prec_opt).dot(target_off))

115 changes: 75 additions & 40 deletions selectinf/randomized/tests/test_exact_reference.py
Original file line number Diff line number Diff line change
@@ -4,15 +4,16 @@
from ..lasso import lasso, selected_targets
from ..exact_reference import exact_grid_inference

def test_approx_pivot(n=500,
p=100,
signal_fac=1.,
s=5,
sigma=2.,
rho=0.4,
randomizer_scale=1.,
equicorrelated=False,
useIP=False):
def test_inf(n=500,
p=100,
signal_fac=1.,
s=5,
sigma=2.,
rho=0.4,
randomizer_scale=1.,
equicorrelated=False,
useIP=False,
CI=True):

while True:

@@ -66,37 +67,71 @@ def test_approx_pivot(n=500,
cov_target_score,
useIP=useIP)

pivot = exact_grid_inf._pivots(beta_target)
if CI is False:
pivot = exact_grid_inf._pivots(beta_target)
return pivot

else:
lci, uci = exact_grid_inf._intervals(level=0.90)
coverage = (lci < beta_target) * (uci > beta_target)
length = uci - lci
mle_length = 1.65*2 * np.sqrt(np.diag(exact_grid_inf.inverse_info))
return np.mean(coverage), np.mean(length), np.mean(mle_length)

def main(nsim=300, CI = False):

if CI is False:

import matplotlib as mpl
mpl.use('tkagg')
import matplotlib.pyplot as plt
from statsmodels.distributions.empirical_distribution import ECDF

_pivot = []
for i in range(nsim):
_pivot.extend(test_inf(n=100,
p=400,
signal_fac=1.,
s=0,
sigma=2.,
rho=0.30,
randomizer_scale=0.7,
equicorrelated=True,
useIP=False,
CI=False))

print("iteration completed ", i)

plt.clf()
ecdf_pivot = ECDF(np.asarray(_pivot))
grid = np.linspace(0, 1, 101)
plt.plot(grid, ecdf_pivot(grid), c='blue')
plt.plot(grid, grid, 'k--')
plt.show()

else:
coverage_ = 0.
length_ = 0.
mle_length_= 0.
for n in range(nsim):
cov, len, mle_len = test_inf(n=400,
p=100,
signal_fac=0.5,
s=5,
sigma=2.,
rho=0.30,
randomizer_scale=0.7,
equicorrelated=True,
useIP=False,
CI=True)

coverage_ += cov
length_ += len
mle_length_ += mle_len
print("coverage so far ", coverage_ / (n + 1.))
print("lengths so far ", length_ / (n + 1.), mle_length_/ (n + 1.))
print("iteration completed ", n + 1)

return pivot

def main(nsim=300):

import matplotlib as mpl
mpl.use('tkagg')
import matplotlib.pyplot as plt
from statsmodels.distributions.empirical_distribution import ECDF

_pivot = []
for i in range(nsim):
_pivot.extend(test_approx_pivot(n=100,
p=400,
signal_fac=1.,
s=0,
sigma=2.,
rho=0.30,
randomizer_scale=0.7,
equicorrelated=True,
useIP=False))

print("iteration completed ", i)

plt.clf()
ecdf_pivot = ECDF(np.asarray(_pivot))
grid = np.linspace(0, 1, 101)
plt.plot(grid, ecdf_pivot(grid), c='blue')
plt.plot(grid, grid, 'k--')
plt.show()

if __name__ == "__main__":
main(nsim=100)
main(nsim=100, CI=True)
145 changes: 118 additions & 27 deletions selectinf/randomized/tests/test_selective_MLE_high.py
Original file line number Diff line number Diff line change
@@ -143,19 +143,18 @@ def test_selected_targets(n=2000,
result = conv.selective_MLE(observed_target,
cov_target,
cov_target_score)[0]
estimate = result['MLE']

pval = result['pvalue']
intervals = np.asarray(result[['lower_confidence', 'upper_confidence']])

beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta))

coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1])

# print("check ", np.asarray(result['MLE']), np.asarray(result['unbiased']))

return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals



def test_instance():
n, p, s = 500, 100, 5
X = np.random.standard_normal((n, p))
@@ -279,33 +278,125 @@ def test_selected_targets_disperse(n=500,
return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals


def main(nsim=500, full=False):
P0, PA, cover, length_int = [], [], [], []
from statsmodels.distributions import ECDF
# def main(nsim=500, full=False):
# P0, PA, cover, length_int = [], [], [], []
# from statsmodels.distributions import ECDF
#
# n, p, s = 500, 100, 0
#
# for i in range(nsim):
# if full:
# if n > p:
# full_dispersion = True
# else:
# full_dispersion = False
# p0, pA, cover_, intervals = test_full_targets(n=n, p=p, s=s, full_dispersion=full_dispersion)
# avg_length = intervals[:, 1] - intervals[:, 0]
# else:
# full_dispersion = True
# p0, pA, cover_, intervals = test_selected_targets(n=n, p=p, s=s, full_dispersion=full_dispersion)
# avg_length = intervals[:, 1] - intervals[:, 0]
#
# cover.extend(cover_)
# P0.extend(p0)
# PA.extend(pA)
# # print(
# # np.array(PA) < 0.1, np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover),
# # np.mean(avg_length), 'null pvalue + power + length')
# print("coverage and lengths ", np.mean(cover), np.mean(avg_length))


def test_selected_instance(seedn,
n=2000,
p=200,
signal_fac=1.2,
s=5,
sigma=2,
rho=0.7,
randomizer_scale=1.,
full_dispersion=True):
"""
Compare to R randomized lasso
"""

n, p, s = 500, 100, 0
inst, const = gaussian_instance, lasso.gaussian
signal = np.sqrt(signal_fac * 2 * np.log(p))

while True:
np.random.seed(seed=seedn)
X, Y, beta = inst(n=n,
p=p,
signal=signal,
s=s,
equicorrelated=True,
rho=rho,
sigma=sigma,
random_signs=True)[:3]

idx = np.arange(p)
sigmaX = rho ** np.abs(np.subtract.outer(idx, idx))
print("snr", beta.T.dot(sigmaX).dot(beta) / ((sigma ** 2.) * n))

n, p = X.shape

sigma_ = np.std(Y)
W = 0.8 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_

conv = const(X,
Y,
W,
ridge_term=0.,
randomizer_scale=randomizer_scale * sigma_)

signs = conv.fit()
nonzero = signs != 0
print("dimensions", n, p, nonzero.sum())

if nonzero.sum() > 0:
dispersion = None
if full_dispersion:
dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p)

(observed_target,
cov_target,
cov_target_score,
alternatives) = selected_targets(conv.loglike,
conv._W,
nonzero,
dispersion=dispersion)

result = conv.selective_MLE(observed_target,
cov_target,
cov_target_score)[0]

return result['MLE'], result['lower_confidence'], result['upper_confidence']

def main(nsim =50):

import pandas as pd
column_names = ["Experiment Replicate", "MLE", "Lower Conf", "Upper Conf"]
master_DF = pd.DataFrame(columns=column_names)
DF = pd.DataFrame(columns=column_names)

n, p, s = 500, 100, 5
for i in range(nsim):
if full:
if n > p:
full_dispersion = True
else:
full_dispersion = False
p0, pA, cover_, intervals = test_full_targets(n=n, p=p, s=s, full_dispersion=full_dispersion)
avg_length = intervals[:, 1] - intervals[:, 0]
else:
full_dispersion = True
p0, pA, cover_, intervals = test_selected_targets(n=n, p=p, s=s, full_dispersion=full_dispersion)
avg_length = intervals[:, 1] - intervals[:, 0]

cover.extend(cover_)
P0.extend(p0)
PA.extend(pA)
# print(
# np.array(PA) < 0.1, np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover),
# np.mean(avg_length), 'null pvalue + power + length')
print("coverage and lengths ", np.mean(cover), np.mean(avg_length))
full_dispersion = True
mle, lower_conf, upper_conf = test_selected_instance(seedn=i, n=n, p=p, s=s, signal_fac=1.2, full_dispersion=full_dispersion)
DF["MLE"] = pd.Series(mle)
DF["Lower Conf"] = pd.Series(lower_conf)
DF["Upper Conf"] = pd.Series(upper_conf)
DF["Experiment Replicate"] = pd.Series((i*np.ones(len(mle),int)).tolist())

master_DF = DF.append(master_DF, ignore_index=True)

import os
outpath = os.path.dirname(__file__)

outfile_mse_html = os.path.join(outpath, "compare_mle_old.html")
outfile_mse_csv = os.path.join(outpath, "compare_mle_old.csv")

master_DF.to_html(outfile_mse_html, index=False)
master_DF.to_csv(outfile_mse_csv, index=False)

if __name__ == "__main__":
main(nsim=50)
main(nsim=50)