Skip to content

Commit b6dd8ae

Browse files
updating .travis.yml to do a doc build
1 parent e906fbb commit b6dd8ae

15 files changed

+812
-232
lines changed

.travis.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,12 @@ matrix:
6969
env:
7070
- INSTALL_TYPE=requirements
7171
- DEPENDS=
72+
- python: 3.6
73+
sudo: true
74+
dist: trusty
75+
env:
76+
- DOC_BUILD=1
77+
7278
before_install:
7379
- source travis-tools/utils.sh
7480
- travis_before_install
@@ -84,6 +90,14 @@ before_install:
8490

8591
install:
8692
# Install selectinf
93+
- |
94+
echo "backend : agg" > matplotlibrc
95+
if [ "$DOC_BUILD" ]; then # doc build
96+
pip install -r doc-requirements.txt
97+
cd doc
98+
jupytext --sync source/*/*.ipynb
99+
# Build without the API documentation, for the doctests
100+
make html
87101
- if [ "$RUN_R_TESTS" ]; then
88102
sudo apt-get install -y r-base r-base-dev r-cran-devtools r-cran-rcpp;
89103
pip install rpy2 statsmodels -c constraints.txt ;

doc-requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ numpydoc
66
matplotlib
77
texext
88
nb2plots
9-
rpy2
109
seaborn
1110
statsmodels
1211
tensorflow
1312
keras
1413
nbsphinx
14+
jupytext

doc/Makefile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,3 +122,8 @@ doctest:
122122
@echo
123123
@echo "The overview file is in build/doctest."
124124

125+
github: html
126+
# Needs ghp-import (pip install ghp-import)
127+
ghp-import -n -p $(BUILDROOT)/html/
128+
@echo
129+
@echo "Published to Github"

doc/source/learning/Basic_example.Rmd

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
---
2+
jupyter:
3+
jupytext:
4+
cell_metadata_filter: all,-slideshow
5+
formats: ipynb,Rmd
6+
text_representation:
7+
extension: .Rmd
8+
format_name: rmarkdown
9+
format_version: '1.1'
10+
jupytext_version: 1.1.1
11+
kernelspec:
12+
display_name: Python 3
13+
language: python
14+
name: python3
15+
---
16+
17+
# Simple example
18+
19+
Here we run a simple linear regression model (even without intercept)
20+
and make a selection when the $Z$ score is larger than 2.
21+
22+
The functions `partial_model_inference` and `pivot_plot` below are just simulation utilities
23+
used to simulate results in least squares regression. The underlying functionality
24+
is contained in the function `selectinf.learning.core.infer_general_target`.
25+
26+
27+
```{python collapsed=TRUE}
28+
import functools
29+
30+
import numpy as np, pandas as pd
31+
import matplotlib.pyplot as plt
32+
# %matplotlib inline
33+
34+
from selectinf.tests.instance import gaussian_instance
35+
36+
from selectinf.learning.utils import partial_model_inference, pivot_plot
37+
from selectinf.learning.core import normal_sampler
38+
from selectinf.learning.Rfitters import logit_fit
39+
```
40+
41+
```{python}
42+
np.random.seed(0) # for replicability
43+
def simulate(n=20, p=1, s=1, signal=1, sigma=2, alpha=0.1, B=2000):
44+
45+
# description of statistical problem
46+
47+
X, y, truth = gaussian_instance(n=n,
48+
p=p,
49+
s=s,
50+
equicorrelated=False,
51+
rho=0.5,
52+
sigma=sigma,
53+
signal=signal,
54+
random_signs=True,
55+
scale=False)[:3]
56+
57+
dispersion = sigma**2
58+
59+
S = X.T.dot(y)
60+
covS = dispersion * X.T.dot(X)
61+
sampler = normal_sampler(S, covS)
62+
63+
def base_algorithm(X, dispersion, sampler):
64+
65+
success = np.zeros(p)
66+
67+
scale = 0.
68+
noisy_S = sampler(scale=scale)
69+
70+
Z = noisy_S / np.sqrt(np.linalg.norm(X)**2 * dispersion)
71+
if Z > 2:
72+
return set([0])
73+
else:
74+
return set([])
75+
76+
selection_algorithm = functools.partial(base_algorithm, X, dispersion)
77+
78+
# run selection algorithm
79+
80+
return partial_model_inference(X,
81+
y,
82+
truth,
83+
selection_algorithm,
84+
sampler,
85+
B=B,
86+
fit_probability=logit_fit,
87+
fit_args={'df':20})
88+
```
89+
90+
```{python}
91+
dfs = []
92+
for i in range(1000):
93+
df = simulate()
94+
if df is not None:
95+
dfs.append(df)
96+
```
97+
98+
```{python}
99+
fig = plt.figure(figsize=(8, 8))
100+
results = pd.concat(dfs)
101+
pivot_plot(results, fig=fig);
102+
```
103+
104+
```{python collapsed=TRUE}
105+
106+
```

doc/source/learning/Basic_example.ipynb

Lines changed: 199 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
---
2+
jupyter:
3+
jupytext:
4+
cell_metadata_filter: all,-slideshow
5+
formats: ipynb,Rmd
6+
text_representation:
7+
extension: .Rmd
8+
format_name: rmarkdown
9+
format_version: '1.1'
10+
jupytext_version: 1.1.1
11+
kernelspec:
12+
display_name: Python 3
13+
language: python
14+
name: python3
15+
---
16+
17+
# Inference in the full model
18+
19+
This is the same example as considered in [Liu et al.](https://arxiv.org/abs/1801.09037) though we
20+
do not consider the special analysis in that paper. We let the computer
21+
guide us in correcting for selection.
22+
23+
The functions `full_model_inference` and `pivot_plot` below are just simulation utilities
24+
used to simulate results in least squares regression. The underlying functionality
25+
is contained in the function `selectinf.learning.core.infer_full_target`.
26+
27+
```{python}
28+
import functools
29+
30+
import numpy as np, pandas as pd
31+
import matplotlib.pyplot as plt
32+
# %matplotlib inline
33+
import regreg.api as rr
34+
35+
from selectinf.tests.instance import gaussian_instance # to generate the data
36+
from selectinf.learning.core import normal_sampler # our representation of the (limiting) Gaussian data
37+
38+
from selectinf.learning.utils import full_model_inference, pivot_plot
39+
from selectinf.learning.Rfitters import logit_fit
40+
```
41+
42+
We will now generate some data from an OLS regression model and fit the LASSO
43+
with a fixed value of $\lambda$. In the simulation world, we know the
44+
true parameters, hence we can then return
45+
pivots for each variable selected by the LASSO. These pivots should look
46+
(marginally) like draws from the uniform distribution on $[0, 1]$, e.g. from `np.random.sample`. The plot below checks this.
47+
48+
```{python}
49+
np.random.seed(0) # for replicability
50+
51+
def simulate(n=100,
52+
p=20,
53+
s=5,
54+
signal=(0.5, 1),
55+
sigma=2,
56+
alpha=0.1,
57+
B=4000,
58+
verbose=False):
59+
60+
# description of statistical problem
61+
62+
X, y, truth = gaussian_instance(n=n,
63+
p=p,
64+
s=s,
65+
equicorrelated=False,
66+
rho=0.5,
67+
sigma=sigma,
68+
signal=signal,
69+
random_signs=True,
70+
scale=False)[:3]
71+
72+
dispersion = sigma**2
73+
74+
S = X.T.dot(y)
75+
covS = dispersion * X.T.dot(X)
76+
77+
# this declares our target as linear in S where S has a given covariance
78+
sampler = normal_sampler(S, covS)
79+
80+
def base_algorithm(XTX, lam, sampler):
81+
82+
p = XTX.shape[0]
83+
success = np.zeros(p)
84+
85+
loss = rr.quadratic_loss((p,), Q=XTX)
86+
pen = rr.l1norm(p, lagrange=lam)
87+
88+
scale = 0.
89+
noisy_S = sampler(scale=scale)
90+
loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0)
91+
problem = rr.simple_problem(loss, pen)
92+
soln = problem.solve(max_its=50, tol=1.e-6)
93+
success += soln != 0
94+
95+
return set(np.nonzero(success)[0])
96+
97+
XTX = X.T.dot(X)
98+
XTXi = np.linalg.inv(XTX)
99+
resid = y - X.dot(XTXi.dot(X.T.dot(y)))
100+
dispersion = np.linalg.norm(resid)**2 / (n-p)
101+
102+
lam = 3.5 * np.sqrt(n)
103+
selection_algorithm = functools.partial(base_algorithm, XTX, lam)
104+
if verbose:
105+
print(selection_algorithm(sampler))
106+
# run selection algorithm
107+
108+
return full_model_inference(X,
109+
y,
110+
truth,
111+
selection_algorithm,
112+
sampler,
113+
success_params=(1, 1),
114+
B=B,
115+
fit_probability=logit_fit,
116+
fit_args={'df':20})
117+
```
118+
119+
Let's take a look at what we get as a return value:
120+
121+
```{python}
122+
while True:
123+
df = simulate(verbose=True)
124+
if df is not None:
125+
break
126+
df.columns
127+
```
128+
129+
```{python}
130+
dfs = []
131+
for i in range(10):
132+
df = simulate()
133+
if df is not None:
134+
dfs.append(df)
135+
```
136+
137+
```{python}
138+
fig = plt.figure(figsize=(8, 8))
139+
results = pd.concat(dfs)
140+
pivot_plot(results, fig=fig);
141+
```
142+
143+
```{python collapsed=TRUE}
144+
145+
```

0 commit comments

Comments
 (0)