diff --git a/.readthedocs.yml b/.readthedocs.yml index bbfd45f45..cb5b32965 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -18,12 +18,18 @@ sphinx: #formats: all # Optionally set the version of Python and requirements required to build your docs + python: - version: 3.6 + version: 3.8 install: + - requirements: https://raw.githubusercontent.com/jonathan-taylor/regreg/master/requirements.txt + - method: pip + path: https://github.com/jonathan-taylor/regreg.git - requirements: requirements.txt - - requirements: doc-requirements.txt + - requirements: doc/requirements.txt - method: setuptools path: . +submodules: + include: all \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 11e16d88b..24d8ebb83 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,7 @@ dist: trusty python: - 2.7 - 3.5 + - 3.6 notifications: email: false addons: @@ -69,6 +70,7 @@ matrix: env: - INSTALL_TYPE=requirements - DEPENDS= + before_install: - source travis-tools/utils.sh - travis_before_install @@ -87,8 +89,9 @@ install: - if [ "$RUN_R_TESTS" ]; then sudo apt-get install -y r-base r-base-dev r-cran-devtools r-cran-rcpp; pip install rpy2 statsmodels -c constraints.txt ; - Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')"; - sudo Rscript -e "install.packages(c('glmnet', 'intervals', 'adaptMCMC', 'SLOPE', 'knockoff'), repos='http://cloud.r-project.org')"; + sudo Rscript -e "install.packages(c('devtools', 'intervals', 'adaptMCMC', 'SLOPE'), repos='http://cloud.r-project.org')"; + sudo Rscript -e "require(devtools); install_version('glmnet', version='2.0-18', repos='http://cloud.r-project.org')"; + sudo Rscript -e "install.packages('knockoff', repos='http://cloud.r-project.org')"; git clone https://github.com/jonathan-taylor/R-selective.git; cd R-selective; git submodule init; @@ -107,11 +110,22 @@ script: # No figure windows for mpl; quote to hide : from travis-ci yaml parsing - pip install -r requirements.txt -c constraints.txt; # older rpy2 # Change into an innocuous directory and find tests from installation - - mkdir for_testing - - cd for_testing - 'echo "backend : agg" > matplotlibrc' - + - | + if [ "$DOC_BUILD" ]; then + pip install -r doc-requirements.txt; + cd doc; + jupytext --sync source/*/*.ipynb; + sudo apt-get install pandoc; + make html; + fi + # + # # Build the htmlwithout the API documentation, for the doctests + # + # fi # Doctests only on platforms that have compatible fp output + - mkdir for_testing + - cd for_testing - if [ `uname` == "Darwin" ] || [ "${TRAVIS_PYTHON_VERSION:0:1}" == "3" ]; then DOCTEST_ARGS="--with-doctest"; diff --git a/C-software b/C-software index 851279ffb..7a3d663fe 160000 --- a/C-software +++ b/C-software @@ -1 +1 @@ -Subproject commit 851279ffb326b145d00af45b87e7d857e3941ec9 +Subproject commit 7a3d663feadaf6c61400359fe8fe95a61099b645 diff --git a/appveyor.yml b/appveyor.yml index 86ae986cd..6e121c1ee 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -28,9 +28,6 @@ environment: - PYTHON: C:\Python36-x64 NP_BUILD_DEP: "1.13.3" NP_TEST_DEP: "1.13.3" - - PYTHON: C:\Python35-x64 - NP_BUILD_DEP: "1.13.3" - NP_TEST_DEP: "1.13.3" - PYTHON: C:\Python37 NP_BUILD_DEP: "1.14.5" @@ -39,9 +36,14 @@ environment: - PYTHON: C:\Python36 NP_BUILD_DEP: "1.13.3" NP_TEST_DEP: "1.13.3" - - PYTHON: C:\Python35 - NP_BUILD_DEP: "1.13.3" - NP_TEST_DEP: "1.13.3" + + # problem with pandas + cython for py35 + # - PYTHON: C:\Python35-x64 + # NP_BUILD_DEP: "1.13.3" + # NP_TEST_DEP: "1.13.3" + # - PYTHON: C:\Python35 + # NP_BUILD_DEP: "1.13.3" + # 
NP_TEST_DEP: "1.13.3" install: - cmd: echo "Using cmd" diff --git a/doc/Gaussian queries.Rmd b/doc/Gaussian queries.Rmd new file mode 100644 index 000000000..3dfb026f8 --- /dev/null +++ b/doc/Gaussian queries.Rmd @@ -0,0 +1,169 @@ +--- +jupyter: + jupytext: + formats: ipynb,Rmd + text_representation: + extension: .Rmd + format_name: rmarkdown + format_version: '1.2' + jupytext_version: 1.10.2 + kernelspec: + display_name: Python 3 + language: python + name: python3 +--- + +## KKT conditions + +$$ +\omega = \nabla \ell(o) + u + \epsilon o. +$$ + +## Current terms used in selective MLE + +- `observed_score_state`: for LASSO this is $S=-X^TY$ (and for any linear regression), in general it should be +$\nabla \ell(\beta^*) - Q(\beta^*)\beta^*$, call this $A$ + +- `opt_offset`: this is $\hat{u}$ or (changed everywhere to `observed_subgrad`) + +- `opt_linear`: this is $\nabla^2 \ell(\hat{\beta}) + \epsilon I$ restricted to "selected" subspace, call this $L$ + +## Rewrite of KKT + +$$ +\omega = Lo + S + u. +$$ + +## More terms in the code + +- Randomization precision `randomizer_prec` call this $\Theta_{\omega}=\Sigma_{\omega}^{-1}$ so $\omega \sim N(0, \Theta^{-1})$. + +- `cond_cov`= $\Sigma_{o|S,u}$, `cond_mean`, `cond_precision`=$\Sigma_{o|S,u}^{-1}=\Theta_{o|S,u}$: +describe implied law of $o|S,u$. These are computed in `_setup_implied_gaussian`. Specifically, we have + +$$ +\begin{aligned} +\Sigma_{o|S,u} = (L^T\Theta L)^{-1} +\end{aligned} +$$ + +- `regress_opt` (formerly `logdens_linear`) call this $A$: this is the regression of $o$ onto $S+u$, in the implied +Gaussian given $u,S$ i.e. + +$$ +E[o|S,u] = A(S+u) = -\Sigma_{o|S,u} L^T \Theta_{\omega}(S+u). +$$ + +- `cond_mean` is the conditional mean of $o|S,u$ evaluated at observed $S,u$: $A(S+u)_{obs}$. Or, `regress_opt_score(observed_score_state + observed_subgrad)` + + +## Target related + +- `observed_target, target_cov, target_prec`: not much explanation needed $\hat{\theta}, \Sigma_{\hat{\theta}}, \Theta_{\hat{\theta}} = \Sigma_{\hat{\theta}}^{-1}$ + +- `target_score_cov`: $\Sigma_{\hat{\theta},S}$ + +- `regress_target`: regression of target onto score, formally this would be $\Sigma_{\hat{\theta},S}\Theta_S $ (transpose of usual way of writing regression, not in code yet), let's call it $B$ for now + +- `cov_product`: $\Sigma_S \Theta_{\omega}$: product of score covariance and randomization precision. 
+ +- `cov_score`: $\Sigma_S$ + +- `score_offset = observed_score_state + observed_subgrad`=$S+u$ + +### In `selective_MLE` + +- `target_linear`: $\Sigma_{S,\hat{\theta}}\Theta_{\hat{\theta}}= \Sigma_S B^T\Theta_{\hat{\theta}}$ (changed name to `regress_score_target`) + +- `target_offset`: $S+u-\Sigma_S B^T \Theta_{\hat{\theta}} \hat{\theta} = S+u - \Sigma_{S,\hat{\theta}} \Theta_{\hat{\theta}} \hat{\theta}$ (changed name to `resid_score_target`) + +- `target_lin`: $A\Sigma_S B^T \Theta_{\hat{\theta}} = -(L^T\Theta_{\omega}L)^{-1} L^T\Theta_{\omega} \Sigma_S B^T \Theta_{\hat{\theta}}$ (changed name to `regress_opt_target` + +- `target_off`: $A(S+u - \Sigma_S B^T \Theta_{\hat{\theta}} \hat{\theta})$ `resid_opt_target` + +- `_P`: $\Theta_{\hat{\theta}} B\Sigma_S \Theta_{\omega} (S+u-\Sigma_S B^T \Theta_{\hat{\theta}} \hat{\theta}) = \Theta_{\hat{\theta}} B\Sigma_S \Theta_{\omega} (S+u) - \Theta_{\hat{\theta}} B\Sigma_S \Theta_{\omega} \Sigma_S B^T \Theta_{\hat{\theta}} \hat{\theta} = \Theta_{\hat{\theta}} B\Sigma_S \Theta_{\omega} (S+u) - \Theta_{\hat{\theta}} B\Sigma_S \Theta_{\omega} \Sigma_{\omega} \Theta_{\omega} \Sigma_S B^T \Theta_{\hat{\theta}} \hat{\theta} $. +Let's call `_P` $\xi$ + +- `_prec`: $\Theta_{\hat{\theta}} + \Theta_{\hat{\theta}} B\Sigma_S \Theta_{\omega} \Sigma_S B^T \Theta_{\hat{\theta}} +- \Theta_{\hat{\theta}} B \Sigma_S A^T \Theta_{o|S,u} A \Sigma_S B^T \Theta_{\hat{\theta}}$ + +- `C`: something that can be computed with all of the above... I guess (but am not sure) that `_prec` is +the precision of the (best case, no-selection) unbiased estimate of our target when we condition on $N,u$ + +- More precisely, + +$$ +\begin{aligned} +\Theta_{\hat{\theta}} C &= \xi + (A\Sigma_S B^T \Theta_{\hat{\theta}})^T L^T \Theta_{\omega} L (A\Sigma_S B^T \Theta_{\hat{\theta}})^T \hat{\theta} - (A\Sigma_S B^T \Theta_{\hat{\theta}})^T L^T \Theta_{\omega} L A(S+u) \\ +&= \xi + \Theta_{\hat{\theta}}B \left(\Sigma_S A^T L^T\Theta_{\omega} L A \Sigma_S B^T \Theta_{\hat{\theta}} \hat{\theta} - \Sigma_S A^T L^T\Theta_{\omega} L A(S+u) \right) \\ +&= \xi + \Theta_{\hat{\theta}}B \left(\Sigma_S \Theta_{\omega} L (L^T\Theta_{\omega} L)^{-1} L^T \Theta_{\omega} \Sigma_S B^T \Theta_{\hat{\theta}} \hat{\theta} + \Sigma_S \Theta_{\omega}L A(S+u) \right) \\ +\end{aligned} +$$ + +The expression $A(S+u)$ is `cond_mean` and the other term can be computed straightforwardly. We've used the fact +$$ +A\Sigma_S = -\Sigma_{o|S,u}L^T\Theta_{\omega} \Sigma_S =- (L^T\Theta_{\omega}L)^{-1}L^T\Theta_{\omega}\Sigma_S +$$ + + + + + +- Don't know what to sensibly call the last three things... but `_P` and `_prec` are the arguments to the +optimization problem so these are what needs computing. I did change `_prec` to `prec_target_nosel` + +- `cov_target.dot(regress_opt_target.T.dot(prec_opt))`. This is + +$$-\Sigma_{\hat{\theta}} \Theta_{\hat{\theta}}B \Sigma_S\Theta_{\omega} L (L^T\Theta_{\omega}L)^{-1} (L^T\Theta_{\omega} L) = B \Sigma_S\Theta_{\omega} L$$ + +- `regress_opt_target.T.dot(prec_opt)`. This is + +$$-\Theta_{\hat{\theta}}B \Sigma_S\Theta_{\omega} L (L^T\Theta_{\omega}L)^{-1} (L^T\Theta_{\omega} L) = \Theta_{\hat{\theta}} B \Sigma_S\Theta_{\omega} L$$ + +- `regress_opt_target.T.dot(prec_opt).dot(regress_opt_target)`: This is + +$$ +\Theta_{\hat{\theta}}B \Sigma_S\Theta_{\omega} L (L^T\Theta_{\omega}L)^{-1} L^T\Theta_{\omega} \Sigma_S B^T \Theta_{\hat{\theta}} +$$ + + +### Computational considerations? 
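To fix how the terms above fit together before worrying about efficiency, here is a minimal dense `numpy` sketch. The variable names (`L`, `prec_omega`, `cov_score`, `B`, `prec_target`, `score_offset`, `observed_target`) are hypothetical stand-ins for the symbols defined above ($L$, $\Theta_\omega$, $\Sigma_S$, $B$, $\Theta_{\hat{\theta}}$, $S+u$, $\hat{\theta}$); they are not the package's actual argument names, and all shapes are assumed conformable ($L$ is $p\times E$, $B$ is $t\times p$, and so on).

```python
import numpy as np

def implied_gaussian_terms(L, prec_omega, cov_score, B, prec_target,
                           score_offset, observed_target):
    # cond_precision = L^T Theta_omega L and cond_cov = its inverse,
    # the implied precision/covariance of o | S, u.
    cond_prec = L.T @ prec_omega @ L
    cond_cov = np.linalg.inv(cond_prec)

    # regress_opt (A): regression of o onto S + u,
    # A = -Sigma_{o|S,u} L^T Theta_omega.
    regress_opt = -cond_cov @ L.T @ prec_omega

    # cond_mean = A (S + u), evaluated at the observed score_offset.
    cond_mean = regress_opt @ score_offset

    # regress_score_target = Sigma_S B^T Theta_target (formerly target_linear)
    # resid_score_target  = S + u - Sigma_S B^T Theta_target theta_hat
    regress_score_target = cov_score @ B.T @ prec_target
    resid_score_target = score_offset - regress_score_target @ observed_target

    # _P (xi): Theta_target B Sigma_S Theta_omega resid_score_target
    _P = prec_target @ B @ cov_score @ prec_omega @ resid_score_target

    # _prec (prec_target_nosel):
    # Theta_target
    #   + Theta_target B Sigma_S Theta_omega Sigma_S B^T Theta_target
    #   - Theta_target B Sigma_S A^T Theta_{o|S,u} A Sigma_S B^T Theta_target
    T1 = prec_target @ B @ cov_score @ prec_omega @ cov_score @ B.T @ prec_target
    T2 = (prec_target @ B @ cov_score @ regress_opt.T @ cond_prec
          @ regress_opt @ cov_score @ B.T @ prec_target)
    _prec = prec_target + T1 - T2

    return regress_opt, cond_cov, cond_mean, _P, _prec
```

Under those naming and shape assumptions, the returned `_P` and `_prec` correspond to the two arguments of the selective-MLE optimization problem described above; the efficiency question below is how to avoid forming the intermediate $p\times p$ products explicitly.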
+ + +#### Case 1: $\Theta_{\omega}^{1/2}$ is known + + +Another potential downside to all this is that these matrices will generally be $p \times p$. I think in `price_of_selection` I had written some way of doing part of this without having to form all of these matrices +explicitly. However, the difference of the last two matrices in `_prec` can be computed (if we know $\Sigma_{\omega}^{\pm 1/2}$ as identity minus rank $E$ matrix I think and +$$ +A^T\Sigma_{o|S,u}A = \Theta_{\omega} L^T \Sigma_{o|S,u} L \Theta_{\omega} +$$ +so we want to compute +$$ +\Theta_{\omega} - \Theta_{\omega} L^T \Sigma_{o|S,u} L \Theta_{\omega} = \Theta_{\omega}^{1/2}(P - \Theta_{\omega}^{1/2}L^T (L^T\Theta_{\omega} L)^{-1} L\Theta_{\omega}^{1/2}) \Theta_{\omega}^{1/2} +$$ +with $P$ projection onto $\text{row}(\Sigma_{\omega})$. So we need to compute projection on to a $E$-dimensional +subspace of $\text{row}(\Sigma_{\omega})$. Morally, this makes sense even if $\Sigma_{\omega}$ is not full rank but seems a little sketchy. + +We might also try computing +$$ +\begin{aligned} +\Sigma_S\Theta_{\omega}\Sigma_S - \Sigma_S\Theta_{\omega} L^T \Sigma_{o|S,u} L \Theta_{\omega} \Sigma_S &= \Sigma_S \Theta_{\omega}^{1/2}(P - \Theta_{\omega}^{1/2}L^T (L^T\Theta_{\omega} L)^{-1} L\Theta_{\omega}^{1/2}) \Theta_{\omega}^{1/2} \Sigma_S \\ +&= \Sigma_S \Theta_{\omega} \Theta_{\omega}^{-1/2}(P - \Theta_{\omega}^{1/2}L^T (L^T\Theta_{\omega} L)^{-1} L\Theta_{\omega}^{1/2}) \Theta_{\omega}^{-1/2} \Theta_{\omega} \Sigma_S \\ +&= \Sigma_S \Theta_{\omega} \Sigma_{\omega}^{1/2}(P - \Theta_{\omega}^{1/2}L^T (L^T\Theta_{\omega} L)^{-1} L\Theta_{\omega}^{1/2}) \Sigma_{\omega}^{1/2} \Theta_{\omega} \Sigma_S \\ +&= \Sigma_S \Theta_{\omega} (\Sigma_{\omega} - PL^T (L^T\Theta_{\omega} L)^{-1} LP) \Theta_{\omega} \Sigma_S \\ +&= \Sigma_S \Theta_{\omega} (\Sigma_{\omega} - L^T (L^T\Theta_{\omega} L)^{-1} L) \Theta_{\omega} \Sigma_S \\ +\end{aligned} +$$ + +## Three matrices + +- All the computations above can be expressed of some target specific info like $B, \Theta_{\hat{\theta}}, \Sigma_{\hat{\theta}}, \hat{\theta}$ and + +$$ +\begin{aligned} +M_1 &= \Sigma_S \Theta_{\omega} \\ +M_2 &= M_1 \Sigma_{\omega} M_1^T \\ +M_3 &= M_1 L (L^T\Sigma_{\omega}L)^{-1} L M_1^T +\end{aligned} +$$ \ No newline at end of file diff --git a/doc/Gaussian queries.ipynb b/doc/Gaussian queries.ipynb new file mode 100644 index 000000000..89d0cbc46 --- /dev/null +++ b/doc/Gaussian queries.ipynb @@ -0,0 +1,208 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## KKT conditions\n", + "\n", + "$$\n", + "\\omega = \\nabla \\ell(o) + u + \\epsilon o.\n", + "$$\n", + "\n", + "## Current terms used in selective MLE\n", + "\n", + "- `observed_score_state`: for LASSO this is $S=-X^TY$ (and for any linear regression), in general it should be\n", + "$\\nabla \\ell(\\beta^*) - Q(\\beta^*)\\beta^*$, call this $A$\n", + "\n", + "- `opt_offset`: this is $\\hat{u}$ or (changed everywhere to `observed_subgrad`)\n", + "\n", + "- `opt_linear`: this is $\\nabla^2 \\ell(\\hat{\\beta}) + \\epsilon I$ restricted to \"selected\" subspace, call this $L$\n", + "\n", + "## Rewrite of KKT\n", + "\n", + "$$\n", + "\\omega = Lo + S + u.\n", + "$$\n", + "\n", + "## More terms in the code\n", + "\n", + "- Randomization precision `randomizer_prec` call this $\\Theta_{\\omega}=\\Sigma_{\\omega}^{-1}$ so $\\omega \\sim N(0, \\Theta^{-1})$.\n", + "\n", + "- `cond_cov`= $\\Sigma_{o|S,u}$, `cond_mean`, `cond_precision`=$\\Sigma_{o|S,u}^{-1}=\\Theta_{o|S,u}$:\n", + 
"describe implied law of $o|S,u$. These are computed in `_setup_implied_gaussian`. Specifically, we have\n", + "\n", + "$$\n", + "\\begin{aligned}\n", + "\\Sigma_{o|S,u} = (L^T\\Theta L)^{-1}\n", + "\\end{aligned}\n", + "$$\n", + "\n", + "- `regress_opt` (formerly `logdens_linear`) call this $A$: this is the regression of $o$ onto $S+u$, in the implied\n", + "Gaussian given $u,S$ i.e.\n", + "\n", + "$$\n", + "E[o|S,u] = A(S+u) = -\\Sigma_{o|S,u} L^T \\Theta_{\\omega}(S+u).\n", + "$$\n", + "\n", + "- `cond_mean` is the conditional mean of $o|S,u$ evaluated at observed $S,u$: $A(S+u)_{obs}$. Or, `regress_opt_score(observed_score_state + observed_subgrad)`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Target related\n", + "\n", + "- `observed_target, target_cov, target_prec`: not much explanation needed $\\hat{\\theta}, \\Sigma_{\\hat{\\theta}}, \\Theta_{\\hat{\\theta}} = \\Sigma_{\\hat{\\theta}}^{-1}$\n", + "\n", + "- `target_score_cov`: $\\Sigma_{\\hat{\\theta},S}$\n", + "\n", + "- `regress_target`: regression of target onto score, formally this would be $\\Sigma_{\\hat{\\theta},S}\\Theta_S $ (transpose of usual way of writing regression, not in code yet), let's call it $B$ for now\n", + "\n", + "- `cov_product`: $\\Sigma_S \\Theta_{\\omega}$: product of score covariance and randomization precision.\n", + "\n", + "- `cov_score`: $\\Sigma_S$\n", + "\n", + "- `score_offset = observed_score_state + observed_subgrad`=$S+u$\n", + "\n", + "### In `selective_MLE`\n", + "\n", + "- `target_linear`: $\\Sigma_{S,\\hat{\\theta}}\\Theta_{\\hat{\\theta}}= \\Sigma_S B^T\\Theta_{\\hat{\\theta}}$ (changed name to `regress_score_target`)\n", + "\n", + "- `target_offset`: $S+u-\\Sigma_S B^T \\Theta_{\\hat{\\theta}} \\hat{\\theta} = S+u - \\Sigma_{S,\\hat{\\theta}} \\Theta_{\\hat{\\theta}} \\hat{\\theta}$ (changed name to `resid_score_target`)\n", + "\n", + "- `target_lin`: $A\\Sigma_S B^T \\Theta_{\\hat{\\theta}} = -(L^T\\Theta_{\\omega}L)^{-1} L^T\\Theta_{\\omega} \\Sigma_S B^T \\Theta_{\\hat{\\theta}}$ (changed name to `regress_opt_target`\n", + "\n", + "- `target_off`: $A(S+u - \\Sigma_S B^T \\Theta_{\\hat{\\theta}} \\hat{\\theta})$ `resid_opt_target`\n", + "\n", + "- `_P`: $\\Theta_{\\hat{\\theta}} B\\Sigma_S \\Theta_{\\omega} (S+u-\\Sigma_S B^T \\Theta_{\\hat{\\theta}} \\hat{\\theta}) = \\Theta_{\\hat{\\theta}} B\\Sigma_S \\Theta_{\\omega} (S+u) - \\Theta_{\\hat{\\theta}} B\\Sigma_S \\Theta_{\\omega} \\Sigma_S B^T \\Theta_{\\hat{\\theta}} \\hat{\\theta} = \\Theta_{\\hat{\\theta}} B\\Sigma_S \\Theta_{\\omega} (S+u) - \\Theta_{\\hat{\\theta}} B\\Sigma_S \\Theta_{\\omega} \\Sigma_{\\omega} \\Theta_{\\omega} \\Sigma_S B^T \\Theta_{\\hat{\\theta}} \\hat{\\theta} $.\n", + "Let's call `_P` $\\xi$\n", + "\n", + "- `_prec`: $\\Theta_{\\hat{\\theta}} + \\Theta_{\\hat{\\theta}} B\\Sigma_S \\Theta_{\\omega} \\Sigma_S B^T \\Theta_{\\hat{\\theta}}\n", + "- \\Theta_{\\hat{\\theta}} B \\Sigma_S A^T \\Theta_{o|S,u} A \\Sigma_S B^T \\Theta_{\\hat{\\theta}}$\n", + "\n", + "- `C`: something that can be computed with all of the above... 
I guess (but am not sure) that `_prec` is \n", + "the precision of the (best case, no-selection) unbiased estimate of our target when we condition on $N,u$ \n", + "\n", + "- More precisely,\n", + "\n", + "$$\n", + "\\begin{aligned}\n", + "\\Theta_{\\hat{\\theta}} C &= \\xi + (A\\Sigma_S B^T \\Theta_{\\hat{\\theta}})^T L^T \\Theta_{\\omega} L (A\\Sigma_S B^T \\Theta_{\\hat{\\theta}})^T \\hat{\\theta} - (A\\Sigma_S B^T \\Theta_{\\hat{\\theta}})^T L^T \\Theta_{\\omega} L A(S+u) \\\\\n", + "&= \\xi + \\Theta_{\\hat{\\theta}}B \\left(\\Sigma_S A^T L^T\\Theta_{\\omega} L A \\Sigma_S B^T \\Theta_{\\hat{\\theta}} \\hat{\\theta} - \\Sigma_S A^T L^T\\Theta_{\\omega} L A(S+u) \\right) \\\\\n", + "&= \\xi + \\Theta_{\\hat{\\theta}}B \\left(\\Sigma_S \\Theta_{\\omega} L (L^T\\Theta_{\\omega} L)^{-1} L^T \\Theta_{\\omega} \\Sigma_S B^T \\Theta_{\\hat{\\theta}} \\hat{\\theta} + \\Sigma_S \\Theta_{\\omega}L A(S+u) \\right) \\\\\n", + "\\end{aligned}\n", + "$$\n", + "\n", + "The expression $A(S+u)$ is `cond_mean` and the other term can be computed straightforwardly. We've used the fact\n", + "$$\n", + "A\\Sigma_S = -\\Sigma_{o|S,u}L^T\\Theta_{\\omega} \\Sigma_S =- (L^T\\Theta_{\\omega}L)^{-1}L^T\\Theta_{\\omega}\\Sigma_S\n", + "$$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "\n", + "- Don't know what to sensibly call the last three things... but `_P` and `_prec` are the arguments to the\n", + "optimization problem so these are what needs computing. I did change `_prec` to `prec_target_nosel`\n", + "\n", + "- `cov_target.dot(regress_opt_target.T.dot(prec_opt))`. This is\n", + "\n", + "$$-\\Sigma_{\\hat{\\theta}} \\Theta_{\\hat{\\theta}}B \\Sigma_S\\Theta_{\\omega} L (L^T\\Theta_{\\omega}L)^{-1} (L^T\\Theta_{\\omega} L) = B \\Sigma_S\\Theta_{\\omega} L$$\n", + "\n", + "- `regress_opt_target.T.dot(prec_opt)`. This is\n", + "\n", + "$$-\\Theta_{\\hat{\\theta}}B \\Sigma_S\\Theta_{\\omega} L (L^T\\Theta_{\\omega}L)^{-1} (L^T\\Theta_{\\omega} L) = \\Theta_{\\hat{\\theta}} B \\Sigma_S\\Theta_{\\omega} L$$\n", + "\n", + "- `regress_opt_target.T.dot(prec_opt).dot(regress_opt_target)`: This is\n", + "\n", + "$$\n", + "\\Theta_{\\hat{\\theta}}B \\Sigma_S\\Theta_{\\omega} L (L^T\\Theta_{\\omega}L)^{-1} L^T\\Theta_{\\omega} \\Sigma_S B^T \\Theta_{\\hat{\\theta}}\n", + "$$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Computational considerations?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Case 1: $\\Theta_{\\omega}^{1/2}$ is known" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Another potential downside to all this is that these matrices will generally be $p \\times p$. I think in `price_of_selection` I had written some way of doing part of this without having to form all of these matrices\n", + "explicitly. However, the difference of the last two matrices in `_prec` can be computed (if we know $\\Sigma_{\\omega}^{\\pm 1/2}$ as identity minus rank $E$ matrix I think and\n", + "$$\n", + "A^T\\Sigma_{o|S,u}A = \\Theta_{\\omega} L^T \\Sigma_{o|S,u} L \\Theta_{\\omega}\n", + "$$\n", + "so we want to compute\n", + "$$\n", + "\\Theta_{\\omega} - \\Theta_{\\omega} L^T \\Sigma_{o|S,u} L \\Theta_{\\omega} = \\Theta_{\\omega}^{1/2}(P - \\Theta_{\\omega}^{1/2}L^T (L^T\\Theta_{\\omega} L)^{-1} L\\Theta_{\\omega}^{1/2}) \\Theta_{\\omega}^{1/2}\n", + "$$\n", + "with $P$ projection onto $\\text{row}(\\Sigma_{\\omega})$. 
So we need to compute projection on to a $E$-dimensional\n", + "subspace of $\\text{row}(\\Sigma_{\\omega})$. Morally, this makes sense even if $\\Sigma_{\\omega}$ is not full rank but seems a little sketchy.\n", + "\n", + "We might also try computing\n", + "$$\n", + "\\begin{aligned}\n", + "\\Sigma_S\\Theta_{\\omega}\\Sigma_S - \\Sigma_S\\Theta_{\\omega} L^T \\Sigma_{o|S,u} L \\Theta_{\\omega} \\Sigma_S &= \\Sigma_S \\Theta_{\\omega}^{1/2}(P - \\Theta_{\\omega}^{1/2}L^T (L^T\\Theta_{\\omega} L)^{-1} L\\Theta_{\\omega}^{1/2}) \\Theta_{\\omega}^{1/2} \\Sigma_S \\\\\n", + "&= \\Sigma_S \\Theta_{\\omega} \\Theta_{\\omega}^{-1/2}(P - \\Theta_{\\omega}^{1/2}L^T (L^T\\Theta_{\\omega} L)^{-1} L\\Theta_{\\omega}^{1/2}) \\Theta_{\\omega}^{-1/2} \\Theta_{\\omega} \\Sigma_S \\\\\n", + "&= \\Sigma_S \\Theta_{\\omega} \\Sigma_{\\omega}^{1/2}(P - \\Theta_{\\omega}^{1/2}L^T (L^T\\Theta_{\\omega} L)^{-1} L\\Theta_{\\omega}^{1/2}) \\Sigma_{\\omega}^{1/2} \\Theta_{\\omega} \\Sigma_S \\\\\n", + "&= \\Sigma_S \\Theta_{\\omega} (\\Sigma_{\\omega} - PL^T (L^T\\Theta_{\\omega} L)^{-1} LP) \\Theta_{\\omega} \\Sigma_S \\\\\n", + "&= \\Sigma_S \\Theta_{\\omega} (\\Sigma_{\\omega} - L^T (L^T\\Theta_{\\omega} L)^{-1} L) \\Theta_{\\omega} \\Sigma_S \\\\\n", + "\\end{aligned}\n", + "$$\n", + "\n", + "## Three matrices\n", + "\n", + "- All the computations above can be expressed of some target specific info like $B, \\Theta_{\\hat{\\theta}}, \\Sigma_{\\hat{\\theta}}, \\hat{\\theta}$ and\n", + "\n", + "$$\n", + "\\begin{aligned}\n", + "M_1 &= \\Sigma_S \\Theta_{\\omega} \\\\\n", + "M_2 &= M_1 \\Sigma_{\\omega} M_1^T \\\\\n", + "M_3 &= M_1 L (L^T\\Sigma_{\\omega}L)^{-1} L M_1^T\n", + "\\end{aligned}\n", + "$$" + ] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,Rmd" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/Makefile b/doc/Makefile index 1f39aad81..7e84d387b 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -122,3 +122,8 @@ doctest: @echo @echo "The overview file is in build/doctest." +github: html + # Needs ghp-import (pip install ghp-import) + ghp-import -n -p $(BUILDROOT)/html/ + @echo + @echo "Published to Github" diff --git a/doc/adjusted_MLE/tests/comparison_metrics.py b/doc/adjusted_MLE/tests/comparison_metrics.py deleted file mode 100644 index c902ec879..000000000 --- a/doc/adjusted_MLE/tests/comparison_metrics.py +++ /dev/null @@ -1,915 +0,0 @@ -from __future__ import division, print_function -import numpy as np, sys, time -from scipy.stats import norm as ndist - -from rpy2 import robjects -import rpy2.robjects.numpy2ri - -from ...randomized.lasso import lasso, full_targets, selected_targets, debiased_targets -from ...algorithms.lasso import ROSI -from ...tests.instance import gaussian_instance - -def BHfilter(pval, q=0.2): - pval = np.asarray(pval) - pval_sort = np.sort(pval) - comparison = q * np.arange(1, pval.shape[0] + 1.) 
/ pval.shape[0] - passing = pval_sort < comparison - if passing.sum(): - thresh = comparison[np.nonzero(passing)[0].max()] - return np.nonzero(pval <= thresh)[0] - return [] - -def sim_xy(n, - p, - nval, - rho=0, - s=5, - beta_type=2, - snr=1): - try: - rpy2.robjects.numpy2ri.activate() - robjects.r(''' - #library(bestsubset) - source('~/best-subset/bestsubset/R/sim.R') - sim_xy = sim.xy - ''') - - r_simulate = robjects.globalenv['sim_xy'] - sim = r_simulate(n, p, nval, rho, s, beta_type, snr) - X = np.array(sim.rx2('x')) - y = np.array(sim.rx2('y')) - X_val = np.array(sim.rx2('xval')) - y_val = np.array(sim.rx2('yval')) - Sigma = np.array(sim.rx2('Sigma')) - beta = np.array(sim.rx2('beta')) - sigma = np.array(sim.rx2('sigma')) - rpy2.robjects.numpy2ri.deactivate() - return X, y, X_val, y_val, Sigma, beta, sigma - except: - X, y, beta, _, sigma, Sigma = gaussian_instance(n=n, - p=p, - s=s, - signal=snr, - equicorrelated=False, - rho=rho) - X_val = gaussian_instance(n=n, - p=p, - s=s, - signal=snr, - equicorrelated=False, - rho=rho)[0] - y_val = X_val.dot(beta) + sigma * np.random.standard_normal(X_val.shape[0]) - return X, y, X_val, y_val, Sigma, beta, sigma - -def selInf_R(X, y, beta, lam, sigma, Type, alpha=0.1): - robjects.r(''' - library("selectiveInference") - selInf = function(X, y, beta, lam, sigma, Type, alpha= 0.1){ - y = as.matrix(y) - X = as.matrix(X) - beta = as.matrix(beta) - lam = as.matrix(lam)[1,1] - sigma = as.matrix(sigma)[1,1] - Type = as.matrix(Type)[1,1] - if(Type == 1){ - type = "full"} else{ - type = "partial"} - inf = fixedLassoInf(x = X, y = y, beta = beta, lambda=lam, family = "gaussian", - intercept=FALSE, sigma=sigma, alpha=alpha, type=type) - return(list(ci = inf$ci, pvalue = inf$pv))} - ''') - - inf_R = robjects.globalenv['selInf'] - n, p = X.shape - r_X = robjects.r.matrix(X, nrow=n, ncol=p) - r_y = robjects.r.matrix(y, nrow=n, ncol=1) - r_beta = robjects.r.matrix(beta, nrow=p, ncol=1) - r_lam = robjects.r.matrix(lam, nrow=1, ncol=1) - r_sigma = robjects.r.matrix(sigma, nrow=1, ncol=1) - r_Type = robjects.r.matrix(Type, nrow=1, ncol=1) - output = inf_R(r_X, r_y, r_beta, r_lam, r_sigma, r_Type) - ci = np.array(output.rx2('ci')) - pvalue = np.array(output.rx2('pvalue')) - return ci, pvalue - - -def glmnet_lasso(X, y, lambda_val): - robjects.r(''' - library(glmnet) - glmnet_LASSO = function(X,y, lambda){ - y = as.matrix(y) - X = as.matrix(X) - lam = as.matrix(lambda)[1,1] - n = nrow(X) - - fit = glmnet(X, y, standardize=TRUE, intercept=FALSE, thresh=1.e-10) - estimate = coef(fit, s=lam, exact=TRUE, x=X, y=y)[-1] - fit.cv = cv.glmnet(X, y, standardize=TRUE, intercept=FALSE, thresh=1.e-10) - estimate.1se = coef(fit.cv, s='lambda.1se', exact=TRUE, x=X, y=y)[-1] - estimate.min = coef(fit.cv, s='lambda.min', exact=TRUE, x=X, y=y)[-1] - return(list(estimate = estimate, estimate.1se = estimate.1se, - estimate.min = estimate.min, - lam.min = fit.cv$lambda.min, - lam.1se = fit.cv$lambda.1se)) - }''') - - lambda_R = robjects.globalenv['glmnet_LASSO'] - n, p = X.shape - r_X = robjects.r.matrix(X, nrow=n, ncol=p) - r_y = robjects.r.matrix(y, nrow=n, ncol=1) - r_lam = robjects.r.matrix(lambda_val, nrow=1, ncol=1) - - estimate = np.array(lambda_R(r_X, r_y, r_lam).rx2('estimate')) - estimate_1se = np.array(lambda_R(r_X, r_y, r_lam).rx2('estimate.1se')) - estimate_min = np.array(lambda_R(r_X, r_y, r_lam).rx2('estimate.min')) - lam_min = np.asscalar(np.array(lambda_R(r_X, r_y, r_lam).rx2('lam.min'))) - lam_1se = np.asscalar(np.array(lambda_R(r_X, r_y, r_lam).rx2('lam.1se'))) - 
return estimate, estimate_1se, estimate_min, lam_min, lam_1se - -def coverage(intervals, pval, target, truth): - pval_alt = (pval[truth != 0]) < 0.1 - if pval_alt.sum() > 0: - avg_power = np.mean(pval_alt) - else: - avg_power = 0. - return np.mean((target > intervals[:, 0]) * (target < intervals[:, 1])), avg_power - -def relative_risk(est, truth, Sigma): - if (truth != 0).sum > 0: - return (est - truth).T.dot(Sigma).dot(est - truth) / truth.T.dot(Sigma).dot(truth) - else: - return (est - truth).T.dot(Sigma).dot(est - truth) - - -def comparison_cvmetrics_selected(n=500, - p=100, - nval=500, - rho=0.35, - s=5, - beta_type=1, - snr=0.20, - randomizer_scale=np.sqrt(0.50), - full_dispersion=True, - tuning_nonrand="lambda.min", - tuning_rand="lambda.1se"): - - (X, y, _, _, Sigma, beta, sigma) = sim_xy(n=n, - p=p, - nval=nval, - rho=rho, - s=s, - beta_type=beta_type, - snr=snr) - - true_mean = X.dot(beta) - print("snr", snr) - X -= X.mean(0)[None, :] - X /= (X.std(0)[None, :] * np.sqrt(n / (n - 1.))) - y = y - y.mean() - true_set = np.asarray([u for u in range(p) if beta[u] != 0]) - - if full_dispersion: - dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p) - sigma_ = np.sqrt(dispersion) - else: - dispersion = None - sigma_ = np.std(y) - print("estimated and true sigma", sigma, sigma_) - - lam_theory = (sigma_ * 1. * np.mean(np.fabs(np.dot(X.T, - np.random.standard_normal((n, 2000)))).max(0))) - (glm_LASSO_theory, - glm_LASSO_1se, - glm_LASSO_min, - lam_min, - lam_1se) = glmnet_lasso(X, y, lam_theory / n) - - if tuning_nonrand == "lambda.min": - lam_LASSO = lam_min - glm_LASSO = glm_LASSO_min - elif tuning_nonrand == "lambda.1se": - lam_LASSO = lam_1se - glm_LASSO = glm_LASSO_1se - else: - lam_LASSO = lam_theory/float(n) - glm_LASSO = glm_LASSO_theory - active_LASSO = (glm_LASSO != 0) - nactive_LASSO = active_LASSO.sum() - active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]]) - active_LASSO_bool = np.asarray([(np.in1d(active_set_LASSO[z], true_set).sum() > 0) for - z in range(nactive_LASSO)], np.bool) - - rel_LASSO = np.zeros(p) - Lee_nreport = 0 - bias_Lee = 0. - bias_naive = 0. - - if nactive_LASSO > 0: - post_LASSO_OLS = np.linalg.pinv(X[:, active_LASSO]).dot(y) - rel_LASSO[active_LASSO] = post_LASSO_OLS - Lee_target = np.linalg.pinv(X[:, active_LASSO]).dot(X.dot(beta)) - Lee_intervals, Lee_pval = selInf_R(X, - y, - glm_LASSO, - n * lam_LASSO, - sigma_, - Type=0, - alpha=0.1) - - if (Lee_pval.shape[0] == Lee_target.shape[0]): - - cov_Lee, selective_Lee_power = coverage(Lee_intervals, - Lee_pval, - Lee_target, - beta[active_LASSO]) - - inf_entries_bool = np.isinf(Lee_intervals[:, 1] - Lee_intervals[:, 0]) - inf_entries = np.mean(inf_entries_bool) - if inf_entries == 1.: - length_Lee = 0. - else: - length_Lee = (np.mean((Lee_intervals[:, 1] - Lee_intervals[:, 0]) - [~inf_entries_bool])) - power_Lee = ((active_LASSO_bool) * (np.logical_or((0. < Lee_intervals[:, 0]), - (0. 
> Lee_intervals[:, 1])))) \ - .sum() / float((beta != 0).sum()) - Lee_discoveries = BHfilter(Lee_pval, q=0.1) - power_Lee_BH = ((Lee_discoveries * active_LASSO_bool).sum() / - float((beta != 0).sum())) - fdr_Lee_BH = ((Lee_discoveries * ~active_LASSO_bool).sum() / - float(max(Lee_discoveries.sum(), 1.))) - bias_Lee = np.mean(glm_LASSO[active_LASSO] - Lee_target) - - naive_sd = sigma_ * np.sqrt(np.diag( - (np.linalg.inv(X[:, active_LASSO].T.dot(X[:, active_LASSO]))))) - naive_intervals = np.vstack([post_LASSO_OLS - 1.65 * naive_sd, - post_LASSO_OLS + 1.65 * naive_sd]).T - naive_pval = 2 * ndist.cdf(np.abs(post_LASSO_OLS) / naive_sd) - - cov_naive, selective_naive_power = coverage(naive_intervals, - naive_pval, - Lee_target, - beta[active_LASSO]) - - length_naive = np.mean(naive_intervals[:, 1] - naive_intervals[:, 0]) - power_naive = ((active_LASSO_bool) * ( - np.logical_or((0. < naive_intervals[:, 0]), - (0. > naive_intervals[:, 1])))).sum() / float( - (beta != 0).sum()) - - naive_discoveries = BHfilter(naive_pval, q=0.1) - - power_naive_BH = ((naive_discoveries * active_LASSO_bool).sum() / - float((beta != 0).sum())) - fdr_naive_BH = ((naive_discoveries * ~active_LASSO_bool).sum() / - float(max(naive_discoveries.sum(), 1.))) - - bias_naive = np.mean(rel_LASSO[active_LASSO] - Lee_target) - - partial_Lasso_risk = (glm_LASSO[active_LASSO]-Lee_target).T.dot( - glm_LASSO[active_LASSO]-Lee_target) - partial_relLasso_risk = (post_LASSO_OLS - Lee_target).T.dot( - post_LASSO_OLS - Lee_target) - - else: - Lee_nreport = 1 - (cov_Lee, - length_Lee, - inf_entries, - power_Lee, - power_Lee_BH, - fdr_Lee_BH, - selective_Lee_power) = [0., 0., 0., 0., 0., 0., 0.] - - (cov_naive, - length_naive, - power_naive, - power_naive_BH, - fdr_naive_BH, - selective_naive_power) = [0., 0., 0., 0., 0., 0.] - - naive_discoveries = np.zeros(1) - Lee_discoveries = np.zeros(1) - partial_Lasso_risk, partial_relLasso_risk = [0., 0.] - - elif nactive_LASSO == 0: - Lee_nreport = 1 - (cov_Lee, - length_Lee, - inf_entries, - power_Lee, - power_Lee_BH, - fdr_Lee_BH, - selective_Lee_power) = [0., 0., 0., 0., 0., 0., 0.] - - (cov_naive, - length_naive, - power_naive, - power_naive_BH, - fdr_naive_BH, - selective_naive_power) = [0., 0., 0., 0., 0., 0.] - - naive_discoveries = np.zeros(1) - Lee_discoveries = np.zeros(1) - partial_Lasso_risk, partial_relLasso_risk = [0., 0.] 
- - if tuning_rand == "lambda.min": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights=n * lam_min * np.ones(p), - randomizer_scale= np.sqrt(n) * - randomizer_scale * sigma_) - elif tuning_rand == "lambda.1se": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights=n * lam_1se * np.ones(p), - randomizer_scale= np.sqrt(n) * - randomizer_scale * sigma_) - else: - randomized_lasso = lasso.gaussian(X, - y, - feature_weights= lam_theory * np.ones(p), - randomizer_scale=np.sqrt(n) * - randomizer_scale * sigma_) - signs = randomized_lasso.fit() - nonzero = signs != 0 - active_set_rand = np.asarray([t for t in range(p) if nonzero[t]]) - active_rand_bool = np.asarray([(np.in1d(active_set_rand[x], true_set).sum() > 0) for x in range(nonzero.sum())], np.bool) - sel_MLE = np.zeros(p) - ind_est = np.zeros(p) - randomized_lasso_est = np.zeros(p) - randomized_rel_lasso_est = np.zeros(p) - MLE_nreport = 0 - - sys.stderr.write("active variables selected by cv LASSO " + str(nactive_LASSO) + "\n") - sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n" + "\n") - - if nonzero.sum() > 0: - target_randomized = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(randomized_lasso.loglike, - randomized_lasso._W, - nonzero, - dispersion=dispersion) - - MLE_estimate, _, _, MLE_pval, MLE_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(observed_target, - cov_target, - cov_target_score, - alternatives) - sel_MLE[nonzero] = MLE_estimate - ind_est[nonzero] = ind_unbiased_estimator - randomized_lasso_est = randomized_lasso.initial_soln - randomized_rel_lasso_est = randomized_lasso._beta_full - - cov_MLE, selective_MLE_power = coverage(MLE_intervals, MLE_pval, target_randomized, beta[nonzero]) - length_MLE = np.mean(MLE_intervals[:, 1] - MLE_intervals[:, 0]) - power_MLE = ((active_rand_bool) * ( - np.logical_or((0. < MLE_intervals[:, 0]), (0. > MLE_intervals[:, 1])))).sum() / float((beta != 0).sum()) - MLE_discoveries = BHfilter(MLE_pval, q=0.1) - power_MLE_BH = (MLE_discoveries * active_rand_bool).sum() / float((beta != 0).sum()) - fdr_MLE_BH = (MLE_discoveries * ~active_rand_bool).sum() / float(max(MLE_discoveries.sum(), 1.)) - bias_MLE = np.mean(MLE_estimate - target_randomized) - - partial_MLE_risk = (MLE_estimate - target_randomized).T.dot(MLE_estimate - target_randomized) - partial_ind_risk = (ind_unbiased_estimator - target_randomized).T.dot(ind_unbiased_estimator - target_randomized) - partial_randLasso_risk = (randomized_lasso_est[nonzero] - target_randomized).T.dot(randomized_lasso_est[nonzero] - target_randomized) - partial_relrandLasso_risk = (randomized_rel_lasso_est[nonzero] - target_randomized).T.dot(randomized_rel_lasso_est[nonzero] - target_randomized) - else: - MLE_nreport = 1 - cov_MLE, length_MLE, power_MLE, power_MLE_BH, fdr_MLE_BH, bias_MLE, selective_MLE_power = [0., 0., 0., 0., 0., 0., 0.] - MLE_discoveries = np.zeros(1) - partial_MLE_risk, partial_ind_risk, partial_randLasso_risk, partial_relrandLasso_risk = [0., 0., 0., 0.] 
- - risks = np.vstack((relative_risk(sel_MLE, beta, Sigma), - relative_risk(ind_est, beta, Sigma), - relative_risk(randomized_lasso_est, beta, Sigma), - relative_risk(randomized_rel_lasso_est, beta, Sigma), - relative_risk(rel_LASSO, beta, Sigma), - relative_risk(glm_LASSO, beta, Sigma))) - - partial_risks = np.vstack((partial_MLE_risk, - partial_ind_risk, - partial_randLasso_risk, - partial_relrandLasso_risk, - partial_relLasso_risk, - partial_Lasso_risk)) - - naive_inf = np.vstack((cov_naive, length_naive, 0., nactive_LASSO, bias_naive, selective_naive_power, power_naive, power_naive_BH, fdr_naive_BH, - naive_discoveries.sum())) - Lee_inf = np.vstack((cov_Lee, length_Lee, inf_entries, nactive_LASSO, bias_Lee, selective_Lee_power, power_Lee, power_Lee_BH, fdr_Lee_BH, - Lee_discoveries.sum())) - Liu_inf = np.zeros((10, 1)) - MLE_inf = np.vstack((cov_MLE, length_MLE, 0., nonzero.sum(), bias_MLE, selective_MLE_power, power_MLE, power_MLE_BH, fdr_MLE_BH, - MLE_discoveries.sum())) - nreport = np.vstack((Lee_nreport, 0., MLE_nreport)) - return np.vstack((risks, naive_inf, Lee_inf, Liu_inf, MLE_inf, partial_risks, nreport)) - - -def comparison_cvmetrics_full(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=1, snr=0.20, - randomizer_scale=np.sqrt(0.25), full_dispersion=True, - tuning_nonrand="lambda.min", tuning_rand="lambda.1se"): - - X, y, _, _, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr) - print("snr", snr) - X -= X.mean(0)[None, :] - X /= (X.std(0)[None, :] * np.sqrt(n / (n - 1.))) - y = y - y.mean() - true_set = np.asarray([u for u in range(p) if beta[u] != 0]) - - if full_dispersion: - dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p) - sigma_ = np.sqrt(dispersion) - else: - dispersion = None - sigma_ = np.std(y) - print("estimated and true sigma", sigma, sigma_) - - lam_theory = sigma_ * 1. * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) - glm_LASSO_theory, glm_LASSO_1se, glm_LASSO_min, lam_min, lam_1se = glmnet_lasso(X, y, lam_theory/float(n)) - if tuning_nonrand == "lambda.min": - lam_LASSO = lam_min - glm_LASSO = glm_LASSO_min - elif tuning_nonrand == "lambda.1se": - lam_LASSO = lam_1se - glm_LASSO = glm_LASSO_1se - else: - lam_LASSO = lam_theory/float(n) - glm_LASSO = glm_LASSO_theory - - active_LASSO = (glm_LASSO != 0) - nactive_LASSO = active_LASSO.sum() - active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]]) - active_LASSO_bool = np.asarray([(np.in1d(active_set_LASSO[z], true_set).sum() > 0) for z in range(nactive_LASSO)], - np.bool) - - rel_LASSO = np.zeros(p) - Lee_nreport = 0 - bias_Lee = 0. - bias_naive = 0. - - if nactive_LASSO > 0: - rel_LASSO[active_LASSO] = np.linalg.pinv(X[:, active_LASSO]).dot(y) - Lee_target = beta[active_LASSO] - Lee_intervals, Lee_pval = selInf_R(X, y, glm_LASSO, n * lam_LASSO, sigma_, Type=1, alpha=0.1) - - if (Lee_pval.shape[0] == Lee_target.shape[0]): - - cov_Lee, selective_Lee_power = coverage(Lee_intervals, Lee_pval, Lee_target, beta[active_LASSO]) - inf_entries_bool = np.isinf(Lee_intervals[:, 1] - Lee_intervals[:, 0]) - inf_entries = np.mean(inf_entries_bool) - if inf_entries == 1.: - length_Lee = 0. - else: - length_Lee = np.mean((Lee_intervals[:, 1] - Lee_intervals[:, 0])[~inf_entries_bool]) - power_Lee = ((active_LASSO_bool) * ( - np.logical_or((0. < Lee_intervals[:, 0]), (0. 
> Lee_intervals[:, 1])))).sum() / float((beta != 0).sum()) - Lee_discoveries = BHfilter(Lee_pval, q=0.1) - power_Lee_BH = (Lee_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum()) - fdr_Lee_BH = (Lee_discoveries * ~active_LASSO_bool).sum() / float(max(Lee_discoveries.sum(), 1.)) - bias_Lee = np.mean(glm_LASSO[active_LASSO] - Lee_target) - - post_LASSO_OLS = np.linalg.pinv(X[:, active_LASSO]).dot(y) - naive_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_LASSO].T.dot(X[:, active_LASSO]))))) - naive_intervals = np.vstack([post_LASSO_OLS - 1.65 * naive_sd, - post_LASSO_OLS + 1.65 * naive_sd]).T - naive_pval = 2 * ndist.cdf(np.abs(post_LASSO_OLS) / naive_sd) - cov_naive, selective_naive_power = coverage(naive_intervals, naive_pval, Lee_target, beta[active_LASSO]) - length_naive = np.mean(naive_intervals[:, 1] - naive_intervals[:, 0]) - power_naive = ((active_LASSO_bool) * ( - np.logical_or((0. < naive_intervals[:, 0]), (0. > naive_intervals[:, 1])))).sum() / float( - (beta != 0).sum()) - naive_discoveries = BHfilter(naive_pval, q=0.1) - power_naive_BH = (naive_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum()) - fdr_naive_BH = (naive_discoveries * ~active_LASSO_bool).sum() / float(max(naive_discoveries.sum(), 1.)) - bias_naive = np.mean(rel_LASSO[active_LASSO] - Lee_target) - - partial_Lasso_risk = (glm_LASSO[active_LASSO] - Lee_target).T.dot(glm_LASSO[active_LASSO] - Lee_target) - partial_relLasso_risk = (post_LASSO_OLS - Lee_target).T.dot(post_LASSO_OLS - Lee_target) - else: - Lee_nreport = 1 - cov_Lee, length_Lee, inf_entries, power_Lee, power_Lee_BH, fdr_Lee_BH, selective_Lee_power = [0., 0., 0., 0., 0., 0., 0.] - cov_naive, length_naive, power_naive, power_naive_BH, fdr_naive_BH, selective_naive_power = [0., 0., 0., 0., 0., 0.] - naive_discoveries = np.zeros(1) - Lee_discoveries = np.zeros(1) - partial_Lasso_risk, partial_relLasso_risk = [0., 0.] - - elif nactive_LASSO == 0: - Lee_nreport = 1 - cov_Lee, length_Lee, inf_entries, power_Lee, power_Lee_BH, fdr_Lee_BH, selective_Lee_power = [0., 0., 0., 0., 0., 0., 0.] - cov_naive, length_naive, power_naive, power_naive_BH, fdr_naive_BH, selective_naive_power = [0., 0., 0., 0., 0., 0.] - naive_discoveries = np.zeros(1) - Lee_discoveries = np.zeros(1) - partial_Lasso_risk, partial_relLasso_risk = [0., 0.] - - lasso_Liu = ROSI.gaussian(X, y, n * lam_LASSO) - Lasso_soln_Liu = lasso_Liu.fit() - active_set_Liu = np.nonzero(Lasso_soln_Liu != 0)[0] - nactive_Liu = active_set_Liu.shape[0] - active_Liu_bool = np.asarray([(np.in1d(active_set_Liu[a], true_set).sum() > 0) for a in range(nactive_Liu)], np.bool) - Liu_nreport = 0 - - if nactive_Liu > 0: - Liu_target = beta[Lasso_soln_Liu != 0] - df = lasso_Liu.summary(level=0.90, compute_intervals=True, dispersion=dispersion) - Liu_lower, Liu_upper, Liu_pval = np.asarray(df['lower_confidence']), \ - np.asarray(df['upper_confidence']), \ - np.asarray(df['pval']) - Liu_intervals = np.vstack((Liu_lower, Liu_upper)).T - cov_Liu, selective_Liu_power = coverage(Liu_intervals, Liu_pval, Liu_target, beta[Lasso_soln_Liu != 0]) - length_Liu = np.mean(Liu_intervals[:, 1] - Liu_intervals[:, 0]) - power_Liu = ((active_Liu_bool) * (np.logical_or((0. < Liu_intervals[:, 0]), - (0. 
> Liu_intervals[:, 1])))).sum() / float((beta != 0).sum()) - Liu_discoveries = BHfilter(Liu_pval, q=0.1) - power_Liu_BH = (Liu_discoveries * active_Liu_bool).sum() / float((beta != 0).sum()) - fdr_Liu_BH = (Liu_discoveries * ~active_Liu_bool).sum() / float(max(Liu_discoveries.sum(), 1.)) - - else: - Liu_nreport = 1 - cov_Liu, length_Liu, power_Liu, power_Liu_BH, fdr_Liu_BH, selective_Liu_power = [0., 0., 0., 0., 0., 0.] - Liu_discoveries = np.zeros(1) - - if tuning_rand == "lambda.min": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights= n * lam_min * np.ones(p), - randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) - elif tuning_rand == "lambda.1se": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights= n * lam_1se * np.ones(p), - randomizer_scale= np.sqrt(n) * randomizer_scale * sigma_) - else: - randomized_lasso = lasso.gaussian(X, - y, - feature_weights= lam_theory * np.ones(p), - randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) - signs = randomized_lasso.fit() - nonzero = signs != 0 - active_set_rand = np.asarray([t for t in range(p) if nonzero[t]]) - active_rand_bool = np.asarray([(np.in1d(active_set_rand[x], true_set).sum() > 0) for x in range(nonzero.sum())], np.bool) - sel_MLE = np.zeros(p) - ind_est = np.zeros(p) - randomized_lasso_est = np.zeros(p) - randomized_rel_lasso_est = np.zeros(p) - MLE_nreport = 0 - - if nonzero.sum() > 0: - target_randomized = beta[nonzero] - (observed_target, - cov_target, - cov_target_score, - alternatives) = full_targets(randomized_lasso.loglike, - randomized_lasso._W, - nonzero, - dispersion=dispersion) - MLE_estimate, _, _, MLE_pval, MLE_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(observed_target, - cov_target, - cov_target_score, - alternatives) - sel_MLE[nonzero] = MLE_estimate - ind_est[nonzero] = ind_unbiased_estimator - randomized_lasso_est = randomized_lasso.initial_soln - randomized_rel_lasso_est = randomized_lasso._beta_full - - cov_MLE, selective_MLE_power = coverage(MLE_intervals, MLE_pval, target_randomized, beta[nonzero]) - length_MLE = np.mean(MLE_intervals[:, 1] - MLE_intervals[:, 0]) - power_MLE = ((active_rand_bool) * (np.logical_or((0. < MLE_intervals[:, 0]), (0. > MLE_intervals[:, 1])))).sum() / float((beta != 0).sum()) - MLE_discoveries = BHfilter(MLE_pval, q=0.1) - power_MLE_BH = (MLE_discoveries * active_rand_bool).sum() / float((beta != 0).sum()) - fdr_MLE_BH = (MLE_discoveries * ~active_rand_bool).sum() / float(max(MLE_discoveries.sum(), 1.)) - bias_MLE = np.mean(MLE_estimate - target_randomized) - - partial_MLE_risk = (MLE_estimate - target_randomized).T.dot(MLE_estimate - target_randomized) - partial_ind_risk = (ind_unbiased_estimator - target_randomized).T.dot(ind_unbiased_estimator - target_randomized) - partial_randLasso_risk = (randomized_lasso_est[nonzero] - target_randomized).T.dot(randomized_lasso_est[nonzero] - target_randomized) - partial_relrandLasso_risk = (randomized_rel_lasso_est[nonzero] - target_randomized).T.dot(randomized_rel_lasso_est[nonzero] - target_randomized) - else: - MLE_nreport = 1 - cov_MLE, length_MLE, power_MLE, power_MLE_BH, fdr_MLE_BH, bias_MLE, selective_MLE_power = [0., 0., 0., 0., 0., 0., 0.] - MLE_discoveries = np.zeros(1) - partial_MLE_risk, partial_ind_risk, partial_randLasso_risk, partial_relrandLasso_risk = [0., 0., 0., 0.] 
- - risks = np.vstack((relative_risk(sel_MLE, beta, Sigma), - relative_risk(ind_est, beta, Sigma), - relative_risk(randomized_lasso_est, beta, Sigma), - relative_risk(randomized_rel_lasso_est, beta, Sigma), - relative_risk(rel_LASSO, beta, Sigma), - relative_risk(glm_LASSO, beta, Sigma))) - - partial_risks = np.vstack((partial_MLE_risk, - partial_ind_risk, - partial_randLasso_risk, - partial_relrandLasso_risk, - partial_relLasso_risk, - partial_Lasso_risk)) - - naive_inf = np.vstack((cov_naive, length_naive, 0., nactive_LASSO, bias_naive, selective_naive_power, - power_naive, power_naive_BH, fdr_naive_BH, naive_discoveries.sum())) - Lee_inf = np.vstack((cov_Lee, length_Lee, inf_entries, nactive_LASSO, bias_Lee, selective_Lee_power, - power_Lee, power_Lee_BH, fdr_Lee_BH, Lee_discoveries.sum())) - Liu_inf = np.vstack((cov_Liu, length_Liu, 0., nactive_Liu, bias_Lee, selective_Liu_power, - power_Liu, power_Liu_BH, fdr_Liu_BH, Liu_discoveries.sum())) - MLE_inf = np.vstack((cov_MLE, length_MLE, 0., nonzero.sum(), bias_MLE, selective_MLE_power, - power_MLE, power_MLE_BH, fdr_MLE_BH, MLE_discoveries.sum())) - nreport = np.vstack((Lee_nreport, Liu_nreport, MLE_nreport)) - return np.vstack((risks, naive_inf, Lee_inf, Liu_inf, MLE_inf, partial_risks, nreport)) - -def comparison_cvmetrics_debiased(n=100, p=150, nval=500, rho=0.35, s=5, beta_type=1, snr=0.20, - randomizer_scale=np.sqrt(0.25), full_dispersion=False, - tuning_nonrand="lambda.min", tuning_rand="lambda.1se"): - - X, y, _, _, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr) - print("snr", snr) - X -= X.mean(0)[None, :] - X /= (X.std(0)[None, :] * np.sqrt(n / (n - 1.))) - y = y - y.mean() - true_set = np.asarray([u for u in range(p) if beta[u] != 0]) - - if full_dispersion: - dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p) - sigma_ = np.sqrt(dispersion) - else: - dispersion = None - _sigma_ = np.std(y) - - lam_theory = _sigma_ * 1. * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) - glm_LASSO_theory, glm_LASSO_1se, glm_LASSO_min, lam_min, lam_1se = glmnet_lasso(X, y, lam_theory / float(n)) - - if full_dispersion is False: - dispersion = None - active_min = (glm_LASSO_min != 0) - if active_min.sum() > 0: - sigma_ = np.sqrt(np.linalg.norm(y - X[:, active_min].dot(np.linalg.pinv(X[:, active_min]).dot(y))) ** 2 - / (n - active_min.sum())) - else: - sigma_ = _sigma_ - print("estimated and true sigma", sigma, _sigma_, sigma_) - - if tuning_nonrand == "lambda.min": - lam_LASSO = lam_min - glm_LASSO = glm_LASSO_min - elif tuning_nonrand == "lambda.1se": - lam_LASSO = lam_1se - glm_LASSO = glm_LASSO_1se - else: - lam_LASSO = lam_theory / float(n) - glm_LASSO = glm_LASSO_theory - - active_LASSO = (glm_LASSO != 0) - nactive_LASSO = active_LASSO.sum() - active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]]) - active_LASSO_bool = np.asarray([(np.in1d(active_set_LASSO[z], true_set).sum() > 0) for z in range(nactive_LASSO)], - np.bool) - - rel_LASSO = np.zeros(p) - Lee_nreport = 0. - bias_naive = 0. 
- - if nactive_LASSO > 0: - rel_LASSO[active_LASSO] = np.linalg.pinv(X[:, active_LASSO]).dot(y) - Lee_target = beta[active_LASSO] - post_LASSO_OLS = np.linalg.pinv(X[:, active_LASSO]).dot(y) - naive_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_LASSO].T.dot(X[:, active_LASSO]))))) - naive_intervals = np.vstack([post_LASSO_OLS - 1.65 * naive_sd, - post_LASSO_OLS + 1.65 * naive_sd]).T - naive_pval = 2 * ndist.cdf(np.abs(post_LASSO_OLS) / naive_sd) - cov_naive, selective_naive_power = coverage(naive_intervals, naive_pval, Lee_target, beta[active_LASSO]) - length_naive = np.mean(naive_intervals[:, 1] - naive_intervals[:, 0]) - power_naive = ((active_LASSO_bool) * ( - np.logical_or((0. < naive_intervals[:, 0]), (0. > naive_intervals[:, 1])))).sum() / float( - (beta != 0).sum()) - naive_discoveries = BHfilter(naive_pval, q=0.1) - power_naive_BH = (naive_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum()) - fdr_naive_BH = (naive_discoveries * ~active_LASSO_bool).sum() / float(max(naive_discoveries.sum(), 1.)) - bias_naive = np.mean(rel_LASSO[active_LASSO] - Lee_target) - - partial_Lasso_risk = (glm_LASSO[active_LASSO] - Lee_target).T.dot(glm_LASSO[active_LASSO] - Lee_target) - partial_relLasso_risk = (post_LASSO_OLS - Lee_target).T.dot(post_LASSO_OLS - Lee_target) - - elif nactive_LASSO == 0: - Lee_nreport += 1 - cov_naive, length_naive, power_naive, power_naive_BH, fdr_naive_BH, selective_naive_power = [0., 0., 0., 0., 0., 0.] - naive_discoveries = np.zeros(1) - partial_Lasso_risk, partial_relLasso_risk = [0., 0.] - - if tuning_rand == "lambda.min": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights=n * lam_min * np.ones(p), - randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) - elif tuning_rand == "lambda.1se": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights=n * lam_1se * np.ones(p), - randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) - else: - randomized_lasso = lasso.gaussian(X, - y, - feature_weights=lam_theory * np.ones(p), - randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) - signs = randomized_lasso.fit() - nonzero = signs != 0 - active_set_rand = np.asarray([t for t in range(p) if nonzero[t]]) - active_rand_bool = np.asarray([(np.in1d(active_set_rand[x], true_set).sum() > 0) for x in range(nonzero.sum())], - np.bool) - sel_MLE = np.zeros(p) - ind_est = np.zeros(p) - randomized_lasso_est = np.zeros(p) - randomized_rel_lasso_est = np.zeros(p) - MLE_nreport = 0 - - if nonzero.sum() > 0: - target_randomized = beta[nonzero] - (observed_target, - cov_target, - cov_target_score, - alternatives) = debiased_targets(randomized_lasso.loglike, - randomized_lasso._W, - nonzero, - penalty=randomized_lasso.penalty, - dispersion=dispersion) - MLE_estimate, _, _, MLE_pval, MLE_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(observed_target, - cov_target, - cov_target_score, - alternatives) - sel_MLE[nonzero] = MLE_estimate - ind_est[nonzero] = ind_unbiased_estimator - randomized_lasso_est = randomized_lasso.initial_soln - randomized_rel_lasso_est = randomized_lasso._beta_full - - cov_MLE, selective_MLE_power = coverage(MLE_intervals, MLE_pval, target_randomized, beta[nonzero]) - length_MLE = np.mean(MLE_intervals[:, 1] - MLE_intervals[:, 0]) - power_MLE = ((active_rand_bool) * ( - np.logical_or((0. < MLE_intervals[:, 0]), (0. 
> MLE_intervals[:, 1])))).sum() / float((beta != 0).sum()) - MLE_discoveries = BHfilter(MLE_pval, q=0.1) - power_MLE_BH = (MLE_discoveries * active_rand_bool).sum() / float((beta != 0).sum()) - fdr_MLE_BH = (MLE_discoveries * ~active_rand_bool).sum() / float(max(MLE_discoveries.sum(), 1.)) - bias_MLE = np.mean(MLE_estimate - target_randomized) - - partial_MLE_risk = (MLE_estimate - target_randomized).T.dot(MLE_estimate - target_randomized) - partial_ind_risk = (ind_unbiased_estimator - target_randomized).T.dot( - ind_unbiased_estimator - target_randomized) - partial_randLasso_risk = (randomized_lasso_est[nonzero] - target_randomized).T.dot( - randomized_lasso_est[nonzero] - target_randomized) - partial_relrandLasso_risk = (randomized_rel_lasso_est[nonzero] - target_randomized).T.dot( - randomized_rel_lasso_est[nonzero] - target_randomized) - else: - MLE_nreport = 1 - cov_MLE, length_MLE, power_MLE, power_MLE_BH, fdr_MLE_BH, bias_MLE, selective_MLE_power = [0., 0., 0., 0., 0., - 0., 0.] - MLE_discoveries = np.zeros(1) - partial_MLE_risk, partial_ind_risk, partial_randLasso_risk, partial_relrandLasso_risk = [0., 0., 0., 0.] - - risks = np.vstack((relative_risk(sel_MLE, beta, Sigma), - relative_risk(ind_est, beta, Sigma), - relative_risk(randomized_lasso_est, beta, Sigma), - relative_risk(randomized_rel_lasso_est, beta, Sigma), - relative_risk(rel_LASSO, beta, Sigma), - relative_risk(glm_LASSO, beta, Sigma))) - - partial_risks = np.vstack((partial_MLE_risk, - partial_ind_risk, - partial_randLasso_risk, - partial_relrandLasso_risk, - partial_relLasso_risk, - partial_Lasso_risk)) - - naive_inf = np.vstack((cov_naive, length_naive, 0., nactive_LASSO, bias_naive, selective_naive_power, - power_naive, power_naive_BH, fdr_naive_BH, naive_discoveries.sum())) - Lee_inf = np.zeros((10,1)) - Liu_inf = np.zeros((10,1)) - MLE_inf = np.vstack((cov_MLE, length_MLE, 0., nonzero.sum(), bias_MLE, selective_MLE_power, - power_MLE, power_MLE_BH, fdr_MLE_BH, MLE_discoveries.sum())) - nreport = np.vstack((Lee_nreport, 0., MLE_nreport)) - return np.vstack((risks, naive_inf, Lee_inf, Liu_inf, MLE_inf, partial_risks, nreport)) - - -def compare_sampler_MLE(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=1, snr=0.20, target= "selected", - randomizer_scale=np.sqrt(0.50), full_dispersion=True, tuning_rand="lambda.1se"): - - X, y, _, _, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr) - print("snr", snr) - X -= X.mean(0)[None, :] - X /= (X.std(0)[None, :] * np.sqrt(n / (n - 1.))) - y = y - y.mean() - true_set = np.asarray([u for u in range(p) if beta[u] != 0]) - - if full_dispersion: - dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p) - sigma_ = np.sqrt(dispersion) - else: - dispersion = None - sigma_ = np.std(y) - print("estimated and true sigma", sigma, sigma_) - - lam_theory = sigma_ * 1. 
* np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) - _, _, _, lam_min, lam_1se = glmnet_lasso(X, y, lam_theory / float(n)) - - if tuning_rand == "lambda.min": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights=n * lam_min * np.ones(p), - randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) - elif tuning_rand == "lambda.1se": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights=n * lam_1se * np.ones(p), - randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) - elif tuning_rand == "lambda.theory": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights=lam_theory * np.ones(p), - randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) - - else: - raise ValueError('lambda choice not specified correctly') - - signs = randomized_lasso.fit() - nonzero = signs != 0 - sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n" + "\n") - active_set_rand = np.asarray([t for t in range(p) if nonzero[t]]) - active_rand_bool = np.asarray([(np.in1d(active_set_rand[x], true_set).sum() > 0) for x in range(nonzero.sum())], - np.bool) - nreport = 0. - - if nonzero.sum() > 0: - if target == "full": - target_randomized = beta[nonzero] - (observed_target, - cov_target, - cov_target_score, - alternatives) = full_targets(randomized_lasso.loglike, - randomized_lasso._W, - nonzero, - dispersion=dispersion) - elif target == "selected": - target_randomized = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(randomized_lasso.loglike, - randomized_lasso._W, - nonzero, - dispersion=dispersion) - else: - raise ValueError('not a valid specification of target') - toc = time.time() - MLE_estimate, _, _, MLE_pval, MLE_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(observed_target, - cov_target, - cov_target_score, - alternatives) - tic = time.time() - time_MLE = tic - toc - - cov_MLE, selective_MLE_power = coverage(MLE_intervals, MLE_pval, target_randomized, beta[nonzero]) - length_MLE = np.mean(MLE_intervals[:, 1] - MLE_intervals[:, 0]) - power_MLE = ((active_rand_bool) * ( - np.logical_or((0. < MLE_intervals[:, 0]), (0. > MLE_intervals[:, 1])))).sum() / float((beta != 0).sum()) - MLE_discoveries = BHfilter(MLE_pval, q=0.1) - power_MLE_BH = (MLE_discoveries * active_rand_bool).sum() / float((beta != 0).sum()) - fdr_MLE_BH = (MLE_discoveries * ~active_rand_bool).sum() / float(max(MLE_discoveries.sum(), 1.)) - bias_MLE = np.mean(MLE_estimate - target_randomized) - - toc = time.time() - _, sampler_pval, sampler_intervals = randomized_lasso.summary(observed_target, - cov_target, - cov_target_score, - alternatives, - level=0.9, compute_intervals=True, ndraw=100000) - tic = time.time() - time_sampler = tic - toc - - cov_sampler, selective_sampler_power = coverage(sampler_intervals, sampler_pval, target_randomized, beta[nonzero]) - length_sampler = np.mean(sampler_intervals[:, 1] - sampler_intervals[:, 0]) - power_sampler = ((active_rand_bool) * (np.logical_or((0. < sampler_intervals[:, 0]), - (0. 
> sampler_intervals[:, 1])))).sum() / float((beta != 0).sum()) - sampler_discoveries = BHfilter(sampler_pval, q=0.1) - power_sampler_BH = (sampler_discoveries * active_rand_bool).sum() / float((beta != 0).sum()) - fdr_sampler_BH = (sampler_discoveries * ~active_rand_bool).sum() / float(max(sampler_discoveries.sum(), 1.)) - bias_randLASSO = np.mean(randomized_lasso.initial_soln[nonzero] - target_randomized) - - else: - nreport += 1 - cov_MLE, length_MLE, power_MLE, power_MLE_BH, fdr_MLE_BH, bias_MLE, selective_MLE_power, time_MLE = [0., 0., 0., 0., 0., 0., 0., 0.] - cov_sampler, length_sampler, power_sampler, power_sampler_BH, fdr_sampler_BH, bias_randLASSO, selective_sampler_power, time_sampler = [0., 0., 0., 0., 0., 0., 0., 0.] - MLE_discoveries = np.zeros(1) - sampler_discoveries = np.zeros(1) - - MLE_inf = np.vstack((cov_MLE, length_MLE, 0., nonzero.sum(), bias_MLE, selective_MLE_power, time_MLE, - power_MLE, power_MLE_BH, fdr_MLE_BH, MLE_discoveries.sum())) - - sampler_inf = np.vstack((cov_sampler, length_sampler, 0., nonzero.sum(), bias_randLASSO, selective_sampler_power, time_sampler, - power_sampler, power_sampler_BH, fdr_sampler_BH, sampler_discoveries.sum())) - - return np.vstack((MLE_inf, sampler_inf, nreport)) - - - - - - - - - diff --git a/doc/learning_examples/BH/gbm_targets_BH.py b/doc/learning_examples/BH/gbm_targets_BH.py index 7d107c109..f9fd6150b 100644 --- a/doc/learning_examples/BH/gbm_targets_BH.py +++ b/doc/learning_examples/BH/gbm_targets_BH.py @@ -5,11 +5,11 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.utils import full_model_inference, pivot_plot -from selection.learning.core import normal_sampler, gbm_fit_sk -from selection.learning.learners import mixture_learner +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, gbm_fit_sk +from selectinf.learning.learners import mixture_learner mixture_learner.scales = [1]*10 + [1.5,2,3,4,5,10] def BHfilter(pval, q=0.2): @@ -22,9 +22,7 @@ def BHfilter(pval, q=0.2): return np.nonzero(pval <= thresh)[0] return [] -def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000): - - # description of statistical problem +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): X, y, truth = gaussian_instance(n=n, p=p, @@ -36,6 +34,23 @@ def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000): random_signs=True, scale=False)[:3] + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + XTX = X.T.dot(X) XTXi = np.linalg.inv(XTX) resid = y - X.dot(XTXi.dot(X.T.dot(y))) @@ -81,19 +96,35 @@ def meta_algorithm(XTX, XTXi, dispersion, lam, sampler): import matplotlib.pyplot as plt import pandas as pd - for i in range(500): - df = simulate(B=40000) - csvfile = 'gbm_targets_BH.csv' + U = np.linspace(0, 1, 101) + plt.clf() + + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=20000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + for i in range(5000): + df = 
simulate(**opts) + csvfile = __file__[:-3] + '.csv' outbase = csvfile[:-4] - if df is not None and i > 0: + if df is not None: - try: # concatenate to disk + try: df = pd.concat([df, pd.read_csv(csvfile)]) except FileNotFoundError: pass df.to_csv(csvfile, index=False) if len(df['pivot']) > 0: - pivot_ax, length_ax = pivot_plot(df, outbase) + f = pivot_plot(df, outbase)[1] + plt.close(f) diff --git a/doc/learning_examples/BH/gbm_targets_BH_single.py b/doc/learning_examples/BH/gbm_targets_BH_single.py new file mode 100644 index 000000000..bc13e149d --- /dev/null +++ b/doc/learning_examples/BH/gbm_targets_BH_single.py @@ -0,0 +1,119 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, gbm_fit_sk +from selectinf.learning.learners import mixture_learner +mixture_learner.scales = [1]*10 + [1.5,2,3,4,5,10] + +def BHfilter(pval, q=0.2): + pval = np.asarray(pval) + pval_sort = np.sort(pval) + comparison = q * np.arange(1, pval.shape[0] + 1.) / pval.shape[0] + passing = pval_sort < comparison + if passing.sum(): + thresh = comparison[np.nonzero(passing)[0].max()] + return np.nonzero(pval <= thresh)[0] + return [] + +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000): + + # description of statistical problem + + X, y, truth = generate(n=n, p=p, s=s, signal=signal, sigma=sigma) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + def meta_algorithm(XTX, XTXi, dispersion, lam, sampler): + global counter + p = XTX.shape[0] + success = np.zeros(p) + + loss = rr.quadratic_loss((p,), Q=XTX) + pen = rr.l1norm(p, lagrange=lam) + + scale = 0. + noisy_S = sampler(scale=scale) + soln = XTXi.dot(noisy_S) + solnZ = soln / (np.sqrt(np.diag(XTXi)) * np.sqrt(dispersion)) + pval = ndist.cdf(solnZ) + pval = 2 * np.minimum(pval, 1 - pval) + return set(BHfilter(pval, q=0.2)) + + lam = 4. 
* np.sqrt(n) + selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, dispersion, lam) + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(1, 1), + B=B, + fit_probability=gbm_fit_sk, + fit_args={'n_estimators':500}, + how_many=1) + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + for i in range(5000): + df = simulate(**opts) + csvfile = __file__[:-3] + '.csv' + outbase = csvfile[:-4] + + if df is not None and i > 0: + + try: # concatenate to disk + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + df['R2'] = np.ones(df.shape[0]) * R2mean + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/BH/gbm_targets_BH_single_5000.py b/doc/learning_examples/BH/gbm_targets_BH_single_5000.py new file mode 100644 index 000000000..97891ef2e --- /dev/null +++ b/doc/learning_examples/BH/gbm_targets_BH_single_5000.py @@ -0,0 +1,119 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, gbm_fit_sk +from selectinf.learning.learners import mixture_learner +mixture_learner.scales = [1]*10 + [1.5,2,3,4,5,10] + +def BHfilter(pval, q=0.2): + pval = np.asarray(pval) + pval_sort = np.sort(pval) + comparison = q * np.arange(1, pval.shape[0] + 1.) / pval.shape[0] + passing = pval_sort < comparison + if passing.sum(): + thresh = comparison[np.nonzero(passing)[0].max()] + return np.nonzero(pval <= thresh)[0] + return [] + +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000): + + # description of statistical problem + + X, y, truth = generate(n=n, p=p, s=s, signal=signal, sigma=sigma) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + def meta_algorithm(XTX, XTXi, dispersion, lam, sampler): + global counter + p = XTX.shape[0] + success = np.zeros(p) + + loss = rr.quadratic_loss((p,), Q=XTX) + pen = rr.l1norm(p, lagrange=lam) + + scale = 0. + noisy_S = sampler(scale=scale) + soln = XTXi.dot(noisy_S) + solnZ = soln / (np.sqrt(np.diag(XTXi)) * np.sqrt(dispersion)) + pval = ndist.cdf(solnZ) + pval = 2 * np.minimum(pval, 1 - pval) + return set(BHfilter(pval, q=0.2)) + + lam = 4. 
* np.sqrt(n) + selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, dispersion, lam) + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(1, 1), + B=B, + fit_probability=gbm_fit_sk, + fit_args={'n_estimators':500}, + how_many=1) + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=5000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + for i in range(5000): + df = simulate(**opts) + csvfile = __file__[:-3] + '.csv' + outbase = csvfile[:-4] + + if df is not None and i > 0: + + try: # concatenate to disk + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + df['R2'] = np.ones(df.shape[0]) * R2mean + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/BH/logit_targets_BH_single_5000.py b/doc/learning_examples/BH/logit_targets_BH_single_5000.py new file mode 100644 index 000000000..48e9a57d6 --- /dev/null +++ b/doc/learning_examples/BH/logit_targets_BH_single_5000.py @@ -0,0 +1,120 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler +from selectinf.learning.Rfitters import logit_fit +from selectinf.learning.learners import mixture_learner +mixture_learner.scales = [1]*10 + [1.5,2,3,4,5,10] + +def BHfilter(pval, q=0.2): + pval = np.asarray(pval) + pval_sort = np.sort(pval) + comparison = q * np.arange(1, pval.shape[0] + 1.) / pval.shape[0] + passing = pval_sort < comparison + if passing.sum(): + thresh = comparison[np.nonzero(passing)[0].max()] + return np.nonzero(pval <= thresh)[0] + return [] + +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000): + + # description of statistical problem + + X, y, truth = generate(n=n, p=p, s=s, signal=signal, sigma=sigma) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + def meta_algorithm(XTX, XTXi, dispersion, lam, sampler): + global counter + p = XTX.shape[0] + success = np.zeros(p) + + loss = rr.quadratic_loss((p,), Q=XTX) + pen = rr.l1norm(p, lagrange=lam) + + scale = 0. + noisy_S = sampler(scale=scale) + soln = XTXi.dot(noisy_S) + solnZ = soln / (np.sqrt(np.diag(XTXi)) * np.sqrt(dispersion)) + pval = ndist.cdf(solnZ) + pval = 2 * np.minimum(pval, 1 - pval) + return set(BHfilter(pval, q=0.2)) + + lam = 4. 
* np.sqrt(n) + selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, dispersion, lam) + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(1, 1), + B=B, + fit_probability=logit_fit, + fit_args={'df':20}, + how_many=1) + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=5000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + for i in range(5000): + df = simulate(**opts) + csvfile = __file__[:-3] + '.csv' + outbase = csvfile[:-4] + + if df is not None and i > 0: + + try: # concatenate to disk + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + df['R2'] = np.ones(df.shape[0]) * R2mean + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/cross_inference/cross_inference.py b/doc/learning_examples/cross_inference/cross_inference.py index 9383e69ee..90000e99e 100644 --- a/doc/learning_examples/cross_inference/cross_inference.py +++ b/doc/learning_examples/cross_inference/cross_inference.py @@ -1,7 +1,7 @@ import numpy as np -from selection.learning.core import cross_inference -from selection.learning.keras_fit import keras_fit +from selectinf.learning.core import cross_inference +from selectinf.learning.core import keras_fit data = np.load('lasso_multi_learning.npz') learning_data = (data['T'][:2000], data['Y'][:2000]) diff --git a/doc/learning_examples/knockoffs/knockoff_followup.py b/doc/learning_examples/knockoffs/knockoff_followup.py index 3978af5f3..a19fc6e3c 100644 --- a/doc/learning_examples/knockoffs/knockoff_followup.py +++ b/doc/learning_examples/knockoffs/knockoff_followup.py @@ -5,19 +5,16 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.Rutils import lasso_glmnet -from selection.learning.utils import (full_model_inference, - pivot_plot, - naive_full_model_inference) -from selection.learning.core import split_sampler, keras_fit +from selectinf.learning.Rutils import lasso_glmnet +from selectinf.learning.utils import (full_model_inference, + pivot_plot, + split_full_model_inference) +from selectinf.learning.core import normal_sampler, keras_fit -def simulate(n=400, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0): +def generate(n=2000, p=100, s=10, signal=(np.sqrt(2)*0.5, np.sqrt(2)*1), sigma=2, **ignored): - # description of statistical problem - - np.random.seed(seed) X, y, truth = gaussian_instance(n=n, p=p, s=s, @@ -26,8 +23,24 @@ def simulate(n=400, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0): sigma=sigma, signal=signal, random_signs=True, - scale=False, - center=False)[:3] + scale=False)[:3] + + return X, y, truth + +def simulate(n=2000, p=100, s=10, signal=(np.sqrt(2)*0.5, np.sqrt(2)*1), + sigma=2, alpha=0.1,B=3000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] dispersion = sigma**2 @@ -35,12 +48,12 @@ def simulate(n=400, p=100, s=10, signal=(0.5, 1), 
sigma=2, alpha=0.1, seed=0): covS = dispersion * X.T.dot(X) smooth_sampler = normal_sampler(S, covS) + def meta_algorithm(X, XTXi, resid, sampler): n, p = X.shape - idx = np.random.choice(np.arange(n), 200, replace=False) - + idx = np.random.choice(np.arange(n), int(n/2), replace=False) S = sampler(scale=0.) # deterministic with scale=0 ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X Xidx, yidx = X[idx], y[idx] @@ -66,45 +79,25 @@ def meta_algorithm(X, XTXi, resid, sampler): y, truth, selection_algorithm, - splitting_sampler, + smooth_sampler, success_params=(8, 10), B=B, fit_probability=keras_fit, - fit_args={'epochs':20, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}, - fit_args={'df':20}) + fit_args={'epochs':20, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) if df is not None: + idx2 = np.random.choice(np.arange(n), int(n/2), replace=False) observed_set = list(df['variable']) - true_target = truth[observed_set] - - np.random.seed(seed) - X2, _, _ = gaussian_instance(n=n, - p=p, - s=s, - equicorrelated=False, - rho=0.5, - sigma=sigma, - signal=signal, - random_signs=True, - center=False, - scale=False)[:3] - stage_1 = np.random.choice(np.arange(n), 200, replace=False) - stage_2 = sorted(set(range(n)).difference(stage_1)) - X2 = X2[stage_2] - y2 = X2.dot(truth) + sigma * np.random.standard_normal(X2.shape[0]) - - XTXi_2 = np.linalg.inv(X2.T.dot(X2)) - resid2 = y2 - X2.dot(XTXi_2.dot(X2.T.dot(y2))) - dispersion_2 = np.linalg.norm(resid2)**2 / (X2.shape[0] - X2.shape[1]) - - naive_df = naive_full_model_inference(X2, - y2, - dispersion_2, + split_df = split_full_model_inference(X, + y, + idx2, + None, # ignored dispersion + truth, observed_set, alpha=alpha) - df = pd.merge(df, naive_df, on='variable') + df = pd.merge(df, split_df, on='variable') return df if __name__ == "__main__": @@ -112,13 +105,27 @@ def meta_algorithm(X, XTXi, resid, sampler): import matplotlib.pyplot as plt import pandas as pd - iseed = int(np.fabs(np.random.standard_normal() * 1000)) - for i in range(500): - df = simulate(seed=i + iseed) - csvfile = 'knockoff_followup.csv' + opts = dict(n=2000, p=100, s=10, + signal=(np.sqrt(2)*0.5, np.sqrt(2)*1), sigma=2, + alpha=0.1, B=3000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + + for i in range(5000): + df = simulate(**opts) + csvfile = __file__[:-3] + '_2000_idx.csv' outbase = csvfile[:-4] - if df is not None and i > 0: + if df is not None: try: df = pd.concat([df, pd.read_csv(csvfile)]) @@ -127,5 +134,6 @@ def meta_algorithm(X, XTXi, resid, sampler): df.to_csv(csvfile, index=False) if len(df['pivot']) > 0: - pivot_plot(df, outbase) + f = pivot_plot(df, outbase)[1] + plt.close(f) diff --git a/doc/learning_examples/knockoffs/knockoff_followup_6000.py b/doc/learning_examples/knockoffs/knockoff_followup_6000.py new file mode 100644 index 000000000..57a8d8649 --- /dev/null +++ b/doc/learning_examples/knockoffs/knockoff_followup_6000.py @@ -0,0 +1,141 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.Rutils import lasso_glmnet +from selectinf.learning.utils import (full_model_inference, + pivot_plot, + split_full_model_inference) +from selectinf.learning.core import normal_sampler, keras_fit +from 
selectinf.learning.fitters import gbm_fit_sk + +def generate(n=2000, p=100, s=10, signal=(np.sqrt(2)*0.5, np.sqrt(2)*1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, truth + +def simulate(n=2000, p=100, s=10, signal=(np.sqrt(2)*0.5, np.sqrt(2)*1), + sigma=2, alpha=0.1,B=3000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + + def meta_algorithm(X, XTXi, resid, sampler): + + n, p = X.shape + idx = np.random.choice(np.arange(n), int(n/2), replace=False) + + S = sampler(scale=0.) # deterministic with scale=0 + ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X + Xidx, yidx = X[idx], y[idx] + rho = 0.8 + + Xnew = rho * Xidx + np.sqrt(1 - rho**2) * np.random.standard_normal(Xidx.shape) + + X_full = np.hstack([Xidx, Xnew]) + beta_full = np.linalg.pinv(X_full).dot(yidx) + winners = np.fabs(beta_full)[:p] > np.fabs(beta_full)[p:] + return set(np.nonzero(winners)[0]) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) + + # run selection algorithm + + df = full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(8, 10), + B=B, + fit_probability=gbm_fit_sk, + fit_args={'n_estimators':1000} + ) + + if df is not None: + + observed_set = list(df['variable']) + idx2 = np.random.choice(np.arange(n), int(n/2), replace=False) + split_df = split_full_model_inference(X, + y, + idx2, + None, # ignored dispersion + truth, + observed_set, + alpha=alpha) + + df = pd.merge(df, split_df, on='variable') + return df + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + opts = dict(n=2000, p=100, s=10, + signal=(np.sqrt(2)*0.5, np.sqrt(2)*1), sigma=2, + alpha=0.1, B=6000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + + for i in range(5000): + df = simulate(**opts) + csvfile = __file__[:-3] + '_gbm.csv' + outbase = csvfile[:-4] + + if df is not None: + + try: + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/knockoffs/knockoff_kernel.py b/doc/learning_examples/knockoffs/knockoff_kernel.py index 1ac91d8c7..d979566a9 100644 --- a/doc/learning_examples/knockoffs/knockoff_kernel.py +++ b/doc/learning_examples/knockoffs/knockoff_kernel.py @@ -14,7 +14,6 @@ def simulate(n=1000, p=50, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0, B= # description of statistical problem - np.random.seed(seed) X, y, truth = gaussian_instance(n=n, p=p, s=s, diff --git a/doc/learning_examples/knockoffs/knockoff_kernel_multi.py b/doc/learning_examples/knockoffs/knockoff_kernel_multi.py index a6e438cdd..2fdac03b5 100644 --- 
a/doc/learning_examples/knockoffs/knockoff_kernel_multi.py +++ b/doc/learning_examples/knockoffs/knockoff_kernel_multi.py @@ -5,16 +5,13 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.utils import full_model_inference, pivot_plot -from selection.learning.core import normal_sampler, keras_fit +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, keras_fit -def simulate(n=1000, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0, B=5000): +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): - # description of statistical problem - - np.random.seed(seed) X, y, truth = gaussian_instance(n=n, p=p, s=s, @@ -23,8 +20,23 @@ def simulate(n=1000, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0, B sigma=sigma, signal=signal, random_signs=True, - scale=False, - center=False)[:3] + scale=False)[:3] + + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] dispersion = sigma**2 @@ -71,10 +83,23 @@ def meta_algorithm(X, XTXi, resid, sampler): import matplotlib.pyplot as plt import pandas as pd + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + iseed = int(np.fabs(np.random.standard_normal() * 50000)) - for i in range(500): - df = simulate(seed=i + iseed, B=3000) - csvfile = 'knockoff_kernel_multi.csv' + for i in range(2000): + df = simulate(**opts) + csvfile = __file__[:-3] + '_200.csv' outbase = csvfile[:-4] if df is not None and i > 0: @@ -86,6 +111,6 @@ def meta_algorithm(X, XTXi, resid, sampler): df.to_csv(csvfile, index=False) if len(df['pivot']) > 0: - pivot_ax, length_ax = pivot_plot(df, outbase) - + f = pivot_plot(df, outbase)[1] + plt.close(f) diff --git a/doc/learning_examples/knockoffs/knockoff_kernel_multi_5000.py b/doc/learning_examples/knockoffs/knockoff_kernel_multi_5000.py new file mode 100644 index 000000000..031cc0fb5 --- /dev/null +++ b/doc/learning_examples/knockoffs/knockoff_kernel_multi_5000.py @@ -0,0 +1,117 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, keras_fit + +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + def 
meta_algorithm(X, XTXi, resid, sampler): + + n, p = X.shape + + rho = 0.8 + S = sampler(scale=0.) # deterministic with scale=0 + ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X + Xnew = rho * X + np.sqrt(1 - rho**2) * np.random.standard_normal(X.shape) + + X_full = np.hstack([X, Xnew]) + beta_full = np.linalg.pinv(X_full).dot(ynew) + winners = np.fabs(beta_full)[:p] > np.fabs(beta_full)[p:] + return set(np.nonzero(winners)[0]) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) + + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(8, 10), + B=B, + fit_probability=keras_fit, + fit_args={'epochs':20, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), + sigma=2, alpha=0.1, B=5000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + + iseed = int(np.fabs(np.random.standard_normal() * 50000)) + for i in range(2000): + df = simulate(**opts) + csvfile = __file__[:-3] + '_200.csv' + outbase = csvfile[:-4] + + if df is not None and i > 0: + + try: # concatenate to disk + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/knockoffs/knockoff_kernel_multi_8000.py b/doc/learning_examples/knockoffs/knockoff_kernel_multi_8000.py new file mode 100644 index 000000000..8b4035d26 --- /dev/null +++ b/doc/learning_examples/knockoffs/knockoff_kernel_multi_8000.py @@ -0,0 +1,117 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, keras_fit + +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + def meta_algorithm(X, XTXi, resid, sampler): + + n, p = X.shape + + rho = 0.8 + S = sampler(scale=0.) 
# deterministic with scale=0 + ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X + Xnew = rho * X + np.sqrt(1 - rho**2) * np.random.standard_normal(X.shape) + + X_full = np.hstack([X, Xnew]) + beta_full = np.linalg.pinv(X_full).dot(ynew) + winners = np.fabs(beta_full)[:p] > np.fabs(beta_full)[p:] + return set(np.nonzero(winners)[0]) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) + + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(8, 10), + B=B, + fit_probability=keras_fit, + fit_args={'epochs':20, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + opts = dict(n=2000, p=100, s=10, signal=(0.5, 1), + sigma=2, alpha=0.1, B=8000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + + iseed = int(np.fabs(np.random.standard_normal() * 50000)) + for i in range(2000): + df = simulate(**opts) + csvfile = __file__[:-3] + '_2000.csv' + outbase = csvfile[:-4] + + if df is not None and i > 0: + + try: # concatenate to disk + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/knockoffs/knockoff_kernel_multi_gbm.py b/doc/learning_examples/knockoffs/knockoff_kernel_multi_gbm.py new file mode 100644 index 000000000..4f834ec7b --- /dev/null +++ b/doc/learning_examples/knockoffs/knockoff_kernel_multi_gbm.py @@ -0,0 +1,90 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, keras_fit, gbm_fit_sk + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0, B=3000): + + # description of statistical problem + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False, + center=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + def meta_algorithm(X, XTXi, resid, sampler): + + n, p = X.shape + + rho = 0.8 + S = sampler(scale=0.) 
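# The statements just below screen variables with a knockoff-style comparison:
# each column of X gets a synthetic copy correlated rho=0.8 with the original,
# and a coordinate is kept when its least-squares coefficient beats that of its
# copy. A minimal self-contained sketch of that rule (illustration only; the
# name knockoff_screen and the rho default are assumptions, not part of this file):

import numpy as np

def knockoff_screen(X, y, rho=0.8):
    n, p = X.shape
    # synthetic copies, each correlated rho with its original column
    Xnew = rho * X + np.sqrt(1 - rho ** 2) * np.random.standard_normal(X.shape)
    # least-squares fit on the augmented design [X, Xnew]
    beta_full = np.linalg.pinv(np.hstack([X, Xnew])).dot(y)
    # keep a variable when it beats its own copy in absolute coefficient
    winners = np.fabs(beta_full[:p]) > np.fabs(beta_full[p:])
    return set(np.nonzero(winners)[0])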
# deterministic with scale=0 + ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X + Xnew = rho * X + np.sqrt(1 - rho**2) * np.random.standard_normal(X.shape) + + X_full = np.hstack([X, Xnew]) + beta_full = np.linalg.pinv(X_full).dot(ynew) + winners = np.fabs(beta_full)[:p] > np.fabs(beta_full)[p:] + return set(np.nonzero(winners)[0]) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) + + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(8, 10), + B=B, + fit_probability=gbm_fit_sk, + fit_args={'n_estimators':1000}) + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + iseed = int(np.fabs(np.random.standard_normal() * 50000)) + for i in range(2000): + df = simulate(seed=i + iseed, B=3000) + csvfile = 'knockoff_kernel_multi_gbm.csv' + outbase = csvfile[:-4] + + if df is not None and i > 0: + + try: # concatenate to disk + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + pivot_plot(df, outbase) + + diff --git a/doc/learning_examples/lasso_CV/lasso_example_CV.py b/doc/learning_examples/lasso_CV/lasso_example_CV.py index ad08a05a9..f0b6fa0f5 100644 --- a/doc/learning_examples/lasso_CV/lasso_example_CV.py +++ b/doc/learning_examples/lasso_CV/lasso_example_CV.py @@ -5,11 +5,11 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.utils import full_model_inference, pivot_plot -from selection.learning.core import split_sampler, probit_fit -from selection.learning.Rutils import lasso_glmnet +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import split_sampler, probit_fit +from selectinf.learning.Rutils import lasso_glmnet def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1): diff --git a/doc/learning_examples/multi_target/followup_multi.py b/doc/learning_examples/multi_target/followup_multi.py index aa16ded9f..95fe1208c 100644 --- a/doc/learning_examples/multi_target/followup_multi.py +++ b/doc/learning_examples/multi_target/followup_multi.py @@ -5,17 +5,18 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.utils import full_model_inference, pivot_plot, naive_full_model_inference -from selection.learning.core import normal_sampler, keras_fit -from selection.learning.Rutils import lasso_glmnet +from selectinf.learning.utils import (full_model_inference, + pivot_plot, + split_full_model_inference) +from selectinf.learning.core import normal_sampler, keras_fit +from selectinf.learning.Rutils import lasso_glmnet -def simulate(n=400, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0, B=2000): +def simulate(n=1000, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000): # description of statistical problem - np.random.seed(seed) X, y, truth = gaussian_instance(n=n, p=p, s=s, @@ -33,10 +34,11 @@ def simulate(n=400, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0, B= covS = dispersion * X.T.dot(X) smooth_sampler = normal_sampler(S, covS) - def meta_algorithm(X, XTXi, resid, 
sampler): + idx = np.random.choice(np.arange(n), int(n/2), replace=False) + + def meta_algorithm(X, XTXi, resid, idx, sampler): n, p = X.shape - idx = np.random.choice(np.arange(n), 200, replace=False) S = sampler(scale=0.) # deterministic with scale=0 ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X @@ -50,7 +52,7 @@ def meta_algorithm(X, XTXi, resid, sampler): resid = y - X.dot(XTXi.dot(X.T.dot(y))) dispersion = np.linalg.norm(resid)**2 / (n-p) - selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid, idx) # run selection algorithm @@ -68,35 +70,15 @@ def meta_algorithm(X, XTXi, resid, sampler): if df is not None: observed_set = list(df['variable']) - true_target = truth[observed_set] - - np.random.seed(seed) - X2, _, _ = gaussian_instance(n=n, - p=p, - s=s, - equicorrelated=False, - rho=0.5, - sigma=sigma, - signal=signal, - random_signs=True, - center=False, - scale=False)[:3] - stage_1 = np.random.choice(np.arange(n), 200, replace=False) - stage_2 = sorted(set(range(n)).difference(stage_1)) - X2 = X2[stage_2] - y2 = X2.dot(truth) + sigma * np.random.standard_normal(X2.shape[0]) - - XTXi_2 = np.linalg.inv(X2.T.dot(X2)) - resid2 = y2 - X2.dot(XTXi_2.dot(X2.T.dot(y2))) - dispersion_2 = np.linalg.norm(resid2)**2 / (X2.shape[0] - X2.shape[1]) - - naive_df = naive_full_model_inference(X2, - y2, - dispersion_2, + split_df = split_full_model_inference(X, + y, + idx, + dispersion, + truth, observed_set, alpha=alpha) - df = pd.merge(df, naive_df, on='variable') + df = pd.merge(df, split_df, on='variable') return df if __name__ == "__main__": @@ -104,10 +86,9 @@ def meta_algorithm(X, XTXi, resid, sampler): import matplotlib.pyplot as plt import pandas as pd - iseed = int(np.fabs(np.random.standard_normal() * 1000)) for i in range(500): - df = simulate(seed=i+iseed, B=2000) - csvfile = 'followup_multi.csv' + df = simulate(B=3000) + csvfile = __file__[:-3] + '.csv' outbase = csvfile[:-4] if df is not None and i > 0: @@ -119,6 +100,7 @@ def meta_algorithm(X, XTXi, resid, sampler): df.to_csv(csvfile, index=False) if len(df['pivot']) > 0: - pivot_plot(df, outbase) + f = pivot_plot(df, outbase)[1] + plt.close(f) diff --git a/doc/learning_examples/multi_target/lasso_multi.py b/doc/learning_examples/multi_target/lasso_multi.py new file mode 100644 index 000000000..ba3754c8b --- /dev/null +++ b/doc/learning_examples/multi_target/lasso_multi.py @@ -0,0 +1,120 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, keras_fit, gbm_fit_sk +from selectinf.learning.Rutils import lasso_glmnet + +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + def meta_algorithm(X, XTXi, 
resid, lam, sampler): + p = XTX.shape[0] + success = np.zeros(p) + + loss = rr.quadratic_loss((p,), Q=XTX) + pen = rr.l1norm(p, lagrange=lam) + + scale = 0. + noisy_S = sampler(scale=scale) + loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) + problem = rr.simple_problem(loss, pen) + soln = problem.solve(max_its=100, tol=1.e-10) + success += soln != 0 + return set(np.nonzero(success)[0]) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + lam = 4. * np.sqrt(n) + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid, lam) + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(1, 1), + B=B, + fit_probability=keras_fit, + fit_args={'epochs':10, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) + + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + U = np.linspace(0, 1, 101) + plt.clf() + + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + for i in range(5000): + df = simulate(**opts) + csvfile = __file__[:-3] + '.csv' + outbase = csvfile[:-4] + + if df is not None: + + try: + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/multi_target/lasso_example_multi_CV.py b/doc/learning_examples/multi_target/lasso_multi_CV.py similarity index 85% rename from doc/learning_examples/multi_target/lasso_example_multi_CV.py rename to doc/learning_examples/multi_target/lasso_multi_CV.py index 7daf55c83..14d407608 100644 --- a/doc/learning_examples/multi_target/lasso_example_multi_CV.py +++ b/doc/learning_examples/multi_target/lasso_multi_CV.py @@ -5,11 +5,11 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.utils import full_model_inference, pivot_plot -from selection.learning.core import split_sampler, keras_fit -from selection.learning.Rutils import lasso_glmnet +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import split_sampler, keras_fit +from selectinf.learning.Rutils import lasso_glmnet def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): @@ -48,6 +48,8 @@ def meta_algorithm(X, XTXi, resid, sampler): # run selection algorithm + print('SNR', np.linalg.norm(X.dot(truth)) / np.linalg.norm(y-X.dot(truth))) + print('R2', 1 - np.linalg.norm(y-X.dot(truth))**2 / np.linalg.norm(y)**2) return full_model_inference(X, y, truth, @@ -66,7 +68,7 @@ def meta_algorithm(X, XTXi, resid, sampler): U = np.linspace(0, 1, 101) plt.clf() - for i in range(500): + for i in range(1000): df = simulate() csvfile = 'lasso_multi_CV.csv' outbase = csvfile[:-4] diff --git a/doc/learning_examples/multi_target/lasso_example_multi_bigger.py b/doc/learning_examples/multi_target/lasso_multi_CV_bigger.py similarity index 60% rename from doc/learning_examples/multi_target/lasso_example_multi_bigger.py rename to 
doc/learning_examples/multi_target/lasso_multi_CV_bigger.py index 19cabbf6c..4f43caa7e 100644 --- a/doc/learning_examples/multi_target/lasso_example_multi_bigger.py +++ b/doc/learning_examples/multi_target/lasso_multi_CV_bigger.py @@ -5,10 +5,11 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.utils import full_model_inference, pivot_plot -from selection.learning.core import split_sampler, keras_fit +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, keras_fit, gbm_fit_sk +from selectinf.learning.Rutils import lasso_glmnet def simulate(n=2000, p=1000, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=4000): @@ -29,31 +30,22 @@ def simulate(n=2000, p=1000, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=4000): S = X.T.dot(y) covS = dispersion * X.T.dot(X) smooth_sampler = normal_sampler(S, covS) - splitting_sampler = split_sampler(X * y[:, None], covS) - def meta_algorithm(XTX, XTXi, lam, sampler): + def meta_algorithm(X, XTXi, resid, sampler): - p = XTX.shape[0] - success = np.zeros(p) - - loss = rr.quadratic_loss((p,), Q=XTX) - pen = rr.l1norm(p, lagrange=lam) - - scale = 0. - noisy_S = sampler(scale=scale) - loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) - problem = rr.simple_problem(loss, pen) - soln = problem.solve(max_its=100, tol=1.e-10) - success += soln != 0 - return set(np.nonzero(success)[0]) + S = sampler(scale=0.) # deterministic with scale=0 + ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X + G = lasso_glmnet(X, ynew, *[None]*4) + select = G.select() + print(select) + return set(list(select[0])) XTX = X.T.dot(X) XTXi = np.linalg.inv(XTX) resid = y - X.dot(XTXi.dot(X.T.dot(y))) dispersion = np.linalg.norm(resid)**2 / (n-p) - lam = 5. 
* np.sqrt(n) - selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam) + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) # run selection algorithm @@ -61,11 +53,11 @@ def meta_algorithm(XTX, XTXi, lam, sampler): y, truth, selection_algorithm, - splitting_sampler, + smooth_sampler, success_params=(1, 1), B=B, - fit_probability=logit_fit, - fit_args={'df':20}) + fit_probability=keras_fit, + fit_args={'epochs':10, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) if __name__ == "__main__": @@ -76,9 +68,9 @@ def meta_algorithm(XTX, XTXi, lam, sampler): U = np.linspace(0, 1, 101) plt.clf() - for i in range(500): - df = simulate(B=4000) - csvfile = 'lasso_multi_bigger.csv' + for i in range(2000): + df = simulate(B=3000) + csvfile = __file__[:-3] + '.csv' outbase = csvfile[:-4] if df is not None and i > 0: @@ -90,4 +82,4 @@ def meta_algorithm(XTX, XTXi, lam, sampler): df.to_csv(csvfile, index=False) if len(df['pivot']) > 0: - pivot_ax, length_ax = pivot_plot(df, outbase) + pivot_plot(df, outbase) diff --git a/doc/learning_examples/multi_target/lasso_multi_CV_gbm.py b/doc/learning_examples/multi_target/lasso_multi_CV_gbm.py new file mode 100644 index 000000000..73e4f14a8 --- /dev/null +++ b/doc/learning_examples/multi_target/lasso_multi_CV_gbm.py @@ -0,0 +1,84 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import split_sampler, gbm_fit_sk +from selectinf.learning.Rutils import lasso_glmnet + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): + + # description of statistical problem + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + splitting_sampler = split_sampler(X * y[:, None], covS) + + def meta_algorithm(X, XTXi, resid, sampler): + + S = sampler(scale=0.) 
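# Why the reconstruction below works: resid = y - X(X'X)^{-1}X'y is orthogonal
# to the columns of X, so ynew = X @ XTXi @ S + resid satisfies X.T @ ynew = S
# for any sampled score S; with scale=0 the sampler returns S = X.T @ y and
# ynew equals y exactly. A quick self-contained check (illustration only; X and
# y are any full-rank design and response, e.g. from gaussian_instance):

import numpy as np

def check_score_reconstruction(X, y):
    XTXi = np.linalg.inv(X.T.dot(X))
    resid = y - X.dot(XTXi.dot(X.T.dot(y)))
    S = X.T.dot(y) + np.random.standard_normal(X.shape[1])  # a perturbed score
    ynew = X.dot(XTXi).dot(S) + resid
    return np.allclose(X.T.dot(ynew), S)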
# deterministic with scale=0 + ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X + G = lasso_glmnet(X, ynew, *[None]*4) + select = G.select() + return set(list(select[0])) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + splitting_sampler, + success_params=(1, 1), + B=B, + fit_probability=gbm_fit_sk, + fit_args={'n_estimators':1000}) + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + U = np.linspace(0, 1, 101) + plt.clf() + + for i in range(2000): + df = simulate() + csvfile = 'lasso_multi_CV_gbm.csv' + outbase = csvfile[:-4] + + if df is not None: + + try: + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + pivot_plot(df, outbase) + diff --git a/doc/learning_examples/multi_target/lasso_multi_CV_split.py b/doc/learning_examples/multi_target/lasso_multi_CV_split.py new file mode 100644 index 000000000..dfa17a801 --- /dev/null +++ b/doc/learning_examples/multi_target/lasso_multi_CV_split.py @@ -0,0 +1,149 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import split_sampler, keras_fit +from selectinf.learning.Rutils import lasso_glmnet +from rpy2.robjects import numpy2ri +import rpy2.robjects as rpy + +class lasso_glmnet_split(lasso_glmnet): + + def select(self, CV=True, seed=0): + + numpy2ri.activate() + + rpy.r.assign('X', self.X.copy()) + rpy.r.assign('Y', self.Y.copy()) + rpy.r('X = as.matrix(X)') + rpy.r('Y = as.numeric(Y)') + rpy.r('n = nrow(X)') + rpy.r('split_ = sample(1:n, n/2, replace=FALSE)') + rpy.r('Xsplit_ = X[split_,]') + rpy.r('Ysplit_ = Y[split_]') + rpy.r('set.seed(%d)' % seed) + rpy.r('cvG = cv.glmnet(Xsplit_, Ysplit_, intercept=FALSE, standardize=FALSE)') + rpy.r("L1 = cvG[['lambda.min']]") + rpy.r("L2 = cvG[['lambda.1se']]") + if CV: + rpy.r("L = L1") + else: + rpy.r("L = 0.99 * L2") + rpy.r("G = glmnet(X, Y, intercept=FALSE, standardize=FALSE)") + n, p = self.X.shape + L = rpy.r('L') + rpy.r('B = as.numeric(coef(G, s=L, exact=TRUE, x=X, y=Y))[-1]') + B = np.asarray(rpy.r('B')) + selected = (B != 0) + numpy2ri.deactivate() + if selected.sum(): + V = np.nonzero(selected)[0] + return V, V + else: + return [], [] + + +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + splitting_sampler = split_sampler(X * y[:, None], covS) + + def meta_algorithm(X, XTXi, resid, sampler): + + S = sampler(scale=0.) 
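# lasso_glmnet_split (defined above) chooses lambda by cross-validating
# cv.glmnet on a random half of the rows only, then reads the coefficients off
# the full-data glmnet path at that lambda; select()[0] is the resulting set of
# nonzero indices, which is what the screening rule just below hands to
# full_model_inference.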
# deterministic with scale=0 + ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X + G = lasso_glmnet_split(X, ynew, *[None]*4) + select = G.select() + return set(list(select[0])) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + splitting_sampler, + success_params=(1, 1), + B=B, + fit_probability=keras_fit, + fit_args={'epochs':10, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + U = np.linspace(0, 1, 101) + plt.clf() + + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + for i in range(5000): + df = simulate(**opts) + csvfile = __file__[:-3] + '.csv' + outbase = csvfile[:-4] + + if df is not None: + + try: + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/multi_target/lasso_multi_bigger.py b/doc/learning_examples/multi_target/lasso_multi_bigger.py new file mode 100644 index 000000000..e7f86b13d --- /dev/null +++ b/doc/learning_examples/multi_target/lasso_multi_bigger.py @@ -0,0 +1,135 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, keras_fit, gbm_fit_sk +from selectinf.learning.Rutils import lasso_glmnet + +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, truth + +def simulate(n=2000, p=1000, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=4000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + + # description of statistical problem + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + def meta_algorithm(X, XTXi, resid, lam, sampler): + p = XTX.shape[0] + success = np.zeros(p) + + loss = rr.quadratic_loss((p,), Q=XTX) + pen = rr.l1norm(p, lagrange=lam) + + scale = 0. 
+ noisy_S = sampler(scale=scale) + loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) + problem = rr.simple_problem(loss, pen) + soln = problem.solve(max_its=100, tol=1.e-10) + success += soln != 0 + return set(np.nonzero(success)[0]) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + lam = 5. * np.sqrt(n) + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid, lam) + + # run selection algorithm + + print('SNR', np.linalg.norm(X.dot(truth)) / np.linalg.norm(y-X.dot(truth))) + print('R2', 1 - np.linalg.norm(y-X.dot(truth))**2 / np.linalg.norm(y)**2) + return full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(1, 1), + B=B, + fit_probability=keras_fit, + fit_args={'epochs':10, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) + + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + U = np.linspace(0, 1, 101) + plt.clf() + + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + for i in range(5000): + df = simulate(**opts) + csvfile = __file__[:-3] + '.csv' + outbase = csvfile[:-4] + + if df is not None: + + try: + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/multi_target/lasso_multi_logit.py b/doc/learning_examples/multi_target/lasso_multi_logit.py new file mode 100644 index 000000000..94f9cd4d1 --- /dev/null +++ b/doc/learning_examples/multi_target/lasso_multi_logit.py @@ -0,0 +1,134 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, keras_fit, gbm_fit_sk +from selectinf.learning.Rutils import lasso_glmnet +from selectinf.learning.Rfitters import logit_fit + +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + + # description of statistical problem + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + def meta_algorithm(X, XTXi, resid, lam, sampler): + p = XTX.shape[0] + success = np.zeros(p) + + loss = rr.quadratic_loss((p,), Q=XTX) + pen = rr.l1norm(p, lagrange=lam) + + scale = 0. 
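# The problem built from `loss` and `pen` above and solved just below is a lasso
# written in score form: roughly 0.5 * b'(X'X)b - noisy_S'b + lam * ||b||_1,
# which for noisy_S = X'y matches 0.5 * ||y - Xb||^2 + lam * ||b||_1 up to a
# constant. A self-contained equivalent using scikit-learn (illustration only;
# lasso_select is an assumed name, and sklearn's Lasso scales the squared error
# by 1/n, hence alpha = lam / n):

import numpy as np
from sklearn.linear_model import Lasso

def lasso_select(X, y, lam):
    n = X.shape[0]
    coef = Lasso(alpha=lam / n, fit_intercept=False).fit(X, y).coef_
    return set(np.nonzero(coef != 0)[0])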
+ noisy_S = sampler(scale=scale) + loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) + problem = rr.simple_problem(loss, pen) + soln = problem.solve(max_its=100, tol=1.e-10) + success += soln != 0 + return set(np.nonzero(success)[0]) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + lam = 4. * np.sqrt(n) + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid, lam) + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(1, 1), + B=B, + fit_probability=logit_fit, + fit_args={'df':20}) + + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + U = np.linspace(0, 1, 101) + plt.clf() + + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + for i in range(5000): + df = simulate(**opts) + csvfile = __file__[:-3] + '.csv' + outbase = csvfile[:-4] + + if df is not None: + + try: + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/multi_target/lee_multi.py b/doc/learning_examples/multi_target/lee_multi.py index d81ff4cb1..2bf5a4eee 100644 --- a/doc/learning_examples/multi_target/lee_multi.py +++ b/doc/learning_examples/multi_target/lee_multi.py @@ -5,12 +5,13 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.utils import (partial_model_inference, - pivot_plot, - lee_inference) -from selection.learning.core import normal_sampler, keras_fit +from selectinf.learning.utils import (partial_model_inference, + pivot_plot, + lee_inference) +from selectinf.learning.core import normal_sampler, keras_fit, gbm_fit_sk +from selectinf.learning.learners import sparse_mixture_learner def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=8000): @@ -63,11 +64,12 @@ def meta_algorithm(XTX, XTXi, lam, sampler): truth, selection_algorithm, smooth_sampler, - fit_probability=keras_fit, - fit_args={'epochs':30, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}, + fit_probability=gbm_fit_sk, + fit_args={'n_estimators':1000}, success_params=(1, 1), B=B, - alpha=alpha) + alpha=alpha, + learner_klass=sparse_mixture_learner) lee_df = lee_inference(X, y, diff --git a/doc/learning_examples/stability/stability_selection_harder.py b/doc/learning_examples/stability/stability_selection_harder.py index 2ac1a1903..f13a9006a 100644 --- a/doc/learning_examples/stability/stability_selection_harder.py +++ b/doc/learning_examples/stability/stability_selection_harder.py @@ -5,11 +5,11 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.utils import full_model_inference, pivot_plot -from selection.learning.core import split_sampler, keras_fit +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import split_sampler, keras_fit from sklearn.linear_model import 
lasso_path @@ -83,9 +83,9 @@ def _alpha_grid(X, y, center, XTX): import matplotlib.pyplot as plt import pandas as pd - for i in range(500): + for i in range(2000): df = simulate(B=3000) - csvfile = 'stability_selection_harder.csv' + csvfile = __file__[:-3] + '.csv' outbase = csvfile[:-4] if df is not None and i > 0: @@ -97,6 +97,6 @@ def _alpha_grid(X, y, center, XTX): df.to_csv(csvfile, index=False) if len(df['pivot']) > 0: - pivot_ax, length_ax = pivot_plot(df, outbase) + pivot_plot(df, outbase) diff --git a/doc/learning_examples/stability/stability_selection_harder_5000.py b/doc/learning_examples/stability/stability_selection_harder_5000.py new file mode 100644 index 000000000..33943a72f --- /dev/null +++ b/doc/learning_examples/stability/stability_selection_harder_5000.py @@ -0,0 +1,102 @@ +import functools, uuid + +import numpy as np, pandas as pd +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import split_sampler, keras_fit + +from sklearn.linear_model import lasso_path + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000): + + # description of statistical problem + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.1, + sigma=sigma, + signal=signal, + random_signs=True, + scale=True)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + splitting_sampler = split_sampler(X * y[:, None], covS) + + def meta_algorithm(XTX, XTXi, sampler): + + min_success = 6 + ntries = 10 + + def _alpha_grid(X, y, center, XTX): + n, p = X.shape + alphas, coefs, _ = lasso_path(X, y, Xy=center, precompute=XTX) + nselected = np.count_nonzero(coefs, axis=0) + return alphas[nselected < np.sqrt(0.8 * p)] + + alpha_grid = _alpha_grid(X, y, sampler(scale=0.), XTX) + success = np.zeros((p, alpha_grid.shape[0])) + + for _ in range(ntries): + scale = 1. 
# corresponds to sub-samples of 50% + noisy_S = sampler(scale=scale) + _, coefs, _ = lasso_path(X, y, Xy = noisy_S, precompute=XTX, alphas=alpha_grid) + success += np.abs(np.sign(coefs)) + + selected = np.apply_along_axis(lambda row: any(x>min_success for x in row), 1, success) + vars = set(np.nonzero(selected)[0]) + return vars + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi) + + # run selection algorithm + + + return full_model_inference(X, + y, + truth, + selection_algorithm, + splitting_sampler, + success_params=(1, 1), + B=B, + fit_probability=keras_fit, + fit_args={'epochs':10, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) + + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + for i in range(2000): + df = simulate(B=5000) + csvfile = __file__[:-3] + '.csv' + outbase = csvfile[:-4] + + if df is not None and i > 0: + + try: # concatenate to disk + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + pivot_plot(df, outbase) + + diff --git a/doc/learning_examples/stability/stability_selection_harder_big.py b/doc/learning_examples/stability/stability_selection_harder_big.py index e22389e6a..9fd38d909 100644 --- a/doc/learning_examples/stability/stability_selection_harder_big.py +++ b/doc/learning_examples/stability/stability_selection_harder_big.py @@ -5,11 +5,11 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.utils import full_model_inference, pivot_plot -from selection.learning.core import split_sampler, keras_fit +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import split_sampler, keras_fit from sklearn.linear_model import lasso_path @@ -83,9 +83,9 @@ def _alpha_grid(X, y, center, XTX): import matplotlib.pyplot as plt import pandas as pd - for i in range(500): + for i in range(2000): df = simulate(B=3000) - csvfile = 'stability_selection_harder_big.csv' + csvfile = __file__[:-3] + '.csv' outbase = csvfile[:-4] if df is not None and i > 0: @@ -97,6 +97,6 @@ def _alpha_grid(X, y, center, XTX): df.to_csv(csvfile, index=False) if len(df['pivot']) > 0: - pivot_ax, length_ax = pivot_plot(df, outbase) + pivot_plot(df, outbase) diff --git a/doc-requirements.txt b/doc/requirements.txt similarity index 68% rename from doc-requirements.txt rename to doc/requirements.txt index 864bedd86..9833fbac5 100644 --- a/doc-requirements.txt +++ b/doc/requirements.txt @@ -1,14 +1,16 @@ # Requirements for building docs # Check these dependencies against doc/conf.py --r dev-requirements.txt +-r ../dev-requirements.txt sphinx>=1.4 numpydoc matplotlib texext nb2plots -rpy2 seaborn statsmodels -tensorflow +#tensorflow keras nbsphinx +jupytext +sphinx-book-theme +myst_nb diff --git a/doc/source/algorithms/LASSO.Rmd b/doc/source/algorithms/LASSO.Rmd new file mode 100644 index 000000000..c0ad171f7 --- /dev/null +++ b/doc/source/algorithms/LASSO.Rmd @@ -0,0 +1,111 @@ +--- +jupyter: + jupytext: + cell_metadata_filter: all,-slideshow + formats: ipynb,Rmd + text_representation: + extension: .Rmd + format_name: rmarkdown + format_version: '1.1' + jupytext_version: 1.1.1 + kernelspec: + display_name: Python 3 + language: python + name: 
python3 +--- + +# LASSO when conditioning on signs and active set + +One of the first works in this line of conditional inference +is [Lee et al.](projecteuclid.org/euclid.aos/1460381681) which +considers the LASSO (squared-error loss) and conditions +on the active set and their signs. + + +```{python collapsed=TRUE} +import numpy as np, pandas as pd +import matplotlib.pyplot as plt +import statsmodels.api as sm +# %matplotlib inline + +from selectinf.tests.instance import gaussian_instance # to generate the data +from selectinf.algorithms.api import lasso + +``` + +We will now generate some data from an OLS regression model and fit the LASSO +with a fixed value of $\lambda$. In the simulation world, we know the +true parameters, hence we can then return +pivots for each variable selected by the LASSO. These pivots should look +(marginally) like a draw from `np.random.sample`. This is the plot below. + +```{python} +np.random.seed(0) # for replicability + +def simulate(n=500, + p=100, + s=5, + signal=(5, 10), + sigma=1): + + # description of statistical problem + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0., + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + sigma_hat = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) / np.sqrt(n - p) + L = lasso.gaussian(X, y, 2 * np.sqrt(n), sigma=sigma_hat) + soln = L.fit() + active_vars = soln != 0 + + if active_vars[truth != 0].sum() == s: # ensure we have screened for ease of interpretation + projected_truth = np.linalg.pinv(X[:, active_vars]).dot(X.dot(truth)) + S = L.summary(truth=projected_truth) + S0 = L.summary() + + pivot = S['pval'] # these should be pivotal + pvalue = S0['pval'] + return pd.DataFrame({'pivot':pivot, + 'pvalue':pvalue}) +``` + +Let's take a look at what we get as a return value: + +```{python} +while True: + df = simulate() + if df is not None: + break +df.columns +``` + +```{python collapsed=TRUE} +dfs = [] +for i in range(200): + df = simulate() + if df is not None: + dfs.append(df) +``` + +```{python} +results = pd.concat(dfs) +import statsmodels.api as sm +thresh = 0.001 # POSSIBLE BUG?
several very small pivots -- fine for pvalues +grid = np.linspace(0, 1, 101) +fig = plt.figure(figsize=(8, 8)) +plt.plot(grid, sm.distributions.ECDF(results['pivot'][results['pivot'] > thresh])(grid), 'b-', linewidth=3, label='Pivot') +plt.plot(grid, sm.distributions.ECDF(results['pvalue'])(grid), 'r-', linewidth=3, label='P-value') +plt.plot([0, 1], [0, 1], 'k--') +plt.legend(fontsize=15); +``` + +```{python collapsed=TRUE} + +``` diff --git a/doc/source/algorithms/LASSO.ipynb b/doc/source/algorithms/LASSO.ipynb new file mode 100644 index 000000000..8c15520d4 --- /dev/null +++ b/doc/source/algorithms/LASSO.ipynb @@ -0,0 +1,194 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# LASSO when conditioning on signs and active set\n", + "\n", + "One of the first works in this line of conditional inference\n", + "is [Lee et al.](projecteuclid.org/euclid.aos/1460381681) which\n", + "considers the LASSO (squared-error loss) and conditions\n", + "on the active set and their signs.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import numpy as np, pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import statsmodels.api as sm\n", + "%matplotlib inline\n", + "\n", + "from selectinf.tests.instance import gaussian_instance # to generate the data\n", + "from selectinf.algorithms.api import lasso\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will know generate some data from an OLS regression model and fit the LASSO\n", + "with a fixed value of $\\lambda$. In the simulation world, we know the\n", + "true parameters, hence we can then return\n", + "pivots for each variable selected by the LASSO. These pivots should look\n", + "(marginally) like a draw from `np.random.sample`. This is the plot below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(0) # for replicability\n", + "\n", + "def simulate(n=500, \n", + " p=100, \n", + " s=5, \n", + " signal=(5, 10), \n", + " sigma=1): \n", + "\n", + " # description of statistical problem\n", + "\n", + " X, y, truth = gaussian_instance(n=n,\n", + " p=p, \n", + " s=s,\n", + " equicorrelated=False,\n", + " rho=0., \n", + " sigma=sigma,\n", + " signal=signal,\n", + " random_signs=True,\n", + " scale=False)[:3]\n", + "\n", + " sigma_hat = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) / np.sqrt(n - p)\n", + " L = lasso.gaussian(X, y, 2 * np.sqrt(n), sigma=sigma_hat)\n", + " soln = L.fit()\n", + " active_vars = soln != 0\n", + " \n", + " if active_vars[truth != 0].sum() == s: # ensure we have screened for ease of interpretation\n", + " projected_truth = np.linalg.pinv(X[:, active_vars]).dot(X.dot(truth))\n", + " S = L.summary(truth=projected_truth)\n", + " S0 = L.summary()\n", + "\n", + " pivot = S['pval'] # these should be pivotal\n", + " pvalue = S0['pval']\n", + " return pd.DataFrame({'pivot':pivot,\n", + " 'pvalue':pvalue})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's take a look at what we get as a return value:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['pivot', 'pvalue'], dtype='object')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "while True:\n", + " df = simulate()\n", + " if df is not None:\n", + " break\n", + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "dfs = []\n", + "for i in range(200):\n", + " df = simulate()\n", + " if df is not None:\n", + " dfs.append(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAeMAAAHSCAYAAADfUaMwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0\ndHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzdd1iV5RvA8e8roDgQFy4UR6GZKxO3\npgnuPSFzZcNylWlqpblN0zRnmubItMy9N6KVObCs3CaOHD8V3IoDeH5/PB4PR0hR4bznHO7PdXHF\n+7xvnBtF7vOs+zGUUgghhBDCPGnMDkAIIYRI7SQZCyGEECaTZCyEEEKYTJKxEEIIYTJJxkIIIYTJ\nJBkLIYQQJnM364Vz5MihChYsaNbLCyGEEHa1Z8+eSKWUT2L3TEvGBQsWJDw83KyXF0IIIezKMIyT\n/3VPhqmFEEIIk0kyFkIIIUwmyVgIIYQwmSRjIYQQwmSSjIUQQgiTmbaaOimuXbvGhQsXuHfvntmh\niEfw8PAgZ86cZM6c2exQhBDCKTlsMr527Rrnz5/H19eX9OnTYxiG2SGJRCiliI6O5syZMwCSkIUQ\n4ik47DD1hQsX8PX1JUOGDJKIHZhhGGTIkAFfX18uXLhgdjhCCOGUHDYZ37t3j/Tp05sdhkii9OnT\ny3SCEEI8JYdNxoD0iJ2I/F0JIcTTc+hkLIQQQqQGkoyFEEIIk0kyTkGDBg3CMIwHH3nz5qVFixYc\nO3YMgI4dOxIQEJDsr3vkyBEGDRrElStXkv1rCyGESH4Ou7XJVXh7e7Nu3ToAIiIiGDBgAIGBgezf\nv58BAwYQHR2d7K955MgRBg8eTMeOHcmSJUuyf30hhBDJ67HJ2DCMmUBD4IJSqkQi9w1gPFAfuAV0\nVEr9ntyBOit3d3cqVqwIQMWKFfHz86NatWqsWbOGVq1amRydEEIIR5CUYerZQN1H3K8H+N//eAf4\n+tnDcl1ly5YF4MSJEzbD1MePH8cwDFavXm3zfGxsLLlz56Z///4P2kJDQ6lQoQKenp7kypWLLl26\ncOPGDQDCwsJo1KgRAIUKFcIwDAoWLGiH70wIIVzH1atwcuf/ICbGLq/32GSslNoGXHrEI02A75S2\nA8hiGEae5ArQ1Zw4cQKA3Llz27QXKlSI8uXL89NPP9m0b926lfPnzxMSEgLA/v37qVu3Ljly5GDx\n4sUMHjyY+fPn07JlSwBefvllxowZA8CSJUv47bffWLp0aQp/V0II4SKU4u7mn9njH0Leivk59tVK\nu7xscswZ+wL/xrs+fb/tXDJ8bRuOsJVVqSf/f2Luv7OKiIigS5cueHl5ERQUxObNm22eCwkJYfDg\nwdy5c4d06dIBsGDBAooXL06JEnqGYOjQoRQoUIAVK1bg5uYGQLZs2QgODua3336jUqVKFC1aFIAy\nZcpIr1gIIZLi+nX4/nvUlCmwbx+LgcLAyX5TyNi2GQ/1n5KdXVdTG4bxjmEY4YZhhF+8eNGeL22a\nqKgoPDw88PDwoGjRokRERLBgwQLy5Ek4eNC6dWuuXbv2YMFXTEwMS5YsITg4+MEzu3btolmzZg8S\nMUCLFi1wd3fnl19+SflvSAghXM2KFVC4MHTpgrFvHz2BKcAWwD//HXJnuZ3iISRHMj4D5I93ne9+\nWwJKqW+UUgFKqQAfH59keGnH5+3tze7duwkPD+f06dOcOHGCevXqJfqsr68vVatWZcGCBQBs3ryZ\nyMjIB0PUAOfOnSNXrlw2/5+bmxvZs2fn0qVHzSYIIYSwcfs2dO8OTZpAZOSD5q6kpwOvcqHNn+SL\n2AaenikeSnIMU68AuhmG8SNQAbiqlEr2IWp4uiFis7m7uz/RXuLg4GD69etHdHQ0CxYsoEyZMvj7\n+z+4nydPngQHMsTGxhIVFUW2bNmSLW4hhHBpBw5ASAj8/TcAN4HPycwVhjCXjtRt7c1Hc+03PfrY\nnrFhGD8AvwFFDcM4bRjGm4ZhvGsYxrv3H1kDRAD/ANOBLikWbSrQqlUroqOjWbp0KUuXLrXpFQNU\nqFCBpUuXEhsb+6BtyZIlxMTEULVqVQDSpk0LwO3bKT+0IoQQTuXCBejZE15++UEivg6UJzvDucFk\nyhNQ05vvvoM0dpzIfWzPWCn12mPuK6BrskWUyuXMmZMaNWrQu3dvrly5QuvWrW3u9+/fnzJlytC0\naVPee+89Tp8+Td++falTpw6VKlUCeLCAa9q0aYSEhJAhQwZKlixp9+9FCCEcxuXLMGYMjB8PN28+\naL6UNh0lYvJyLu4UMJ+XXqrE0qVwfw2t3Ug5TAcUEhLCuXPnqFixYoLV0MWLF2ft2rVcuHCB5s2b\n079/f1577TUWLVr04JkCBQowZswYlixZQpUqVR7sOxZCiFTnwAE9L1ygAIwYYZOIL5Ypy3PG85yL\nOw0sJE+eYFavhsyZ7R+moUyaiA0ICFDh4eH/ef/gwYMUK1bMjhGJZyV/Z0IIhxAXB0uXwqRJEBaW\n8H6pUqghQwmakI3Q0EbAXDw8GrJ1K9wfYEwRhmHsUUoluohIalMLIYRwDUrpbUr9+8O+fQnvv/AC\nDBzInSZNmDApPaGhAMeBLEyenLKJ+HEkGQshhHBucXGwcSN89hns2mV7z81Nb13q0gVq1uTsuXOU\n9y/L2bMfAW8AWXj3XXj7bTMCt5JkLIQQwjkdOQJz58L338P9UsMPZMyo54q7doV8+QD4999/KVeu\nJufP/w94HoAqVfSaLrNJMhZCCOE8bt2CH3+EadMS9oJBL4Pu0gX69YOcOR80Hz9+nHLlahIVdQnY\nAFSiWDFYvBju7wY1lSRjIYQQju/oUZg6FWbN0tuUHpYlC7RtC337PugJW1y9epWyZatz+fINYDMQ\nQKlSemQ7Xr42lSRjIYQQjmvHDhg1CpYtS3jPwwMaNIB27fR/E9kcrBRMnuzN5csfAjWAl3j5Zdiw\nAbJnT+ngk06SsRBCCMeiFKxbp5Pw1q0J7xcuDO+9Bx07Qo4c//ll/vprP0OH3mLRonLABwBUqKC/\ndJYsKRP605JkLIQQwnEcOaLnfB86YhaA+vWhWzeoU+extSp37fqTatWCuHs3J/AX4EbNmnr7sRlF\nPR5HkrEQQgjz3bkDI0fqKll371rb3d3h9dfho4+gePEkfamwsHBq1apNTExGYBngRnAwzJlj/zKX\nSSXlMFPQoEGDMAzjwUfevHlp0aIFx44ds8vrG4bBpEmT7PJaQgjx1DZuhNKlYdAgayJOk0ZvSzp2\nDGbPTnIiXrlyB4GBgcTEeAPbAH/efx/mz3fcRAzSM05x3t7erFu3DoCIiAgGDBhAYGAg+/fvJ2PG\njCZHJ4QQJjpyBHr3hpUrbdvLldNbl8qUeaIvd/gwvPbaZOLifIBQwI9Ro3Sn2l5HIT4tScYpzN3d\nnYoVKwJQsWJF/Pz8qFatGmvWrKFVq1YmRyeEECa4eBE+
/xwmToSYGGu7l5cepn7vPV056wn89pui\nUSODmzdnAFdwd8/Ft99C+/bJG3pKkWFqOytbtiwAJx6uFnNfoUKF+OijjxK0t2rV6sF5xTdv3qRb\nt24ULVqUDBkyUKhQIbp27cq1a9ce+doFCxakd+/eNm2zZ8/GMAxu3LjxoO3SpUu888475MqVC09P\nTypXrszOnTuf5NsUQghbSsFvv+ltSPnywbhxtom4Y0fdte3W7YkS8Z078M47G6hatSJRUZFAOjJk\nyMXKlc6TiEGSsd1ZknDu3LkTvd+6dWsWLlxo03bjxg1Wr15NSEgIALdu3SI2Npbhw4ezdu1ahg4d\nSmhoaLL0tO/cuUNQUBCbNm1i9OjRLFu2DB8fH4KCgvjf//73zF9fCJHKHDsGY8dC2bJQubIuXRl/\ngVbVqhAerot55MmT5C+rFCxYAH5+q5k+vRFxcXcARY4csGUL1K2b/N9KSpJhajuIuf/uLyIigi5d\nuuDl5UVQUFCiz4aEhPDFF1+wY8eOB8PbK1eu5O7duw+SrY+PD19//bXN1y9UqBBVq1bl1KlT+Pn5\nPXWs33//Pfv27WP//v34+/sDEBQURNGiRfnyyy8ZPXr0U39tIUQq8fff8NNPulBHYqcngd7w27s3\ntGjxxBO6+/fDm2/Czp3LgNZAKWADRYtmY8UKKFLkWb8B+3OuZOwIM/BPeP5zVFQUHh4eD679/PxY\nsGABuXPnfpCkQa98dnNzo0yZMhQpUoQFCxY8SMYLFiygevXq5MqV68Hzc+fOZezYsRw9epSb8Q7L\nPnLkyDMl402bNlG2bFkKFSpkE1/16tV51PnTQohU7tw5vWR57lz488/En/H0hDZt9D7i+1N2T+q7\n7+DddyE6ehXQCggga9a1DB6chc6dHaPO9NNwrmTshLy9vdm0aROGYZA7d27y5s2LYRiEhYXx6quv\nPniuevXqhN0/BDs4OJiZM2cyduxYrl+/zrp165g4ceKDZ5cuXUr79u157733GDFiBNmyZePcuXM0\na9aM27dvP1O8kZGR7Nixw+YNhMVzzz33TF9bCOGCLOUqV6zQRxk+LF06qF1bH2PYrBlky/ZULxMd\nDT16wIwZlpaXSZOmDd27T2TQoMwOV1HrSUkyTmHu7u4EBAQkaC9btiy7d+9+cO3l5fXg8+DgYIYO\nHcovv/zC8ePHiYuLo3nz5g/uL1y4kAoVKjBlypQHbVsTKxn3EE9PT+7Gn6sBLj9UcD1btmwEBATY\nDINbpHPkTXpCCPtRCtav10U6Evvd4+mpk2/r1joRZ8r0TC8XEaFHs/fuBdgCVOOFF/KyaNGcpG4/\ndnjOlYyfcIjYkXl5eSWapAGKFy9OiRIlWLBgAcePHycoKIjs8SqaR0dHJ0iM8+bNe+xr5suXj4MH\nD9q0bdiwweY6MDCQDRs24OfnR05HOc5ECGGumzchLAx279bHFu7eDZGRCZ+rXl2vlm7ZEry9k+Wl\nt27ViTgqCuAboDNlyoxm27bez5rjHYpzJeNUJDg4mPHjx3P16lWmT59uc69WrVp07dqV4cOHU6FC\nBdasWcPmxOq4PqRZs2Z0796dESNGUK5cORYvXsz+/fttnmnfvj1Tp06lRo0a9O7dm8KFCxMVFcWu\nXbvInTs3PXv2TNbvUwjh4JYtg86d4cKFxO+7u+t54I8+ghIlkvWlp0/X08t6+cokoDslStTn11+7\nkT59sr6U+ZRSpnyULVtWPcqBAwceed8ZDBw4UGXPnv2p/t+jR48qQKVLl05duXLF5l5MTIzq1auX\n8vHxUV5eXqp58+Zqx44dClArV6588BygJk6c+OD67t27qmfPnipXrlwqS5YsqkePHmratGkKUNev\nX3/w3JUrV1SPHj1Uvnz5lIeHh/L19VXNmjVTv/zyyyNjdoW/MyHEfZcuKdW2rVJ6TDLhR5YsSvXo\nodTJk8n+0vfuKfX++/Ff7ksFqFdeaaJu376d7K9nL0C4+o+caCiThn4DAgLUo1bnHjx4kGLFitkx\nIvGs5O9MCBcQG6t7wz16wNmz1vY8efQccLly+uP55x97ctLTUErX//juO0vLGdKkKUq9evVZunRe\nootLnYVhGHuUUonOT8owtRBCCF2icuZMmDoVHq4Q2LYtTJgAWbOmeBhffRU/EUPz5r706fMbZcsW\nw93ddVOW635nQgghHu/MGfjss4SVsQBy5tQHNjRtapdQQkP11DMooD8VKuRh4cJupElT0i6vbyYp\nhymEEKnRrVswdKguVzVzpm0izp4d+vTRpa7slIhPnNCj4LGxCugNjKBUqf0YhuvsonkU6RkLIURq\ncvo0rFkDw4bBv//a3qtQQS9fbt1a7xW2k1u3dD2QqCgF9AAm0alTd6ZNG4/hCJUX7UCSsRBCuLK4\nOFi7Flatgs2b4ejRhM+ULq1PUYpXFdBerl2D4GDYu1cB7wHTeO21XsyYMTrVJGJw8GSslEpVfxnO\nzKxV+UKI/3D3rp4HHj0aDh1K/JmcOWH4cHjjjSc+Pzg5RERAo0Zw4ACAARSlbt2PmTdveKr73e+w\nydjDw4Po6GgyZMhgdigiCaKjo516y4EQLuPWLb0ieuxYvTjrYZ6eUKUK1Kmji3lkzmz/GIlfWSsG\nOAK8SP/+PRk61JRwTOewyThnzpycOXMGX19f0qdPn+reJTkLpRTR0dGcOXPG5lQpIYSd3bmjT1EY\nNgwePns8c2Z4+21o2BAqVrTrfPDDYmL0Lqm+fSEm5h7wOrCOiRMP061b0s8zdjUOm4wz33+3dvbs\nWe7du2dyNOJRPDw8yJUr14O/MyGEHZ09CytXwuefw8mTtvdy54aePXUPOJlqRT+LPXvgnXfg998B\n7gDBwHJ69PgyVSdicOBkDDohyy94IYSI5+ZN2LABNm3SG3MTmw/Omxf699dzwSb2gi2uX4cBA2Di\nRMspi7eBFsAaBg+eyGefdTM3QAfg0MlYCCEE+pCGlSth+XLYuBH+69zyHDng44/hvfdwhJMUlNKV\nNbt3t52+dnefQGzsWiZNmkaXLu+YF6ADkWQshBCOSCnd850yRSfh2NjEn0ubVi/IatBAjwHHOxvd\nTKdOQbdu+j1EfLVrw/jxPTlzpiyBgYHmBOeAJBkLIYQjuX4dZs3SSfjw4cSfKVFCL8YKCoLKlR2i\nF2yhlA69b189om7h43ONYsXeZ86ckeTOnYsXXpBEHJ8kYyGEcAQXLuhlxpMnw5UrCe9Xrqz3AjVp\nAs89Z//4kuDaNXjrLVi40La9Y8cr/P13XbZv38OePS1p0KCBOQE6MEnGQghhpn//hZEjdX3oh+eC\nvbygQwc9B/zii+bEl0R//QUtW9oW+CpeHMaMuUT//rX566+/WLRokSTi/yDJWAghzHD+PIwYoQt0\nPHxa0vPPw4cfQrt2kCmTOfE9gXnzdI84/nuJLl3g448v0rBhLQ4dOsSyZcuoX7++eUE6OEnGQghh\nTxcu6EN7x4/
X1bLiK1tWT7Y2b25KecqnsXKlfs9gqYibMSNMnw6vvQYXLijc3d1ZuXIltWrVMjdQ\nByfJWAghUppSsH27Xtm0cCE8XMioQgUYMgRq1QInqja4d69OupZE/OKLsHgxZM16nnv3spEzZ052\n7dpFmjRyWu/jyJ+QEEKklNhYmD8fXnoJqlbVn8dPxKVL667lb7/pPT9OlIjPntWHPFhWTBcqBGFh\nkCHDKapUqULnzp0BJBEnkfSMhRAiuVmqXQwYAPv3J7xfuTJ88IFeHe2EyermTWjcWB+NDLr09apV\ncOPGcWrWrMnly5cfJGORNJKMhRAiOYWG6nnf8HDb9gwZ4PXX9cqml14yJ7ZkEBcH7dvrOtOgp7YX\nLQIPj6O88kpNbt26xebNmylbtqy5gToZScZCCJEc/vkHevfW1bLiy5RJ94I//BCyZjUntmQ0bBgs\nWWK9njQJataMpWTJJty+fZvQ0FBKly5tXoBOSpKxEEI8i6tXdYYaP952PjhdOujaFfr1Ax8f8+JL\nRitWwMCB1uv334d33wVwY+bMmXh5eVG8eHGzwnNqzjdZIYQQjiA2FqZNA39/GDPGNhF36KB7yl9+\n6TKJ+NAhaNvWel2zJrRtu5dJkyYBULFiRUnEz0B6xkII8aRCQ/XQ899/27ZXrqz3EJcrZ05cKeTq\nVWjaVJfNBihQAD7+OJzatWuTKVMm2rdvL8fdPiPpGQshRFIopffu1K0LgYG2idjPD378EX75xeUS\nsWXBluXMivTpYdCg32jRIhBvb2+2bdsmiTgZSM9YCCEeJS5OL8oaNQp27rS9lzGjnhPu1cuhTk5K\nTkOG6Llii969t9G9ewNy585NaGgo+fPnNy84FyLJWAgh/su+fdCxo3Ufj0WaNLoG5IgRkDevKaHZ\nw7JlMHiw9bpXL8if/zD58uVj8+bN5HXh793eDGWpY2ZnAQEBKvzhfXhCCOEIYmJg9GgYNMj2EId0\n6XRy7t1bH+bgwg4ehPLl4cYNfV2jxjU2bsyMuzvcvn0bT09PcwN0QoZh7FFKBSR2T+aMhRAivkOH\ndOnKTz6xJuJ06XQhjxMn9ClLLp6ILQu2LIk4V65V7N1bkPDwHQCSiFOADFMLIQTo8/9GjoTPP7ft\nDZcrB3PmQLFi5sVmRzExegvTkSP6Om3apURFBfPSS6UpUqSIucG5MEnGQggRGgrvvWfNQAAeHnrC\n9KOPwD11/KqMi4M339R1prWfiIlpQ/ny5Vi3bh3e3t5mhufSUsdPmBBCPOzyZVi9Gn76SZ+cFF+F\nCvpQ3pIlzYnNBErpstnffWdp+RXDeI0qVaqwevVqvLy8zAzP5UkyFkKkHrdvw/ffww8/wNatuopW\nfJkz62Hqzp31CQiphFK6dPa0ada2Tp0q4u8/gu7du5ExY0bzgkslJBkLIVzf1avw9de6Otb584k/\n06qVri+dJ499YzOZUvDpp/qPRptLs2av8s03+XBz62tmaKmKJGMhhOu6eVP3dCdOhGvXEt4vX14v\nG27aNNUs0IpPKb1ofORIS8sE4H3y5OmGm9tEEyNLfSQZCyFc06pV+tSkU6ds2319dV3p117Tn6dS\nSukiHuPGWVpGA31o0qQZ48Z9aWJkqZMkYyGEazlzBnr0sD10F6BoUb1X+PXXIW1ac2JzEHFx0K2b\nHrnXhgP9adUqmHnz5uLh4WFidKmTJGMhhGu4ckUfZfjVV3p42iJ7dn2UYbt2uoxlKhcdrQcMZs16\n0IK39080bNiO2bNn4p5KtnE5GvlTF0I4txs3YMIEXb7yyhXbe506wRdf6IQs2LhRb6c+dgxAATG0\naZOe8eO3kjWrF26paAW5o5FkLIRwTseP6704M2ZAVJTtvZIl9aKt6tXNic3BnD+vty7Nn29pUUAv\n8uf/h2+/XYynZxYToxMgyVgI4UzOn4ft2+Hbb2HNGr0KKT5/f101KzhYhqTv++03aNgQLl2ytMSR\nNm0P7t6dTNOmPUiXTtKAI5C/BSGEY4qNhb17YcsW2LEDdu9OuDLaokABGDAAOnRINaUrk2LrVmjQ\nIP4UehzPPdeZY8dm0Lt3b7744gsMwzAzRHGf/NQKIRzHtWt6LHX9eggLSzgH/LC6dXUNx/r1U1XF\nrKTYtAkaN9YLtgB8fKBKlZ4sWzaDTz/9lKFDh0oidiCSjIUQ5jt/Xi/CmjxZV8v6L56eUKaMngt+\n802XP8rwaa1ZA82bw507+jpPHti8GW7dak+lSr706dPH3ABFApKMhRDmiYjQ25FmzrRmjvhy54bA\nQHjlFV0tq3hxfZqSSJRSek1bjx5w755u8/W9R9++yylWrCVQlrJly5oao0hckpKxYRh1gfGAGzBD\nKTXyoft+wBwgy/1n+iml1iRzrEIIV7F3L4wapU9Miouzvefvr4ee69SBF14AGUpNkhs39PkW1hXT\nUKDAHfz9W9OjxwoqVdpNQECAeQGKR3psMjYMww2YDNQCTgO7DcNYoZQ6EO+x/sBPSqmvDcN4EVgD\nFEyBeIUQzuzQIV2Kcv36hPfKloV+/aBZM5n/fUIHDkDLlnDwoLWtZMlocuRowaZNa5k8ebIkYgeX\nlLX/5YF/lFIRSqm7wI9Ak4eeUUDm+597A2eTL0QhhEtYuBDKlUuYiGvV0quNdu/WGUUS8RP5/nv9\nxxo/EXfseAsfn8aEha1j+vTpdOnSxbwARZIkZZjaF/g33vVpoMJDzwwCNhiG0R3ICAQlS3RCCOd3\n757u8Y4da21Lk0Yn3j59dI9YPLHbt/UgQ/wziNOn1/WmfXzCaNIkjFmzZtGhQwfzghRJllwLuF4D\nZiulvjQMoxIw1zCMEkopm8kgwzDeAd4B8PPzS6aXFkI4rDNn9OlIP/9sbXv+eVi0CEqXNi8uJxcR\noY9f/v13a1vRovDTT4pSpQygPocPH6Zw4cKmxSieTFKGqc8A+eNd57vfFt+bwE8ASqnfAE8gx8Nf\nSCn1jVIqQCkV4OPj83QRCyEcX2wsTJqkzwiOn4gbN9bD0ZKIn9ry5fDyy7aJODgYNmy4TPfuNdi4\ncSOAJGInk5RkvBvwNwyjkGEYaYEQYMVDz5wCAgEMwyiGTsYXkzNQIYST+OMPqFQJuneH69d1W5o0\nMGIELF0KWaQO8tO4dw9694amTa1bsT089HueSZOiaNo0kN9++41oS5UP4VQeO0ytlIoxDKMbsB69\nbWmmUmq/YRhDgHCl1AqgFzDdMIye6MVcHZV6uGisEMKl3bgBn30G48fbblcqWhS++UbvFRZP5cwZ\n3fv99VdrW4ECek1cgQIXqFkziCNHjrB8+XLq1atnXqDiqSVpzvj+nuE1D7V9Fu/zA0CV5A1NCOE0\nli/Xp9WfPm1tS5cOPvkE+vbVn4un8vPPuppWZKS1rWFDmDMHDOMyVarU4MSJE6xatYqgIFk766yk\nApcQ4ulFROiz+ZYvt22vWVMv6y1SxJy4XMTSpXr9m6U4mWW0/6OP9Odx
cd4EBQXRokULqstxkU5N\nkrEQ4sn9VwUtHx+9hen116Vy1jOaOhW6drX+8ebMqf+4q1eHkydPopSiYMGCTJgwwdxARbKQZCyE\nSLpff4Vhw2DduoT33npLJ+hs2ewflwtRCgYNgiFDrG3PP69rpRQuDBEREbz66qtkz56dPXv2yMlL\nLkKSsRDi8fbsgf79E0/CtWrBwIFQRZaNPKuYGF2We/p0a1u5crBqle4ZHzlyhJo1axIdHc3SpUsl\nEbsQScZCiMQppfcEjxoFS5bY3jMMXXVCKmglm+hoCAmBFfE2jtapo+ujZMoEBw4cIDAwkNjYWLZs\n2UKpUqXMC1YkO0nGQghbJ07ogsdz58KRI7b3DAPatoUBA/TpSiJZXLoEjRrB9u3Wtnbt4NtvrSdG\n9unTB6UUYWFhvPjii+YEKlKMJGMhhHbypC52vGxZ4vdbtNATmZIIktXp07oHfCDeOXgffQQjR+oV\n0xZz584lMjISf3kT5JKSUoFLCOHKYmLgyy91kn04EWfKBB066DnjRYskESezq1cTJuKxY+GLL3Qi\n3rVrF8HBwdy+fZusWbNKInZh0jMWIjXbvl2vGPrzT9v2evX0OGmTJpAhgzmxubiYGGjd2pqIPTx0\nIY/XXtPX27dvp27duuTIkYOoqCh8fX3NC1akOEnGQqRGf/6p531XrrRtL1FCn8lXubI5caUSSkGP\nHrBhg7Vt1ixrIt62bRv162Owzb8AACAASURBVNcnb968hIaGSiJOBWSYWojUQildrCMkBF56yTYR\np0+vJyl//10SsR1MmKALlFl89pmukwIQGhpK3bp18fPzY+vWreTLl8+cIIVdSc9YCFcWE6MLdSxf\nrueDjx+3vW8YOjkPHw6FCpkTYyqzcCH07Gm9DgnRRT4scuTIQYUKFViwYAE5c+a0e3zCHJKMhXBF\nN27AjBl6YVb8wxvia9IEhg6FkiXtG1sqde8efPopjB5tbatYUQ9PGwYcPHiQF154gVKlShEaGioF\nPVIZGaYWwpVERupqWH5+uvv1cCL28tJdsZ07dU9ZErFdnD2rz86In4ife07/FXh6wuLFiylVqhQz\nZswAkEScCknPWAhXcPs2fPWVHm6+ccP2no8PtGype8I1ashxhna2eTO0aQMXLljb6teH776D7Nnh\nxx9/pG3btlSoUIHWrVubF6gwlSRjIZyZUrB4sa4SceKE7b3ChXV7hw56gZawq7g4fabGoEH6rwn0\n3uFhw/QRz2nS6EIeHTt2pGrVqqxatQovLy9TYxbmkWQshDO6d0+PcX71lW0NRYBixfTy3JYtwV3+\niZvh4kVdNTT+1qVcueDHH/XgBOhjEDt16kSNGjVYsWIFGTNmNCVW4RjkX6oQzuTMGX2kzzffwLlz\ntveyZ9flKt95R5KwiXbu1JVDz5yxttWoAT/8ALlzW9sKFCjAmjVrqFq1Kull5CLVk3+xQjiDa9f0\nfPBXX8Hdu7b33N2hWzfdG86a1Zz4BAChofrAh1u3rG2ffAKDB1vfH02cOJFChQrRsGFDatWqZU6g\nwuHIamohHFlsrO4J+/vrgsXxE3GePHrl9IkTMG6cJGKTrV8PDRpYE3G2bLB6tX4PZUnEo0ePpkeP\nHvzwww/mBSockvSMhXAkMTG6WPHu3fpjy5aExxiWLw+9e0PTptbz9YSpVq7UU/SW90q+vnoVddGi\n1meGDRvGgAEDCAkJYc6cOeYEKhyWJGMhHMGNG7pG4pdf6sNtE5M/P4wapfcJyz5Uh7FkCQQH6/dR\nAAUK6OHqwoX1tVKKgQMHMnToUNq1a8esWbNwc3MzL2DhkCQZC2Gm6GiYOhU+/1wvwU1MhgzQrx/0\n6iUnKDmYvXv1HmJLIn7uOd0jLlDA9rlLly7x5ptvMm3aNEnEIlGSjIUww40bei74yy9tl92CXnJb\nqRKUK6c/ypeHzJnNiVP8p6tXoVUruHNHXxcponvElgOWlFJcuHCBXLlyMWHCBADSpJFlOiJxkoyF\nsKfISJg4UX9cvmx7z89Pr4ju0EG2Jjk4peCtt+Cff/R1pkywYoU1EcfFxdGtWzdWrFjBH3/8gY+P\nj3nBCqcg/+KFsIebN/Vq6DFjbPe9gO4Jf/opvP22lKp0EhMnwqJF1usZM6yLtWJjY+ncuTPffvst\nffr0IUeOHOYEKZyKJGMhUlJcHMybp+d8z561vSflKp3Szp16MbtFly56ARdATEwMnTp1Yu7cuQwY\nMIDBgwfLoQ8iSSQZC5FSdu+Grl31f+MrVUpXgmjRQoajncxff0GzZroaKUDZsjB2rPX+yJEjmTt3\nLkOHDqV///7mBCmckvwmECK5Xb0K/fvD5MnWEwJAD0d//jm0b69PCRBOJSxMH3x17Zq+9vaGhQtt\nZxZ69OhBgQIFaNeunSkxCuclvxGESC5K6YnEYsVg0iRrIk6XTveEjxyBjh0lETuhRYugTh1rIs6c\nGZYvh0KF4M6dO3z22WfcvHmTzJkzSyIWT0V6xkI8q5s3Yf58mDJFbzyNr25d3UO2VIAQTmfqVD0v\nbHlvlTs3rFsHpUtDdHQ0zZo1Y/369ZQtW5YmTZqYG6xwWpKMhXhaZ8/qilhz5uih6fhy54bx4/VG\nVFnA47RWrrRNxEWK6BrUBQvCzZs3ady4MVu2bGHGjBmSiMUzkWQsxNNYuRLeeAOiomzb06fXG1CH\nDIEsWcyJTSSLw4f1mcSWRFyunD74wccHrl+/ToMGDfj111+ZM2eODE2LZybJWIgncfu23o40aZJt\nu7+/7kJ16CCnJ7mAa9f0ORyWOWI/P2siBjh//jwRERHMnz+fYMu+JiGegSRjIZLqjz/0Aqy//rK2\n5cunJxXr1ZOFWS4iLg7atYNDh/R1+vSwbJlOxDdv3iRDhgw8//zzHDlyhAxSK1wkE/ntIcTjHDqk\nqzq8/LJtIm7WDP78Ux9iK4nYZQwdqktbWsyYAWXKQGRkJFWrVn2wf1gSsUhO8htEiP9y7JjuCRcv\nDj/9ZG339ISvv4bFi/UJ8sJlzJ0LgwZZr3v10qcynT9/nldffZVDhw5RrVo10+ITrkuGqYV42B9/\n6FXSCxfqMcv4mjaFESP0XmLhUtatg06drNeBgTByJJw9e5bAwEBOnjzJqlWrCAwMNC9I4bIkGQth\nsX27XgW9fn3Ce7Vrw7BhekmtcDm7dunqpJZziUuW1IU+lLpHUFAQp0+fZt26dbzyyivmBipcliRj\nIU6e1Ac5/Phjwnu1a+sTleSXsMs6ckRP+1sO0/Lz071kvTPNgyFDhpA3b14qV65sZpjCxUkyFqnX\njRt6OHrMGL1lySJNGl2so08fvWhLuKw9e/Q6vMhIfZ09ux4YiY4+xooV+2ncuDEtW7Y0N0iRKkgy\nFqnP4cN6Adbs2QkrZ7VuDcOHw/PPmxKasA+l9I60Dz6Au3d1W/r0sGoVGMZhXnmlJnFxcQQGBpIx\nY0ZzgxWpgiRjkTpcuaKrNsy
aBZs3J7xftix89RVUrWr/2IRd3bgBnTvrcuIWmTPrBfOZMx+gevWa\nKKXYtGmTJGJhN5KMhes6exaWLtUVG8LCrKtz4nv+eT0nLMcapgoHDkDLlnDwoLWtdGm9WOvWrb+o\nUSMINzc3tmzZQjFZMS/sSJKxcC03bsCSJXrD6ObNtucJW6RJA40aQdeuev+KJOFU4fvvdY/YslAL\ndBnxCRP0EPWgQUtImzYtoaGhFClSxLxARapkqMR+WdlBQECACg8PN+W1hQs6dw4GDoR582x/28ZX\nrpw+Hb5dO71kVqQKt2/D++/DN99Y29Kn18sGOnSAmJgY3N3dUUoRGRmJj6UAtRDJzDCMPUqpgMTu\nSc9YOLfoaBg3ThfiuHnT9p5hQI0aelyycWNdR1qkKpcu6d1pe/ZY24oW1fVcSpaEX3/9lTfeeIOV\nK1dStGhRScTCNJKMhXOKjtYTfQMG6H3C8RUvrnu/bdpA/vzmxCdMd/cuNG9um4iDg2H6dPDygrCw\nMBo2bIivry+ZMmUyL1AhkGQsnElcHGzdqueDFy2C69dt75coAWPHQlCQ7hWLVEspePdd/eNiMWEC\ndOumfzQ2bdpE48aNKVSoEJs2bSJPnjzmBSsEkoyFs1i3Dnr0gKNHE97LkUMftfPWW+AuP9JC13KZ\nNct6/fnn0L27/nz79u00bNiQIkWKsGnTJnLmzGlOkELEI8tIhWM7dw5CQvR5wQ8nYn9/nYSPHtXd\nIEnEAn2Y1scfW6/feAP69rVely5dmk6dOrFlyxZJxMJhyG8v4Zhu3YJvv4X+/eHaNWu7tze0bavn\nhMuXl+FoYeP33/WPhkX16rrSlmHAxo0bqVixIl5eXkyZMsW8IIVIhPSMhWM5ehQ+/BB8ffWwdPxE\n3K6druo/aRJUqCCJWNi4cEGfcBkdra/9/fWW87Rp4YcffqBevXoMGDDA3CCF+A/SMxaOYe9e+OQT\nWLs24T1/f70pVM6RFf/h3j1dVvzff/W1t7euM50tG8yZM4dOnTpRrVo1hg0bZm6gQvwH6RkLc50/\nD2+/rU9HejgRFy6sT1T66y9JxOKReve2rpw2DF13ukgRmDFjBm+88QY1a9ZkzZo1soVJOCzpGQtz\nXLwIM2boZa7xtygZhj5ctksXqFNHSlWKx5o9W29bshg6FOrXhxs3bjB48GDq1q3LkiVL8PT0NC1G\nIR5HkrGwn9u3YcUKvU943bqEBzfUr697wlKgXyRBbCxMm6aXGFg0b65nO5RSZMqUiZ9//pk8efKQ\nLl068wIVIgkkGYuUd/48jB+v532vXEl4v1gxXayjbl37xyac0p9/6kMfdu60tr34ou4ljxo1krNn\nzzJ+/HgKFixoVohCPBEZAxQp59gxeO89KFBAD0c/nIgrV9ZD1X/+KYlYJMmlS/DRR/r46fiJuEgR\nWL5cMW7cED7++GMiIyOJjY01L1AhnpD0jEXyu3gRPvtMH5MTF2d7r1AhfVRO27bw3HPmxCeczvnz\nevBkyhR9SqZF2rS6wEffvophw/ozYsQIOnTowLfffoubm5t5AQvxhCQZi+Rz967eAzxkCFy9anuv\nXDno108fYSi/JEUSXb+u39dNm2bdP2xRo4Yu6FG0KHzyyad8/vnnvP3220ydOpU0svBPOBlJxuLZ\n3bih95KMGZOwZGXNmvDpp/Dqq1KkQzyREyegUSPYt8+2vXhxfVhX69bWH6lKlSrx/vvvM3bsWEnE\nwikZSilTXjggIECFh4eb8toimRw6pBdlzZ5tWykL9CTel1/qbUqShMUT+uUXaNYMIiOtbWXL6uqo\njRvrHW9xcXHs3r2bChUqmBeoEE/AMIw9SqmAxO7JW0jxZKKidAKuXFmvgp4wwTYRZ8kC48bB339D\nw4aSiMUTmzVLD6hYEnHatDBzJuzerctdpkkDsbGxvPXWW1SuXJm//vrL3ICFSAYyTC0eTykIDdXz\nwatX69qDD/P314U6OnbUCVmIJ6SUXm4waJC1LWdOWLpUv/eziImJoWPHjsybN4+BAwdSsmRJu8cq\nRHKTZCz+W2ys/k04ahQkNqXg7q57v1266HKVMlcnnpJSelX0qFHWtlKldI2YAgWsbffu3eP1119n\n4cKFDB8+nE8++cT+wQqRAiQZi4Tu3IHvvoPRoxMuyAJ9dGG7dhAcDD4+9o9PuBSloGdPXRfGok4d\nWLQIHi4lvWjRIhYuXMiYMWPo1auXfQMVIgVJMhZW167pPSTjxsG5c7b3PD31Ke3vv6/3kgiRDOLi\noGtXvUXJolEjWLgQEqtgGRISgp+fH1WqVLFfkELYgYwrCr0neOBA8PODPn1sE7G3ty72e+KErrgg\niVgkkytXoEUL20TcsqXuEcdPxNHR0bRp04Z9+/ZhGIYkYuGSpGecmt28CRMnwhdfwOXLtvfy5NEV\n+N95BzJnNic+4bJ+/x1atYKICGtbmzYwZ45eimBx8+ZNGjVqRFhYGA0aNKBEiRL2D1YIO0hSz9gw\njLqGYRw2DOMfwzD6/cczrQ3DOGAYxn7DMOYnb5giWd25o7ckPfecXjUTPxHrQ2Dh+HF9SKwkYpGM\nlNJVUitXtk3EH3yglynET8TXr1+nXr16bN26le+++47XX3/d/gELYSeP7RkbhuEGTAZqAaeB3YZh\nrFBKHYj3jD/wMVBFKXXZMIycKRWweAb37umux5Ah8O+/tvcKF4bBg+G116RcpUgxQ4fqGRELLy+9\nh7hlS9vnrl69Sr169di1axfz588nODjYvoEKYWdJGaYuD/yjlIoAMAzjR6AJcCDeM28Dk5VSlwGU\nUheSO1DxDJSCxYt1L/iff2zv+frq4r9vvAEeHubEJ1KF8HD9PtCiVCm9UKtIkYTPpk2blqxZs7Jw\n4UKaNWtmvyCFMElSkrEvEL8bdRp4uP5cEQDDMH4F3IBBSql1yRKheDbHj+vlqmvX2rb7+OiFWe++\nq1dKC5GC7t6FTp301nWAatVg/XpIn972ucjISNzc3MiaNSurVq3CkApuIpVIrgVc7oA/UAPIB2wz\nDKOkUsrmAFvDMN4B3gHw8/NLppcWibp3T9eGHjLE9ribLFn0iunu3RNu4hQihXz+ua6QCjoBz5qV\nMBGfP3+ewMBAcuTIwZYtWyQRi1QlKcn4DJA/3nW++23xnQZ2KqXuAccNwziCTs674z+klPoG+Ab0\nQRFPG7R4jIgIPQn3xx/WNsPQlbKGDZNylcKu/v4bhg+3Xo8YkfAo67NnzxIYGMipU6eYMGGCJGKR\n6iRlNfVuwN8wjEKGYaQFQoAVDz2zDN0rxjCMHOhh6wiE/a1erY+3iZ+IS5eGHTt0bWlJxMKOYmL0\n8LSlnHmlSnpQJr5///2X6tWrc/r0adatW0fNmjXtH6gQJntsMlZKxQDdgPXAQeAnpdR+wzCGGIbR\n+P5j64EowzAOAFuAj5RSUSkVtEhEbKxeiNWwoa6mAPq4m9Gj9cqZ8uXNjU+kSiNHWsua
p00L336b\ncLF+hw4duHDhAhs2bKBatWr2D1IIByDnGTu7K1d0yaLp02HXLmt7/vx6BXW5cubFJlK1efOgbVvr\n9YgRekH/wyIiIrh06RIBAYke8yqEy5DzjF1RWJguYZQ7N7z9tm0iDgrSJY4kEQuTbNigT9O0qFZN\n15CxOHToEP369SMuLo7ChQtLIhapniRjZ3P7tj6s4dVXdY/4zh3rPTc36N8f1q2DHDnMi1GkauHh\n0Ly5ni8GKFECli+3bmPft28fNWrUYNasWZw58/BaUCFSJ6lN7UwOHYKQEPjzT9v2MmX0kYavvaZ7\nykKY5J9/oH59XfYc9GzJ2rWQNau+/vPPPwkKCsLDw4PQ0FDy58//319MiFREkrEzuH1br3zp0wdu\n3bK2N2qkN3AWL25ebELc9++/eobk4kV9nTWrLuyRL5++3rNnD7Vq1SJjxoyEhobi7+9vXrBCOBhJ\nxo4sIkKfLzdzJkTFW5yeLp0u6NGli94/LITJzp2DmjXh5El9nT49rFoFxYpZn4mKiiJXrlysWbOG\nQoUKmROoEA5KkrEjioyEzp1h6VJdVzq+YsXgxx91YV8hHMDFi7pHbCl77uEBS5bok5lAl7jMkSMH\ntWvX5u+//8bdXX7tCPEwWcDlaPbv13uClyyxTcQFClg3bUoiFg7i8mWoXRsO3D82xs0NfvoJ6tbV\n12FhYRQuXJjFixcDSCIW4j/IvwxHsnq1XoR1/bq1rW5dfdBDvXpytKFwKDdv6sVae/fqa8OA77+H\npk319caNG2nSpAmFChWiSpUq5gUqhBOQZOwI4uL0HHDfvtbecMaMMH8+NG786P9XCBPcuwfBwbrK\nqsXMmXqxP8CaNWto3rw5L7zwAhs3bsTHx8ecQIVwEpKMzfbzz9CzJ+zZY20rUABWrJDhaOGQlNJL\nGlavtrZNmGAt8nH06FGaNm1KqVKl2LBhA9myZTMlTiGcicwZm+XECWjdGl55xTYRV6miq2lJIhYO\n6tNP9RGIFh9/bHv4g7+/P19//TWbNm2SRCxEEkkyNsOGDfDii7BwobXN01NXz9q8GXLmNC82IR5h\n/Hi9td3ijTesxyP++OOP/P777wC8+eabZJETwoRIMknG9rZzJzRrBtHR1raQEDh8GIYO1XuIhXBA\n48bBBx9Yrxs0gG++0Qu3Zs+eTZs2bRgxYoR5AQrhxGTO2J4OHNDLTy1VtPz84IcfrBsyhXBQI0fa\nnrhUqZLewuTuDt988w2dO3emVq1afPfdd+YFKYQTk56xvZw6BXXqwKVL+jpHDj1cLYlYODClYNAg\n20Rctao+iyRDBpg0aRKdO3emQYMGrFixggwZMpgWqxDOTJKxPURE6MoIp0/r60yZdPX8okXNjUuI\nR4iJgY8+gsGDrW01a+pEnDkzxMXFsX79epo2bcqSJUvw9PQ0L1ghnJwMU6eku3f1/uEhQ/RhDwBp\n08KyZSDntwoH9r//6fozYWHWtrp1dWG49OkhOjqa9OnTs3DhQtzc3PCwnI8ohHgq0jNOKb/8Ai+/\nDJ98Yk3Ebm4wbx4EBpobmxCPEBamT+WMn4ibNNHvIT09FYMGDaJy5cpcvXoVT09PScRCJANJxsnt\n5k296bJaNV1n2uKll2D7dmjZ0rzYhHiMceP0e8X//U9fG4Yepl68GNKmVXz66acMHjyYl156iUyZ\nMpkbrBAuRIapk9Mvv+gyRMeOWdsyZtRblrp310tPhXBQS5bAhx9ar318dEXWoCBQStG7d2/Gjh1L\n586dmTJlCmnSyHt5IZKL/GtKDrdvQ69euppW/ETcsCEcPKjLXUoiFg7s2DFdwMOiUiX44w+diAFG\njBjB2LFj6d69O19//bUkYiGSmWSIZ/W//+ljanbutLZ5e+tSRe3b63E+IRzY7dvQqhVcu6avCxbU\ndaezZrU+0759e9zc3Ojbty+G/EwLkezk7e2z+P13KFfONhHXqQP79kGHDpKIhVP44APdCwa92H/h\nQp2IY2NjmTFjBrGxseTPn59+/fpJIhYihUgyflqLF+vqB5a9w2nS6NUva9dCvnzmxiZEEs2bB9Om\nWa/HjdO77mJiYmjfvj1vv/02q1atMi9AIVIJGaZ+UrGxennp0KHWNm9vXRuwdm3z4hLiCf39tz4K\n0SI4GN57D+7du8frr7/OwoULGTFiBE2aNDEvSCFSCUnGTyIqCtq00WUsLZ5/HlauhBdeMC8uIZ7Q\npUt6qcPNm/q6SBGYPh3u3r1DSEgIy5Yt48svv+TD+MurhRApRoapkyo8XBfxiJ+IAwP1fLEkYuFE\nYmN1da2ICH2dMaPe1uTlBQcOHGDDhg1MnDhRErEQdiQ946SYMQO6dtXlLS0+/lgPVbu5mReXEE/h\n009t31N+9x288EIs4EaZMmX4559/yJMnj2nxCZEaSc/4UaKj4c034e23rYk4c2ZdF3DECEnEwuks\nWACjRlmvP/0Uate+Qa1atZh2fyWXJGIh7E+S8X85fhyqVIGZM61tJUvCnj26UK8QTuavv6BTJ+t1\n/frQq9c16taty9atW/Hy8jIvOCFSOUnGiVm+HMqWtW6+BGjbFnbs0Au2hHAyUVF6wdatW/ra3x+m\nTLlCvXq12blzJz/++CNt2rQxN0ghUjFJxvFdvqyrZjVtqj8H8PCAyZP1xJocnC6cUEyMXrB1/Li+\nzpQJfvrpDi1aBPH777+zaNEiWrVqZW6QQqRykowt1q6FEiVg7lxrm68vbNsGXbpINS3htD7+GDZu\ntF7PnQsvvZSO1157jWXLlsk+YiEcgKymBvjsM9siHgCvvw4TJkC2bObEJEQy+OEHGDPGev3hh//D\n1/dfoBy9evUyLS4hhC3pGX//vW0i9vHRmy6//14SsXBqq1fbLtgKCjrDqlXVadasGbdv3zYvMCFE\nAqm7Z/z773rbkkWtWrpYr4+PeTEJkQxmzdI/2rGx+rpw4VMcO1aTyMgLrF27Fk9PT3MDFELYSL09\n44sXoVkzfX4cQLFisGiRJGLh1JSCzz/XPWJLIvb1Pc6dO69w6VIkGzdupEqVKuYGKYRIIHUm43v3\noHVrOHVKX3t760IemTObG5cQzyAmBnr0gE8+sbaVLg1BQWOIjr7O5s2bqVChgnkBCiH+U+obplYK\nevaEsDB9bRh6aLpIEVPDEuJZnDsHISF68b/Fq6/C0qWQPv04Tp78AH9/f/MCFEI8UurrGY8YofcN\nWwwbBg0amBePEM8oNBReesk2Edepsw+lArl79yJp06aVRCyEg0tdyXjaNOjf33rdurXehCmEE4qN\nheHD9brDCxd0W5o00KXLXsLDa3DkyCEuW4rXCCEcWupJxosW6ZPTLYKCdFUtKeYhnNDx43oYun9/\niIvTbTlzwqRJ4fzwQ00yZMjA1q1bKSLTL0I4hdSRjDdv1kU8lNLXAQF6L3G6dObGJcQTUkqfXVKq\nFPz8s7W9WjWYPTucfv0C8fb2Ztu2bTwvddS
FcBqun4z379dbmCxHIBYtCmvW6JPUhXAiV69C8+b6\nVM8bN3SbmxsMHKjnjcuUyUe1atXYtm0bBQsWNDVWIcSTce3V1BcvQqNGcP26vvb11aeqy15i4WTO\nnYN69eDPP61tRYroOtNp0+4FSpA7d25WrVplWoxCiKfnuj3jO3d0N8JyVE3GjLo+oJ+fuXEJ8YQO\nH4ZKlWwTcdeu+oTPy5fXU6lSJQYOHGhegEKIZ+aayVgpePdd+OUXfW0YMH++roAghBPZuROqVIGT\nJ/W1m5sudTlpEoSGrqJx48a88MIL9OzZ09xAhRDPxDWT8dixMHu29XrkSGjc2LRwhHhSt27pLfE1\na0JUlG7LkAFWrICOHWHp0qU0b96cUqVKsXnzZnLkyGFqvEKIZ2MoywpjOwsICFDh4eHJ/4Xv3NFz\nwpZ54o4d9fJT2cIknEBcnD4w7NNP4fRpa3v27HqWpUIFuHr1KoUKFaJo0aKsW7cOb29v8wIWQiSZ\nYRh7lFIBid1zvQVcly5ZE3GWLDB1qiRi4RT++QeCg/VhYvEVLw6LF+uNAADe3t5s2LCBokWL4iW7\nAoRwCa43TH3njvXzzJllL7FwCgcPwiuv2CbiXLl00bi9e3UinjlzJuPHjwcgICBAErEQLsS1k7Ek\nYuEE/voLqlfX25cAPD1hwAA4ehTeeQfc3WHq1Km8+eabrF27lljL2YhCCJfhesPUkoyFE9mzB2rX\n1rMrAJky6bnhV16xPjNhwgTef/99GjRowKJFi3BzczMnWCFEipGesRAmCQ2FwEBrIs6cWdekiZ+I\nx4wZw/vvv0+zZs1YsmQJnp6e5gQrhEhRkoyFsLO4OH1yZ61ausQlQNasuoR6pUq2z6ZLl47g4GAW\nLFhA2rRp7R+sEMIuJBkLYUeRkfr47AEDrKct5cqle8kB9zc8KKU4ceIEAN27d+eHH37Aw8PDnICF\nEHYhyVgIO/n7b3j5ZVi3ztpWrZpeQf3SS/paKcXHH39MiRIlOHz4MACGbM0TwuVJMhbCDs6c0Qc9\n/Puvta1vX90jzptXXyul6NWrF6NGjaJdu3b4+/ubE6wQwu5kNbUQKezmTV2N9cwZfZ05s66y1aiR\n9Zm4uDh69OjB5MmT6dGjB1999ZX0iIVIRVy7ZywLXoTJ4uKgbVtrMQ83N11NK34iBpg9ezaTJ0+m\nd+/ekoiFSIWkZyxECurXD5Yts15PmQJBQQmfa9++PRkyZCA4OFgSsRCpkGv3jCUZCxNNmQKjR1uv\ne/XSFbUsYmJi6NOndvq6CAAAIABJREFUD+fOncPd3Z2QkBBJxEKkUq6XjO/etX4uyViYZPx46NrV\net24MYwaZb2+e/cuISEhjB49mtWrV9s/QCGEQ3G9ZCw9Y2GyL76ADz6wXpcvD/Pm6fligDt37tCq\nVSsWL17M2LFjeeutt8wJVAjhMGTOWIhkohQMHQoDB1rbqlSBNWt0zWmA6OhoWrRowdq1a5k8eTJd\nunQxJ1ghhEORZCxEMnk4EdeoAStXWhMxwK1btzh9+jTTp0+XHrEQ4gFJxkIkg6lTbRNx7dqwdClk\nyKCvb9y4Qdq0acmePTvh4eFSZ1oIYUPmjIV4RkuWQPzR5jp1YPlyayK+evUqderUoW3btiilJBEL\nIRKQZCzEM9i6Fdq00fPFAOXKwaJFYDnp8PLly9SuXZtdu3bRunVr2bokhEiUDFML8ZT+/huaNLH+\nyPn7w+rV1jniqKgoatWqxb59+1i0aBFNmjQxL1ghhENLUs/YMIy6hmEcNgzjH8Mw+j3iuRaGYSjD\nMAKSL8QnJMlY2MG5c1C/vvU84ty5Yf168PHR10opmjdvzoEDB1i+fLkkYiHEIz22Z2wYhhswGagF\nnAZ2G4axQil14KHnvID3gZ0pEWiSSTIWKezWLV3E4/Rpfe3lBWvXQqFC1mcMw+CLL77g+vXrBCVW\n/1IIIeJJSs+4PPCPUipCKXUX+BFI7G3+UGAUcDsZ43tykoxFCoqLg3btIDxcX7u56Tliy3nEZ86c\n4ZtvvgGgQoUKkoiFEEmSlGTsC8Q7hZXT99seMAzjZSC/Usr8un6SjEUK+uQTvXraYuJEvY0J4NSp\nU1SvXp3evXtz7tw5cwIUQjilZ15NbRhGGmAs0CsJz75jGEa4YRjhFy9efNaXTpwkY5FCZs60rS/9\nwQfw3nv684iICF555RUiIyPZuHEjefLkMSdIIYRTSkoyPgPkj3ed736bhRdQAggzDOMEUBFYkdgi\nLqXUN0qpAKVUgI9lpUtyk/OMRQpYs8b2xKWGDWHMGP350aNHqV69OtevXyc0NJQKFSqYE6QQwmkl\nZWvTbsDfMIxC6CQcArSx3FRKXQVyWK4NwwgDeiulwpM31CSSnrFIZjt3QqtWEBurr0uXhvnzrQc/\n7Nixg7t377JlyxZKlSplXqBCCKf12J6xUioG6AasBw4CPyml9huGMcQwjMYpHeATk2QsktGhQ9Cg\ngV5BDVCwoF457eWlT18CaNeuHUeOHJFELIR4akmaM1ZKrVFKFVFKPaeUGn6/7TOl1IpEnq1hWq8Y\nJBmLZHPmjC5tGRWlr3Pk0HuJ8+SBP/74A39/f7Zu3QqAt7e3iZEKIZyd65XDvHvX+rkkY/GUTp6E\nwEA4dUpfZ8igq2sVKQK7du2iZs2aGIZBvnz5zA1UCOESXC8ZS89YPKO//4bKleHwYX3t7g6LF0P5\n8rB9+3aCgoLImjUr27Zt47nnnjM3WCGES5BkLEQ827ZBtWpw9qy+TptWL9aqWxcOHjxI7dq1yZ07\nN9u2baNAgQLmBiuEcBmulYyVkmQsntrSpbqAh6XetKXMZatW+rpo0aL07NmTrVu3yvC0ECJZuVYy\njomxnmXn5mbdeyLEYyxfrpOu5b1c7ty6l1yzJmzatIlTp06RJk0ahg4dKgU9hBDJzrWSsfSKxVNY\ntw5at7buI/b3h+3bdb3plStX0qBBAz788ENzgxRCuDRJxiJVCwuDZs2si/Cffx62btUnMC1evJjm\nzZtTunRppk+fbmqcQgjXJslYpFrbt+uylrfvnzNWoABs3qz3Ef+/vTuPr6q4/z/+GpIgjShQpFSj\ngCKKiFb5IlpFCGGLrGUVEQShBQSqgKJWK+6CVbC2BVwgZRWQRYhRQHYtLaA1lB+bCIgsKiBQZEnM\nNr8/JulNIMtF7n7fz8eDR8+cnHPz6THkzcyZM2fWrFncfffdNGrUiGXLllGlSpXgFisiEU1hLFFp\n7Vo3Q/rUKde+7DIXxDVqQG5uLq+99hp33HEHS5Ys0YIeIuJ33qxNHT4UxuKF1atdj7ggiH/xCxfE\ntWtDXl4eMTExLF68mAoVKnDhhRcGtVYRiQ7qGUtU+egjuOsuTxBXr+6CuG5dmDBhAh06dODHH3+k\natWqCmIRCRiFsUSN1FRo395zj/iyy9xkrfr14c9//jNDhgyhXLnI+ishIuEhsn7zKIylGEeOQL9+\n0L
GjZ9Z0jRruOeJrr4U//elPDB8+nC5dujBv3jwu0M+OiARY5IZx+fLBq0NCgrUwdaobgv773z37\nr7rKBXHt2jB27Fgee+wxevTowezZsymvnxsRCYLIDWP1bqLav/8NiYnQty98/71nf5cubiZ1wbLS\nzZo1Y8iQIcyYMYPY2Miazygi4UNhLBFl717o3RsaNnS93wI1asD778O8eVC9umXZsmUANGjQgL/9\n7W/EaOlUEQkihbFEhNxcGDXKvW94xgzP/thYePhh2LrVPc5krWX48OG0atWKJUuWBK9gEZFCImtc\nrmB2DiiMo0hmJtx7LyxYUHT/b34DY8a4SVrgniEeOnQoEydOZNiwYbRu3TrwxYqIFCOywlg946jz\n3/+6WdKFh6RvuQXGjnXvJS6Ql5fHgAEDmDx5Mo8++ihjxozBGBP4gkVEiqFhaglbBw64wC0cxMOG\nwbp1RYMY4J///CcpKSk89dRTCmIRCTnqGUtY+vRTNzN63z7PvldecfeHi8vZxo0b8/nnn3PTTTcF\nrkgRES+pZyxhxVoYPx7uuMMTxLGxMG0aPPJI0SDOysqiZ8+e/5uopSAWkVClMJawceIE9OwJQ4dC\ndrbbV6mSe2Spd++ix/7444907dqVWbNm8eWXXwa+WBGRc6BhagkL+/ZBq1awfbtn3803w9y5biWt\nwjIyMujcuTNLlixhwoQJPPDAA4EtVkTkHKlnLCHvu++gefOiQTxgAPzzn2cHcWZmJu3bt2fp0qVM\nmjRJQSwiYUE9Ywlp338PLVpAwUhzXBxMnnz2sHSB8uXLc80119CnTx96l3SQiEiIURhLyPrvf93Q\n9JYtrh0TA3PmQKdOZx97/Phxjh07Rq1atZgwYUJgCxUROU8KYwlJJ07AXXdBerprGwPTpxcfxMeO\nHaN169YcO3aMLVu26M1LIhJ2FMYSck6fdutIr1vn2Td5Mtxzz9nHfv/997Rq1YotW7Ywb948BbGI\nhKXIDWP9Ug5LmZluTenCq2qNHw/333/2sYcOHaJFixZ8+eWXLFq0iOTk5MAVKiLiQ5EbxuoZh52s\nLOjWDfLfbgjAq6/C4MHFHz9y5Eh27txJWloazZs3D0yRIiJ+oEebJCTk5ECvXpCW5tn3/PNuecuS\nvP7666xatUpBLCJhT2EsIWHkSLeAR4E//AGefPLs4/bs2UO/fv3IyMigcuXK3HrrrYErUkTETxTG\nEnTTp8Of/+xpDxsGL7549gsfdu3aRdOmTXnvvffYvXt3YIsUEfGjyArjrCzPtsI4LHz+uVtNq0Cn\nTu5dxGcG8RdffEGTJk04deoUK1eu5Prrrw9soSIifqQJXBI0hw65mdOZma5drx5MnQrlzvgn4tat\nW0lKSsJay6pVq7jhhhsCX6yIiB9FVs9YYRw2fvgB7r7b8xrESpVg4UK46KKzj83Ly6N69eqsXr1a\nQSwiEUk9YwmozZthwgR3n/jkSbfPGHjnHahTp+ix+/fvJyEhgfr165Oenk65M7vMIiIRIrJ+uymM\nQ9aXX0JSEtxwA0yc6AlicI8wtWlT9Pj169dTv359xo0bB6AgFpGIpp6x+N2GDdC2rXsDU2H16sEj\nj0DfvkX3r127lrvuuotq1arRtWvXgNUpIhIskRPG1iqMQ9DixdC1q1tvGiA21s2YHjIEmjQ5e9b0\n6tWradeuHQkJCaxcuZKEhITAFy0iEmCRE8bZ2Z7t2Nizp+RKwE2bBv37u9W1AKpWhQ8+gJLW6Thy\n5AgdOnSgZs2arFixgl/+8peBK1ZEJIgiJ7HUKw4Z1sILL0CfPp4grlkT1q4tOYgBqlatysyZM1m9\nerWCWESiSuT0jBXGISEjw/WGZ83y7LvxRjdcfdllxZ+zaNEiADp27Ej79u0DUKWISGhRz1h85ptv\noGnTokGclOReh1hSEM+bN4+uXbsyduxYrLWBKVREJMQojMUnNm2CW26BTz/17HvgAViyxC3oUZxZ\ns2bRo0cPbr31VtLS0jBnzuYSEYkSkRnG5csHr44otHs3tGrlesYAMTEwfrxb3CMurvhzpk6dSq9e\nvWjcuDFLlizh4osvDlzBIiIhRveM5bwcOgStW8PBg65dqRLMmwctWpR+3saNG0lKSmLRokXEx8f7\nv1ARkRCmMJaf7MQJt3LWzp2uXaECpKVB48Yln3P8+HEqVarEuHHjyMrK4gL9txIRidBhav2C97us\nLOjSBf79b9cuVw5mzy49iF977TXq1avH3r17McYoiEVE8imM5Zzk5MDMmXDTTbBsmWf/G29Ax44l\nn/fyyy8zYsQIbr/9di699FL/FyoiEkYiZ5g6K8uzrTD2uexst6LW6NGwa1fRrz37LPzudyWf+/zz\nzzNq1Cjuuecepk2bRmxs5PzYiYj4QuT8VlTP2G/27oXu3WH9+qL7L7oInnwSHn205HMnT57MqFGj\n6NOnD5MnTyYmJsa/xYqIhCGFsZRq8WLo1QuOHvXsq1IFhg2D3//ebZeme/fuHD16lIcfflivQRQR\nKUHk/HZUGPtUTo7r9bZp4wni2Fg3JP311zBqVMlBbK3lr3/9KydPnuSiiy5i5MiRCmIRkVKoZyxn\nOXYM7r676ASthASYMwfuuKP0c/Py8hgyZAhvvPEGcXFxDBo0yL/FiohEAIWxFLFjB7Rv7/63QMuW\nbgZ1tWqln5ubm8uAAQNISUnh8ccfZ+DAgf4tVkQkQkTO2KHC+LwtX+5ecVg4iEeNcveNywrinJwc\n+vbtS0pKCk8//TQvvfSS1poWEfGSesYCuLWkH3oIcnNd+2c/g6lToVs3787/9ttvWb58OS+++CJP\nPPGE/woVEYlACuMol53tQnjiRM++hARYtAj+7/+8OT+b2NhYrrjiCrZs2cLPf/5z/xUrIhKhNEwd\nxY4eheTkokF8yy2wYYN3QZyZmUnnzp0ZOXIkgIJYROQnUhhHqe3b3f3hlSs9+3r0gDVr4LLLyj4/\nIyODjh07kpaWRp06dfxXqIhIFIjMMNb7jEu1ZIkL4oK3LQE8/zy88467V1yWU6dO0bZtW5YtW0ZK\nSopmTYuInCfdM44i1sJf/gIjRkBentsXH+/WnO7SxdvPsHTs2JE1a9Ywbdo0evXq5b+CRUSihMI4\nSmRlwdCh8Pbbnn2XXw6pqXDzzd5/jjGGwYMHM2DAALp37+77QkVEopDCOAocPw6dOsGqVZ59t94K\nCxfCL3/p3WccPXqUDRs2kJycTOfOnf1TqIhIlFIYR7hvvoG77oJNmzz77r0XJk2CChW8+4zvv/+e\nli1b8uWXX/LVV19RrawVQERE5JxE5gQuhTEAX3wBt99eNIhfeAGmT/c+iA8ePEhiYiLbt29nwYIF\nCmIRET+InJ5xVpZnW2HMhg3ujUtHjrh2TAxMngx9+nj/Gd988w3Nmzdn7969fPDBByQlJfmnWBGR\nKBc5Yaye8f/s2OFe7vDDD64dHw/z5rnh6nMxa9Ys9u/fz5I
lS7jzzjt9X6iIiABeDlMbY5KNMV8Y\nY3YaYx4v5usjjDFbjTGbjDErjDE1fV9qGRTGAGRkuPWkC4K4alW3sMe5BLG1FoARI0awadMmBbGI\niJ+VGcbGmBhgPHAXUA+4xxhT74zD0oGG1tobgXnAn3xdaJkUxgA8+KDnHvEFF8BHH7mZ097auXMn\njRo1Ytu2bRhjuPLKK/1TqIiI/I83PeNGwE5r7W5rbRYwG+hY+ABr7Spr7en85jrgct+W6QWFMdOm\nuVnSBV5/HRo08P787du306RJE/bs2cOPha+niIj4lTdhnADsK9Ten7+vJP2BxedT1E8S5WG8eTMM\nGuRp33svDBhwLudvJjExkdzcXFatWsVNN93k+yJFRKRYPp3AZYzpBTQEmpbw9QHAAIAaNWr48ltH\ndRh//bVbzjIjw7Wvuw7eeAOM8e787du306xZM+Li4li5ciV169b1X7EiInIWb3rGB4ArCrUvz99X\nhDGmBfAk0MFaW+wYp7X2LWttQ2ttQ58/rxqlYbx2rXvt4Y4drl0wc7piRe8/o0aNGrRu3Zo1a9Yo\niEVEgsCbMP4UqGOMudIYUx7oAaQWPsAYczPwJi6ID/m+TC9EYRhPnQpJSXD4sGvHxbn7xvXOnF5X\ngvT0dI4fP058fDwzZszQqxBFRIKkzDC21uYAQ4GlwDbgXWvtFmPMc8aYDvmHvQJUBOYaYzYaY1JL\n+Dj/sLbooh8R/gpFa+Hxx6FvX8//7WrV3NrT3r596R//+AdNmjRhyJAhfqtTRES849U9Y2vth8CH\nZ+wbVWi7hY/rOjeFgzguDspFziqfxXnpJXj5ZU/7hhvc25dq1fLu/NWrV9O2bVuuuOIKXi78QSIi\nEhSRkVqFh6gjvFf87rvwxz962u3bu/vG3gbxsmXLaNOmDbVq1WL16tUkJJQ2MV5ERAIhMpbDjJL7\nxevXF11bulkzN1nL239/ZGdnM3jwYOrUqcPy5cv10gcRkRChMA4TX38NHTtCZqZrX3MNzJ9/bgMB\ncXFxLFmyhMqVK1O1alX/FCoiIucs8oapIzCMv/sO2rWDgwdd++c/hw8+gCpVvDt/7ty5PPjgg1hr\nqV27toJYRCTEKIxDXHo6NGrkVtgCNz/tvffg6qu9O3/mzJn06NGD9PR0MgpWBRERkZCiMA5hCxZA\n48awL38x0nLlICUFmjTx7vwpU6bQu3dvmjZtyuLFi4mPj/dfsSIi8pMpjEOQtfDii+6Z4dP5r9+4\n+GI3NN2rl3efMWnSJO6//35atGhBWloaFc9lSS4REQmoyAjjws8ZR0AYT5hQ9PGl2rVh3TpITvb+\nMy699FI6depEamqqesQiIiEuMsI4gnrGn34Kw4d72omJ7pGm667z7vxt27YB0LZtWxYsWECFChV8\nX6SIiPiUwjiEHDsG3btDdrZr33wzLF4M3k5+Hj16NPXr1+fjjz/2X5EiIuJzCuMQYa1b0GPPHteu\nVAnmzgVvOrbWWp599lmeeOIJ7rnnHm6//Xa/1ioiIr6lRT9CxKuvwvvve9p//7u7V1wWay1PPvkk\no0ePpm/fvkyaNImYmBj/FSoiIj6nnnEImD8f/vAHT3vECOjUybtzly1bxujRoxk4cCCTJ09WEIuI\nhCH1jINswgQYOtQNUwP8+tcwZoz357ds2ZL333+ftm3bYozxT5EiIuJX6hkHibXu8aUhQzxBfO21\n7j5xXFzp5+bl5TFy5Ej+85//YIyhXbt2CmIRkTCmnnEQ5OTAwIFuNa0Ct94KaWlwySWln5ubm8tv\nf/tbpkyZQpUqVfjVr37l32JFRMTv1DMOMGvhoYeKBnGbNrBiRdlBnJOTw3333ceUKVN45pln+EPh\nG80iIhK2Iq9nfC7vFAyCv/7V3Scu0KcPvP122UPT2dnZ3HvvvcydO5eXXnpJQSwiEkEiL4xDuGf8\nwQdFV9e6+27XQy7nxfhEXl4eJ06cYOzYsYwYMcJ/RYqISMApjANk0ybo0QPy8lz7ttvcs8RlBXFm\nZiYZGRlUqVKFtLQ0PbokIhKBdM84AL79Ftq1g5MnXbtmTVi4EH72s9LPO336NB06dCA5OZmcnBwF\nsYhIhFLP2M8OH4YWLTzvJL7oIjdrunr10s87efIk7du3Z82aNaSkpBAbGxn/qURE5GyR8Rs+RMP4\n2DFo1Qq2bnXtmBh4912oX7/083744QfatGnDv/71L2bMmEHPnj39X6yIiASNwthPfvjBvX9440bX\nLlcOZs707p3EAwcOZP369cyePZtu3br5t1AREQm6yLhnnJXl2Q6BMD51yt0j3rDBsy8lxc2e9saY\nMWNYuHChglhEJEpERhiHUM84Nxd69oRPPvHsmzjRPU9cmsOHD/Pcc8+Rl5dHzZo1adu2rX8LFRGR\nkKEw9rHHHoPUVE977FgYNKj0c7777jsSExMZPXo0WwtuMIuISNTQPWMfeustF74FHnnEvQ6xNAcO\nHCApKYn9+/fz4YcfUr+s2V0iIhJxFMY+snw5DB7saf/mN2W/CnHv3r0kJSVx6NAhli5dSuPGjf1b\npIiIhCSFsQ9s2wZdu7r7xQANGsCMGe5RptLs3r2bU6dO8dFHH3Hbbbf5v1AREQlJCuPzdPgwtG0L\nx4+79mWXuXvGF15Y8jknT56kYsWKJCYmsmvXLuLj4wNTrIiIhCRN4DoPmZnQqRN89ZVrx8e71bUS\nEko+Z9u2bdStW5fp06fnn6MgFhGJdgrjn8ha6N8f1q51bWNg1iy4+eaSz9m8eTOJiYnk5OTQoEGD\ngNQpIiKhL/LCOEDvM37+eXjnHU/71VehQ4eSj9+4cSOJiYnExsayZs0arr/+ev8XKSIiYSHywjgA\nPeN334Wnn/a0Bwwo+p7iMx08eJCkpCTi4+NZs2YN1157rd9rFBGR8BH+YZyXB9nZnrafe8a7d8Nv\nf+tpt2gBf/ubG6YuSfXq1Xnuuef4+OOPufrqq/1an4iIhJ/wn01deF3quDj3RgY/yc52S12eOOHa\ntWvD3Lnu2xbnk08+oUKFCtxyyy0MHTrUb3WJiEh4C/+ecQCHqJ99Ftavd9uxsW7CVuXKxR+7YsUK\nkpOTGTZsGNZav9YlIiLhTWHspdWr4aWXPO0XXoBbbin+2KVLl9KuXTuuuuoqFixYgCltDFtERKKe\nwtgLR45Ar17ucSaA5s1h5Mjij01LS6NDhw7UrVuXVatWUb16db/UJCIikUNhXAZrYeBAOHDAtatW\nhWnTSr41PXXqVG688UZWrFjBJZdc4vN6REQk8kTWBC4/hPHMmTB/vqedkuKWvDxTTk4OsbGxzJgx\ng8zMTCpVquTzWkREJDKFf8/4uutcIJ84AevW+fSj9+2DwpOgBw4sfmGPGTNm0KhRI44cOcIFF1yg\nIBYRkXMS/mFsjHu2qGJF8GEI5uVBv36eF0BcdZVbZetMKSkp3HfffVSuXJkKFSr47PuLiEj0CP8w\n9pOJE907isHl/b
RpLu8Le/PNN+nfvz8tW7YkLS2NC0t7VZOIiEgJFMbF2LGj6GzpRx+FO+4oesy0\nadMYNGgQbdu2ZdGiRXr7koiI/GQK4zNs3gytW0NGhmvfcINb7ONMzZs358EHH2TBggUanhYRkfOi\nMC4kLQ1+/WvYs8e14+Jg+vSik7QXLlxIbm4uCQkJvP7665QP0FuiREQkcimMcc8Sv/KKmyl98qTb\nV7EivPce/OpXBcdYnnnmGTp16kRKSkrwihURkYgT/s8Z+8Ajj8C4cZ52rVqQmuqGqMEF8RNPPMGY\nMWO4//776devX1DqFBGRyBT1PePFi4sG8Z13woYNRYP44YcfZsyYMQwaNIhJkyYRExMTnGJFRCQi\nRXUYHzkC/ft72u3auceZqlXz7Nu5cydvvvkmDz74IBMmTKCcH1/RKCIi0Smqh6mHDIFvv3Xb1au7\npS4L5mNZazHGUKdOHTZu3MjVV1+tty+JiIhfRG03b/ZsmDPH0377bU+PODc3l379+jFhwgQA6tSp\noyAWERG/icowPnAABg/2tPv3h/bt3XZOTg69e/dmypQpHDlyJDgFiohIVIm6YeqsLOjdG44dc+1a\ntTwTuLKysujZsyfz589nzJgxPPbYY0GrU0REokdUhXFeHtx/P6xa5drGwJQpcPHFkJeXR7du3UhN\nTWXcuHEMHz48qLWKiEj0iKowHjkS3nnH0372WWja1G2XK1eOpk2b0qpVK4YMGRKcAkVEJCpFTRi/\n+mrR54kHDoQ//hFOnz7Njh07uOmmmxgxYkTwChQRkagVFRO4Zswo+hamTp1g/Hg4deokbdq0oVmz\nZhwruIksIiISYBHfM969GwYM8LTvvNMNVZ869QNt2rRh3bp1TJ8+nSpVqgSvSBERiWoRHcbWwgMP\neF6HeN11bs3pjIxjJCcn8/nnnzNnzhy6dOkS3EJFRCSqRfQw9ezZ8NFHbrtg5nTlyjB27FjS09OZ\nP3++glhERILOWGuD8o0bNmxoP/vsM799/tGjrid86JBr//738Je/uO3s7GzS09Np1KiR376/iIhI\nYcaYf1trGxb3tYjtGT/2mCeIExJg6NBv6dy5MwcPHiQuLk5BLCIiISMi7xl/8glMmuRpP/30ftq1\nS+Kbb75h165dVK9ePXjFiYiInCHiwjgryz1DXKBly68ZMyaJw4cPs3TpUm6//fbgFSciIlKMiAvj\n11+Hbdvcdnz8V2zdmsipUz+wfPlyDU2LiEhIiqh7xvv3uyUuCzz6aDy1al3BihUrFMQiIhKyIiqM\nH34YTp0C+Irrr8/miSeq88knn9CgQYNglyYiIlIir8LYGJNsjPnCGLPTGPN4MV+/wBgzJ//r640x\ntXxdaFmWL4d33wX4f8Ct1KkzjLg4MMYEuhQREZFzUmYYG2NigPHAXUA94B5jTL0zDusPHLPWXg28\nBrzs60JLk5UFQ4cCpAPNiI8vz8svPxTIEkRERH4yb3rGjYCd1trd1tosYDbQ8YxjOgJT87fnAc1N\nALukr70GX3zxKZCEMReyYsUarrnmmkB9exERkfPiTRgnAPsKtffn7yv2GGttDnAcqOqLAsuybx88\n++yPQGegCk899TG33VY7EN9aRETEJwL6aJMxZgAwAKBGjRo++cwFCyAj4wJgLnXrXs5TT13uk88V\nEREJFG96xgeAKwq1L8/fV+wxxphYoBJw5MwPsta+Za1taK1tWK1atZ9W8RkeegiWLYO6dW/jrbcu\nJzbinpwWEZFI5010fQrUMcZciQvdHkDPM45JBfoA/wK6AittAN9A0aIFbN4MMTGB+o4iIiK+U2YY\nW2tzjDFDgaWEOmlCAAAEy0lEQVRADJBird1ijHkO+MxamwpMBqYbY3YCR3GBHVAKYhERCVdeDepa\naz8EPjxj36hC25lAN9+WJiIiEh0iagUuERGRcKQwFhERCTKFsYiISJApjEVERIJMYSwiIhJkCmMR\nEZEgUxiLiIgEmcJYREQkyBTGIiIiQaYwFhERCTKFsYiISJApjEVERIJMYSwiIhJkCmMREZEgUxiL\niIgEmbHWBucbG3MY+NqHH3kJ8L0PPy9a6TqeP13D86dreP50Dc+fr69hTWttteK+ELQw9jVjzGfW\n2obBriPc6TqeP13D86dreP50Dc9fIK+hhqlFRESCTGEsIiISZJEUxm8Fu4AIoet4/nQNz5+u4fnT\nNTx/AbuGEXPPWEREJFxFUs9YREQkLIVdGBtjko0xXxhjdhpjHi/m6xcYY+bkf329MaZW4KsMbV5c\nwxHGmK3GmE3GmBXGmJrBqDOUlXUNCx3XxRhjjTGa1VoMb66jMaZ7/s/jFmPMO4GuMdR58fe5hjFm\nlTEmPf/vdJtg1BmqjDEpxphDxpjNJXzdGGP+kn99NxljGvilEGtt2PwBYoBdwFVAeeA/QL0zjhkM\nvJG/3QOYE+y6Q+mPl9ewGRCfv/2AruG5X8P84y4CPgbWAQ2DXXeo/fHyZ7EOkA5UyW//Ith1h9If\nL6/hW8AD+dv1gD3BrjuU/gBNgAbA5hK+3gZYDBjgNmC9P+oIt55xI2CntXa3tTYLmA10POOYjsDU\n/O15QHNjjAlgjaGuzGtorV1lrT2d31wHXB7gGkOdNz+HAM8DLwOZgSwujHhzHX8HjLfWHgOw1h4K\ncI2hzptraIGL87crAd8EsL6QZ639GDhayiEdgWnWWQdUNsZc6us6wi2ME4B9hdr78/cVe4y1Ngc4\nDlQNSHXhwZtrWFh/3L8KxaPMa5g/lHWFtfaDQBYWZrz5WbwGuMYYs9YYs84Ykxyw6sKDN9fwGaCX\nMWY/8CHw+8CUFjHO9XfmTxLr6w+UyGGM6QU0BJoGu5ZwYowpB4wD+ga5lEgQixuqTsSN0HxsjLnB\nWvvfoFYVXu4Bplhrxxpjfg1MN8bUt9bmBbsw8Qi3nvEB4IpC7cvz9xV7jDEmFjcscyQg1YUHb64h\nxpgWwJNAB2vtjwGqLVyUdQ0vAuoDq40xe3D3mVI1iess3vws7gdSrbXZ1tqvgB24cBbHm2vYH3gX\nwFr7L6ACbs1l8Y5XvzPPV7iF8adAHWPMlcaY8rgJWqlnHJMK9Mnf7gqstPl34QXw4hoaY24G3sQF\nse7Rna3Ua2itPW6tvcRaW8taWwt3372Dtfaz4JQbsrz5+7wQ1yvGGHMJbth6dyCLDHHeXMO9QHMA\nY8x1uDA+HNAqw1sqcF/+rOrbgOPW2m99/U3CapjaWptjjBkKLMXNIkyx1m4xxjwHfGatTQUm44Zh\nduJuyvcIXsWhx8tr+ApQEZibP/dtr7W2Q9CKDjFeXkMpg5fXcSnQyhizFcgFRlprNdKVz8tr+DDw\ntjFmOG4yV191UDyMMbNw/+C7JP+++tNAHIC19g3cffY2wE7gNHC/X+rQfxMREZHgCrdhahERkYij\nMBYREQkyhbGIiEiQKYxFRESCTGEsIiISZApjERGRIFMYi4iIBJnCWEREJMj
+P50h0JPWRc/hAAAA\nAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = pd.concat(dfs)\n", + "import statsmodels.api as sm\n", + "thresh = 0.001 # POSSIBLE BUG? several very small pivots -- fine for pvalues\n", + "grid = np.linspace(0, 1, 101)\n", + "fig = plt.figure(figsize=(8, 8))\n", + "plt.plot(grid, sm.distributions.ECDF(results['pivot'][results['pivot'] > thresh])(grid), 'b-', linewidth=3, label='Pivot')\n", + "plt.plot(grid, sm.distributions.ECDF(results['pvalue'])(grid), 'r-', linewidth=3, label='P-value')\n", + "plt.plot([0, 1], [0, 1], 'k--')\n", + "plt.legend(fontsize=15);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "all,-slideshow", + "formats": "ipynb,Rmd" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/source/algorithms/ROSI.Rmd b/doc/source/algorithms/ROSI.Rmd new file mode 100644 index 000000000..9ed0517e1 --- /dev/null +++ b/doc/source/algorithms/ROSI.Rmd @@ -0,0 +1,110 @@ +--- +jupyter: + jupytext: + cell_metadata_filter: all,-slideshow + formats: ipynb,Rmd + text_representation: + extension: .Rmd + format_name: rmarkdown + format_version: '1.1' + jupytext_version: 1.1.1 + kernelspec: + display_name: Python 3 + language: python + name: python3 +--- + +# LASSO when conditioning on less: ROSI + +Instead of conditioning on the active set and signs, +one can work in the full model and for each feature $j$ selected +construct p-values and confidence intervals +conditional only on the event $j$ was selected. +This is the approach of [Liu et al.](https://arxiv.org/abs/1801.09037), which +can be extended as ROSI (Relevant One-step Selective Inference) +beyond squared-error loss (described in forthcoming work, though +code is already available). + + +```{python} +import numpy as np, pandas as pd +import matplotlib.pyplot as plt +import statsmodels.api as sm +# %matplotlib inline + +from selectinf.tests.instance import gaussian_instance # to generate the data +from selectinf.algorithms.api import ROSI + +``` + +We will know generate some data from an OLS regression model and fit the LASSO +with a fixed value of $\lambda$. In the simulation world, we know the +true parameters, hence we can then return +pivots for each variable selected by the LASSO. These pivots should look +(marginally) like a draw from `np.random.sample`. This is the plot below. 
+ +```{python collapsed=TRUE} +np.random.seed(0) # for replicability + +def simulate(n=500, + p=100, + s=5, + signal=(5, 10), + sigma=1): + + # description of statistical problem + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0., + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + sigma_hat = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) / np.sqrt(n - p) + L = ROSI.gaussian(X, y, 2 * np.sqrt(n), sigma=sigma_hat) + soln = L.fit() + active_vars = soln != 0 + + if active_vars.sum() > 0: + projected_truth = np.linalg.pinv(X[:, active_vars]).dot(X.dot(truth)) + S = L.summary(truth=projected_truth) + S0 = L.summary() + + pivot = S['pval'] # these should be pivotal + pvalue = S0['pval'] + return pd.DataFrame({'pivot':pivot, + 'pvalue':pvalue}) +``` + +Let's take a look at what we get as a return value: + +```{python} +while True: + df = simulate() + if df is not None: + break +df.columns +``` + +```{python} +dfs = [] +for i in range(200): + df = simulate() + if df is not None: + dfs.append(df) +``` + +```{python} +results = pd.concat(dfs) +import statsmodels.api as sm +grid = np.linspace(0, 1, 101) +fig = plt.figure(figsize=(8, 8)) +plt.plot(grid, sm.distributions.ECDF(results['pivot'])(grid), 'b-', linewidth=3, label='Pivot') +plt.plot(grid, sm.distributions.ECDF(results['pvalue'])(grid), 'r-', linewidth=3, label='P-value') +plt.plot([0, 1], [0, 1], 'k--') +plt.legend(fontsize=15); +``` diff --git a/doc/source/algorithms/ROSI.ipynb b/doc/source/algorithms/ROSI.ipynb new file mode 100644 index 000000000..11996ef3d --- /dev/null +++ b/doc/source/algorithms/ROSI.ipynb @@ -0,0 +1,186 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# LASSO when conditioning on less: ROSI\n", + "\n", + "Instead of conditioning on the active set and signs, \n", + "one can work in the full model and for each feature $j$ selected\n", + "construct p-values and confidence intervals\n", + "conditional only on the event $j$ was selected.\n", + "This is the approach of [Liu et al.](https://arxiv.org/abs/1801.09037), which\n", + "can be extended as ROSI (Relevant One-step Selective Inference)\n", + "beyond squared-error loss (described in forthcoming work, though\n", + "code is already available).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np, pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import statsmodels.api as sm\n", + "%matplotlib inline\n", + "\n", + "from selectinf.tests.instance import gaussian_instance # to generate the data\n", + "from selectinf.algorithms.api import ROSI\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will know generate some data from an OLS regression model and fit the LASSO\n", + "with a fixed value of $\\lambda$. In the simulation world, we know the\n", + "true parameters, hence we can then return\n", + "pivots for each variable selected by the LASSO. These pivots should look\n", + "(marginally) like a draw from `np.random.sample`. This is the plot below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "np.random.seed(0) # for replicability\n", + "\n", + "def simulate(n=500, \n", + " p=100, \n", + " s=5, \n", + " signal=(5, 10), \n", + " sigma=1): \n", + "\n", + " # description of statistical problem\n", + "\n", + " X, y, truth = gaussian_instance(n=n,\n", + " p=p, \n", + " s=s,\n", + " equicorrelated=False,\n", + " rho=0., \n", + " sigma=sigma,\n", + " signal=signal,\n", + " random_signs=True,\n", + " scale=False)[:3]\n", + "\n", + " sigma_hat = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) / np.sqrt(n - p)\n", + " L = ROSI.gaussian(X, y, 2 * np.sqrt(n), sigma=sigma_hat)\n", + " soln = L.fit()\n", + " active_vars = soln != 0\n", + " \n", + " if active_vars.sum() > 0:\n", + " projected_truth = np.linalg.pinv(X[:, active_vars]).dot(X.dot(truth))\n", + " S = L.summary(truth=projected_truth)\n", + " S0 = L.summary()\n", + "\n", + " pivot = S['pval'] # these should be pivotal\n", + " pvalue = S0['pval']\n", + " return pd.DataFrame({'pivot':pivot,\n", + " 'pvalue':pvalue})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's take a look at what we get as a return value:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['pivot', 'pvalue'], dtype='object')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "while True:\n", + " df = simulate()\n", + " if df is not None:\n", + " break\n", + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "dfs = []\n", + "for i in range(200):\n", + " df = simulate()\n", + " if df is not None:\n", + " dfs.append(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAeMAAAHSCAYAAADfUaMwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0\ndHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzde3yO9R/H8de12cz5fByico7CnI/Z\nHEKI2CgSRUgnogMRUTlVkgoVyWEIySlnOklUv4SSYznkfDY7Xr8/vubebM7bfd27934+HnvY9b2v\nts8ye+/7vb4Hy7ZtRERExDk+ThcgIiKS3imMRUREHKYwFhERcZjCWERExGEKYxEREYcpjEVERByW\nwalPnDdvXrt48eJOfXoRERG32rx58zHbtvMl95pjYVy8eHE2bdrk1KcXERFxK8uy9l3tNQ1Ti4iI\nOExhLCIi4jCFsYiIiMMUxiIiIg5TGIuIiDjMsdnUN+LMmTMcOXKE6Ohop0uRa/Dz8yN//vxkz57d\n6VJERNIkjw3jM2fOcPjwYQIDA8mUKROWZTldkiTDtm0iIiI4cOAAgAJZROQWeOww9ZEjRwgMDCRz\n5swKYg9mWRaZM2cmMDCQI0eOOF2OiEia5LFhHB0dTaZMmZwuQ25QpkyZ9DhBROQWeWwYA+oRpyH6\nuxIRuXUeHcYiIiLpgcJYRETEYQrjVDRkyBAsy7r8VrhwYdq2bcuuXbsA6NKlC0FBQSn+eXfs2MGQ\nIUM4depUin9sERFJeR67tMlb5MiRg2XLlgGwe/duBg0aRHBwMFu3bmXQoEFERESk+OfcsWMHr7/+\nOl26dCFnzpwp/vFFRCRlXTeMLcv6FGgBHLFt+55kXreA94BmwAWgi23bv6R0oWlVhgwZqFGjBgA1\natSgWLFi1K1blyVLltCuXTuHqxMREU9wI8PUU4Cm13j9AaDkpbfuwIe3X5b3qlKlCgB79+5NNEy9\nZ88eLMti8eLFie6PjY2lYMGCDBw48HLb6tWrqV69OgEBARQoUIBevXpx7tw5ANauXcuDDz4IQIkS\nJbAsi+LFi7vhKxMR8R4XL8KuRdvd9vmuG8a2ba8HTlzjllbA57axAchpWVahlCrQ2+zduxeAggUL\nJmovUaIE1apVY/bs2Yna161bx+HDhwkLCwNg69atNG3alLx58/Lll1/y+uuvM2PGDB5++GEAKleu\nzOjRowGYN28eP/74I/Pnz0/lr0pExHtE7D7EhhJhlHiwPH9N3eCWz5kSz4wDgX8TXO+/1HYoBT52\nIp6wlNW2b/6/iYmJAcwz4169epEtWzZCQkJYtWpVovvCwsJ4/fXXiYyMJGPGjACEh4dTvnx57rnH\nPCEYNmwYd9xxBwsXLsTX1xeA3LlzExoayo8//kjNmjUpXbo0AJUqVVKvWETkRsXFEfX+x1x8YQBz\n4s5SHIju1oP/Gm6iYFG/VP3Ubp1NbVlWd8uyNlmWteno0aPu/NSOOX78OH5+fvj5+VG6dGl2795N\neHg4hQolHTxo3749Z86cuTzhKyYmhnnz5hEaGnr5no0bN/LQQw9dDmKAtm3bkiFDBr777rvU/4JE\nRLzR//5HbI1a+D/Xi4FxZ5kArAFiy99LwRwpP9H2SinRMz4AFE1wXeRSWxK2bU8EJgIEBQXdQh8z\n7cmRIwcrV67EsiwKFixI4cKFr7pbVWBgIHXq1CE8PJxWrVqxatUqjh07dnmIGuDQoUMUKFAg0X/n\n6+tLnjx5OHHiWk8TREQkiV27YMgQ7OnT8b009NkfuIMC+D0+g3s/beiWMlKiZ7wQ6GwZNYDTtm2n\n+BA1mCFip99uVoYMGQgKCqJKlSoEBgZed9vI0NBQvv76ayIiIggPD6dSpUqULFny8uuFChVKciBD\nbGwsx48fJ3fu3DdfoIhIerR/P/ToAWXKwBdfcMG2GQ1E4MdnDMYaupdH3RTEcANhbFnWTOBHoLRl\nWfsty+pmWdZTlmU9demWJcBuYCcwCeiVatWmA+3atSMiIoL58+czf/78RL1igOrVqzN//nxiY2Mv\nt82bN4+YmBjq1KkDgL+/PwAXL150X+EiImnBjh3w5JNw110wcSLExHAWsyxoAFCaz8kycggvDgpw\na1nXHaa2bbvDdV63gd4pVlE6lz9/fho0aEC/fv04deoU7du3T/T6wIEDqVSpEq1bt6Znz57s37+f\nAQMG0KRJE2rWrAlweQLXxx9/TFhYGJkzZ6ZChQpu/1pERDyCbcOGDTBmDMybl2iY8xRQm2xs4wIw\nnefGhPLCC+4vUdtheqCwsDAOHTpEjRo1ksyGLl++PEuXLuXIkSO0adOGgQMH0qFDB+bOnXv5njvu\nuIPRo0czb948ateufXndsYhIuhEdDStXQp8+cMcdUKsWfPlloiD+o0gQd1CKbVwE5jBggDNBDGDZ\nt/IgNAUEBQXZmzZtuurr27dvp2zZsm6sSG6X/s5ExFG2DT/+CNOmQXg4nDyZ/H3NmrGq6kuEvG4B\nDwLTCAtrwfTp4JOKXVTLsjbbtp3sgQTam1pERNK2Cxdg7FiYMsXMjk5OrlzQqhU8/zxLD5SmdeuM\nl17YQ716OZkyJXWD+HoUxiIiknbt3w8tW8KvvyZ9rWhReOghaN0a6tQBPz8WLDhImzaVsO0Xgccp\nUyYn8+dDxoxJ/3N3UhiLiEja9NNPJmj/+8/VliMHtGsHnTtD7dqJurtff/0vbdo0xLb/A+4mMBCW\nLgVPWBWqMBYRkbRnxgzo2hUiI811hgwwerRZOxyQdFnS4sV7aN26IbZ9AlhOgQI1Wb0aPGXHYIWx\niIikDUePwqxZZoLWzz+72nPnhrlz4f77k/3PVq48TcuW9YmLOwesIk+eIFauhFKl3FP2jVAYi4iI\n5zp2DBYtMsuSli2DSwfvXFa2LCxcCHffneQ/tW2YMAGefz4HcXEvAA3ImfM+VqyAS2fveAyFsYiI\neJbdu+Grr2DBAvjuO4iLS3qPnx906ADjxpnnxFc4fx7CwrayaNEFoCrwHLlymWfElSql+ldw0xTG\nIiLiLNuG336D+fNNAG/ZcvV7a9WCTp2gffurzrzauxcaNfofO3eGAPmB36lc2Ze5c6FEidT4Am6f\nwlhERJxz8CB07w6LFyf/umVBzZpm1nSbNmZP6WvYvBkaN97EiRONgSzAArp182X8+GTndXkMbYeZ\nioYMGYJlWZffChcuTNu2bdl1tUXpKcyyLMaPH++WzyUiclNsG6ZPNw9vrwzijBmheXOYNAkOHYLv\nv4cXX7xuEC9dCnXqbODEiWAgB35+65k0qSSTJ3t2EIN6xqkuR44cLFu2DIDdu3czaNAggoOD2bp1\nK1myZHG4OhERBxw+DD17mmHphEJDzRrhJk0ga9ab+pCffmo62LGxHwD5yJ59NV9/XYx69VKu7NSk\nME5lGTJkoEaNGgDUqFGDYsWKUbduXZYsWUK7du0crk5E
xM3mzDFBfPy4q614cfjsM2jQ4JY+5Oef\nQ7duNmABkwkMPMXy5QUoVy4F6nUTDVO7WZUqVQDYu3dvsq+XKFGCF198MUl7u3btLp9XfP78eZ5+\n+mlKly5N5syZKVGiBL179+bMmTPX/NzFixenX79+idqmTJmCZVmcO3fuctuJEyfo3r07BQoUICAg\ngFq1avHTTz/dzJcpIpLY8eMQFmYmXiUM4h494PffbzmIDx6Enj2XAzWAY9x3X0Y2bkxbQQwKY7eL\nD+GCBQsm+3r79u2ZM2dOorZz586xePFiwsLCALhw4QKxsbEMHz6cpUuXMmzYMFavXp0iPe3IyEhC\nQkJYuXIlo0aNYsGCBeTLl4+QkBD+S7jlnIjIjTh2DD74AMqXNycpxStSBL75Bj76CLJlu+UP//DD\ni7lw4UEgkhIlbNatg8KFb79sd9MwtRvEXFqkvnv3bnr16kW2bNkICQlJ9t6wsDBGjhzJhg0bLg9v\nf/3110RFRV0O23z58vHhhx8m+vglSpSgTp06/PPPPxQrVuyWa/3iiy/4448/2Lp1KyVLlgQgJCSE\n0qVLM2bMGEaNGnXLH1tE0onISPj6a7NT1pIlSTfqePxxeOedZNcH34wBAxbw44/tgYrAcj75JDfZ\ns9/Wh3RM2gpjy3K6gkQHU9+I48eP4+fnd/m6WLFihIeHU7BgwcshDWbms6+vL5UqVaJUqVKEh4df\nDuPw8HDq169PgQIFLt8/bdo0xo4dy99//8358+cvt+/YseO2wnjlypVUqVKFEiVKJKqvfv36XOv8\naRER9u0zPd1PPjFbV16pUCGYOBFatLjtTzVr1iJGjmwHBAFLeeKJnFfbDTNN0DB1KsuRIwc///wz\nmzZtYv/+/ezdu5cHHniAdevW4efnd/ktODj48n8TGhrKnDlzsG2bM2fOsGzZsstD1ADz58+nc+fO\n1KxZkzlz5rBhwwbmX5qVePHixduq99ixY2zYsCFRbX5+fnz22Wf8+++/t/WxRcQLnTsH8+aZYwzv\nvBPeeitpENeoYYaqt29PkSAG+PrrykBH4BsKFsxJWh+0S1s94zQoQ4YMBAUFJWmvUqUKPyfY6Dxb\ngmcmoaGhDBs2jO+++449e/YQFxdHmzZtLr8+Z84cqlevzoQJEy63rVu37rq1BAQEEBUVlajt5MmT\nia5z585NUFBQomHweBmdPvBTRDzDiRMmgL/6ClascJ2clFDRomY4+tFH4dIjr5SwZs0azp2ry4wZ\nhYGpgMn5nDlT7FM4Im2F8U0OEXuybNmyJRvSAOXLl+eee+4hPDycPXv2EBISQp48eS6/HhERkSQY\np0+fft3PWaRIEbZv356obfny5Ymug4ODWb58OcWKFSN//vw3+uWISHrw778wdqwZar5wIfl7GjeG\nXr3Mph0ZUjZiJk6cSI8ePfDxGQWYlSEPPWQ25krr0lYYpyOhoaG89957nD59mkmTJiV6rVGjRvTu\n3Zvhw4dTvXp1lixZwqpVq677MR966CH69OnDiBEjqFq1Kl9++SVbt25NdE/nzp356KOPaNCgAf36\n9ePOO+/k+PHjbNy4kYIFC/L888+n6NcpIh7Ots0ek+PHmx2zrpyMBVChArRqZfaMTqVzCd9/fzzP\nPNMHaEZc3NMABAaaXrE3UBh7qLCwMAYNGkTGjBlp3bp1otd69OjB7t27ee+997h48SKNGjVixowZ\nlyd8XU337t3ZtWsX48aNIzIyks6dOzNw4EB69Ohx+Z6AgADWrFnDa6+9xuDBgzl8+DD58+enWrVq\ntGzZMlW+VhHxMNHRsG6d6+Sk/fuT3lOhAnTpYkL4OttU3q5Ro8bSv39foBUQDmSkfHmz/WWhQqn6\nqd3Gsh0a+g0KCrKvNTt3+/btlC1b1o0Vye3S35lIGmbbsHGjWY40a1bijTkSqlcPXnoJmjZ1ywqX\n2bMP0KFDaeLimgHTAT8aNDA7aaa158SWZW22bTvZ55PqGYuIpGfnzsGECWY50o4dyd+TK5eZBd2z\npzlByQ127jRnQyxYEAj8CJQFMhAWBlOmmLMkvInCWEQkPbp4ET7+GEaMgCNHkr4eGAht25ph6Lp1\nIcF+CakpOhqGD7cZNmwgcXGFgKeBCmTODIMGQf/+4OOFi3IVxiIi6YVtw59/wrJlZlb0lc+Cs2aF\nhx82E7EaNHB76m3ZAp072/z2Wz9gLPAUYNO5s8WIEeb3A2+lMBYR8WbR0TB3rjkzePVqcz7wlYoU\nMd3ORx+FzJndXmJMDIwaBa+9ZhMT8wwwHuhD9erv8f77FlWrur0kt1MYi4h4o/PnYfJkGDPGrA9O\nToEC8Mor5iDggAD31neJbcNjj8GMGTbQE/gYX9++vP32KJ57zsLX15Gy3M6jw9i2bSxP2I9arsup\nWfkicoXTp+Hdd2HcOLNT1pVy5oT77zebc3TqBFmyuL/GBKZOhRkzwJxFXJpChV5m1arhlC2bvn72\ne2wY+/n5ERERQWYHhkzk5kVERCQ6EENE3Oz8eRPAo0bBFdvcki8f9O5tZkTfdx+e0t3cuRN6944B\ndgDl6NLleSZNSvGNu9IEj/2S8+fPz4EDBwgMDCRTpkzqIXso27aJiIjgwIEDiU6VEhE3OX4cPv/c\nHNBw5azoEiXM+qAuXSBTJkfKu5roaAgLi+bChUeAZdx111988EGhdBnE4MFhnP3SoZQHDx4kOjra\n4WrkWvz8/ChQoMDlvzMRSWWRkWZC1rRp5s8rf0bedRcMGQJhYR7bzRw4MJLNm0OBr/DxGcOcOYWc\nmDvmMTzzb+mS7Nmz6we8iKRvMTGwZAn8+qtZlvTnn/DXXxARkfTeokXhtdfMjCgPfmy0YsVFRo5s\nCywB3uftt5+mUiWnq3KWR4exiEi6FRcH4eEweDD8/fe1761WzRxX2KWLY7Oib9ShQ9C27ThgKfAx\nwcHdeeEFp6tynsJYRMST2DYsXGjW/W7ZcvX7SpSARx4xa4NLl3ZffbchKsrsKXL27PNAFXLnDmbq\nVO/cUetmKYxFRDyBbcOKFTBwIPz8c+LXcuY0Q88VKkCZMiZ88+Z1ps5bdObMGapWfZYdO94CCuDj\nE0x4uHfvqnUzFMYiIk777jt49VVYvz5xe5Ys8Nxz0K9f2juiKIFTp05RuXJT9uzZDDwMNGfkSAgJ\ncboyz6EwFhFxwsWLMGeOOTFpw4bEr2XMCL16maMK8+d3pr4UcuLECWrVasyePb8Dc4HmdOyInhNf\nQWEsIuIutg2//w4zZ5ojC48dS/x6hgzwxBNmqNoLxm+PHj1K9eqN2LPnT2AB0Iz77oNJk9xyFHKa\nojAWEUlNcXFm+HnBAvO2b1/Se/z9oWNHM2nrzjvdX2MqsG0YP95mz54MwNdAI/LkgfnzHTmLwuMp\njEVEUkNkJEyfDiNHmnXBySlWDJ56Crp1S/PD0Qn9889hBg7MzbRp+YGNgA933GEmiRcv7nBxHkph\nLCKSks6fh48
+MucFHzyY9PUcOaBZM7M7VvPmHrNPdEr5449/qFq1IRcv1gM+BXyoUwe+/NKrft9I\ncQpjEZGUsnat2Xxj797E7dmzmzXBDz0E9eubYWkvtGnTHmrXbkhU1EmgB2D+d3z4oZmTJlenMBYR\nuV0XLpiZz++/n7i9YEF4/nno0cP0iL3Y+vV/ExzckJiYC8AqoApvv23OqdBkretTGIuI3IqoKLND\n1s8/myHphFtW5soFw4ebbqGHb0+ZErZvjyU4uBUxMReB1fj43MukSdC1q9OVpR0KYxGRGxUdDR9/\nDF98Ab/9ZiZpXal5c5g4EQoXdn99Dli/Htq29SUm5lMgG35+5Zkxw2x7KTdOYSwicj22DUuXmp0q\nrjYzOnt2ePddc1hDOhmXHTz4N9544zvi4p4GapA5s1m61Lix05WlPQpjEZGrOXkSNm6Ed96Bb75J\n+nqJElC1qjk1qUOHdNMbjomBzp03MXNmYyAr0JkCBbLz1VdQvbrT1aVNCmMRETBLkn791TwDjn/b\nuTPpfdmzmx2yHn88zR3WkBIiIiAk5Ed++KEpkBtYw333mSAuVszp6tIuhbGIpF8XLsCoUWYR7Nat\nZresq/HxgSefhKFD0+2C2QsXoF699Wze3BwoCKzmoYeKMm2aOdNCbp3CWETSp6VLoXdv2LPn6vf4\n+UHFimbstUcP8346df48tGwJmzf/BRQBVvHSS4UZPlznEacEhbGIpC8HD5q1v7NnJ263LHNWcNWq\nrufAFSumi6VJ13PuHDRteobvv88OPAl0YtiwAAYOdLoy76EwFhHvFxkJixbBtGmwZIlZohQvd254\n6y2zPWW2bM7V6KFOn4ZatRaxbVtnYAlQgzffDOCll5yuzLsojEXEO128CKtXm7U2c+fCqVNJ7+nc\nGUaPhnz53F9fGnD0KFSvPp89e0KBe4FSjBoF/fo5XZn3URiLiPewbRO+M2eaZ8Lnzyd/X40aZoes\nhg3dW18a8u+/UL36bA4d6ghUBZbxzjs5eO45pyvzTgpjEfEOf/4JPXuawxqSU7w4PPoodOoEpUq5\ns7I0Z8cOqFfvew4f7gDUxrIWM3lyNm1vmYoUxiKStl28CG++aZ77RkUlfq1kSWjd2rzVqKFpvzfg\nhx/M/66jR2sAI8iQ4Wlmzsyi7S1TmcJYRNKubdvMsYQ7drjafH3hmWfMmuAyZdLN1pQpYepU6NZt\nGrGx9wNFyJx5APPmQZMmTlfm/fRrooikTevXQ+3aiYO4enXYvNmcolS2rIL4BsXGQv/+0KXLOGJj\nOwNvkzcvrFihIHYX9YxFJO2ZPds8+40fls6SxcyK7t5dQ9E3KSICQkPh669HAf2BhyhXbgyLF5vH\n7OIeCmMRSTtiY82hDS++6GorUMCsHa5c2bm60qjISGjbFpYuHQ4MBEJp3nwaM2f6acm1mymMRcTz\nHTsGn34KH32UePvK0qVh2TJ14W5BdLQ5aGrp0ghgNtCJfv0+5a23MuDr63R16Y/CWEQ804UL5qHl\n3LkwZ47pxiVUuzYsXGh20JKbEhsLnTvbzJ8fA2QC1vHKK9kYPlwp7BSFsYh4jqgoCA83pygtX24e\naF4pVy5zaMPgwdo3+hZERUH37jazZvUFdgJf0rdvTt54w+nK0jeFsYg4z7Zh8WLo2zfx7OiEqlaF\nXr3MbKNMmdxbn5f4/Xfo3DmO//3vGeAD4Bl69szAqFGaeO40hbGIOGvrVnOK0ooVSV8rW9bsQNG2\nLVSp4v7avERMDLz9NgwZEkdMTA9gMtCPLl1GMn68pSD2AApjEXHGuXPw2mswbpx5iBkvRw7TQw4N\n1baVKeDYMWjeHDZuBHgemIyv76uMHDmM556ztBLMQyiMRcT9vvoKnn4a9u93tfn4mHXCQ4fqFKUU\nEhlpNigzQQzQmaJFA1mxoj+lSztZmVxJvxOJiPv8+adrr+iEQXz//fDbb/DhhwriFGLb8NRT8N13\n0cBcLAtGjKjC7t0KYk90Q2FsWVZTy7L+sixrp2VZSY6UtiyrmGVZayzL+tWyrN8ty2qW8qWKSJq1\ncSO0aQPlyplecbx8+eCLL2DVKqhQwbn6vNCoUTBlSiTwMNCOp5/exMsvQwaNh3qk6/61WJbli5l2\n1wjYD/xsWdZC27a3JbhtIDDbtu0PLcsqBywBiqdCvSLiyWJi4KefYN8+OHAADh40e0V/+23Se598\n0py0pHXCKe6rr2DAgAigLbCUGjU+4L33gpwuS67hRn5HqgbstG17N4BlWbOAVkDCMLaB7JfezwEc\nTMkiRcTDXbgAn31m9ofeu/fa97ZoAa++ao40lBT3yy/QseMFzI/pVZQsOYl1657QjGkPdyNhHAj8\nm+B6P1D9inuGAMsty+oDZAFCUqQ6EfFcZ87AX3/BN9+YGdFHj179Xl9f6NjRHA10zz3uqzGd+ftv\naNoULlxYC6wlb97P+OGHx/D3d7oyuZ6UenrQAZhi2/YYy7JqAtMsy7rHtu24hDdZltUd6A5QrFix\nFPrUIuI2CxbABx+Yc4QPXmUALE8eMyErMNC8FS4M9epB0aLurTWdOXgQGjWyOXrUApqRLdtfrFt3\nJ3nzOl2Z3IgbCeMDQMJ/RUUutSXUDWgKYNv2j5ZlBQB5gSMJb7JteyIwESAoKMi+xZpFxN2iosxJ\nSePGXf2eYsWgXz/o2tUcaShuc/IkhIScZN++1sBAMmVqxLJld1KunNOVyY26kTD+GShpWVYJTAiH\nAR2vuOcfIBiYYllWWSAAuMaYlYikGQcPQrt28MMPidv9/aFkSShTBlq1grAw8PNzpsZ07OxZaNLk\nONu3NwL+wMcngi+/hFq1nK5MbsZ1w9i27RjLsp4GvgF8gU9t295qWdZQYJNt2wuBvsAky7Kex0zm\n6mLbtnq+ImndunVmJ6zDh11tDz1kZkHfeafWyTjszz+hZcsj/P13CLAD+IrPP3+ABx5wujK5WTf0\nL8m27SWY5UoJ215L8P42oHbKliYijrFtGDsWBgxwbVXp42NCuF8/nSrgAebOhS5dTnL+fANgL7CI\nd98N4ZFHnK1Lbo1+rRWRxM6eNc995851teXLB7NmQcOGztUlAERHw8svw5gxYFaShuDv35bJk+vT\nqZPDxcktUxiLiMu2beaEpD//dLXVqAFz5kCRIs7VJQBs2QKPPQa//roP80SwOHfdNY4vv4R773W6\nOrkd2ptaRCAuDt55xxxTmDCIe/c2z40VxI6KiYE33zR/Pb/+uhuoB7ShRQubTZsUxN5APWOR9G7X\nLnj88cRbVmbKBJMmoQeQztu710xU/+knMJO0GgIRPPfcfMaM0RGI3kJhLJJe2TZ89JGZkHXhgqv9\n3nth2jQd3OABtm2DRo3i91fZBgSTIUMsc+asoXXrig5XJylJYSySHh0/biZpLVzoavP1hVdegYED\n0f6Jztu0yWxtefy4ubas/mTJYvP992upWFG7eXgbhbFIerN2LTz6qDlV
KV758jB1qnkoKY5buxZa\ntjQT28FsaDZ9+jTKlTtGyZIlHa1NUofCWCQ9OHXKTMRavBgmTzZD1PGeeQbefhsCApyrTy5bssQc\n/RwZCbARf/8xLF06lbp1cwG5HK5OUovCWMQbXbgA338Pq1fDqlXmTOG4uMT35MkDU6aYIw3FI3z9\ntVlZFh0N8AOW1ZR8+fJy553HMQfoibdSGIt4i3/+gZkzYelS+PFHc7jD1TRsaCZpFS7svvrkmubP\nNzuPmiBej2U14447CvPtt6sJDFQQezuFsUhaduaM2Slr2jQzDH21LeEtyzwPDg4203Pvvx+tifEc\nc+dChw5mPTGsxrJacNddxVm/fhWFChVyujxxA4WxSFoTEwPLl5sAXrAALl5M/r5y5UwPODgY6teH\nXHre6ImmTze7asVvAV6sWF4CA6uzYEE4+fPnd7Y4cRuFsUhasW+fOU/4iy/gyJGkr/v4mOB95BFo\n0gQKFnR/jXLDbBtGjTJncRjbKVWqDGvWVKRQodVYOowjXVEYi3i6P/6AkSPN82AzjplYxYrQqRN0\n7KhnwGlEbCw8+yx88EF8y5dAGE8+OYHChZ8EFMTpjcJYxBNFRsKiRfDpp2aty5UKFzbh26mTCWNJ\nMy5cMH91X30V3zILeJRq1arz5JPtHaxMnKQwFvEUtm1mQU+dCrNnm7XBV2rQAF580QxD+/q6vUS5\nPXv2mKVLv/4a3zINy+pC7SxTf7IAACAASURBVNp1WLJkEdmyZXOyPHGQwljEaXFxppv09tvxpwEk\nZlnQurV5uFi9uvvrkxSxZInZ+OzkyfiWffj4dKVBgwYsXLiQLFmyOFmeOExhLOKUqCgzGWvUqMTH\nFsYrUcL89O7cGe6+2/31SYqIjYWhQ2HYMNfKMz8/GDfuDu66awl16tQhU6ZMzhYpjlMYi7jb2bMw\ncaI5Pzjh/tBgDmjo1Am6dIHatU2vWNKsixfN+uEFC1xtOXO+z8CBJXjqqRZAI8dqE8+iMBZxlyNH\n4P33Yfz4pM+Ds2eHnj3NFFtt8uAVzp41TxdWr3a13X33KHbu7M8vv3QEtA2puCiMRVLbnj0wZgx8\n8knSDToKFIDnnjNBnCOHM/VJijt+HJo1g40bXW21ar3BDz8MIiwsjKlTpzpXnHgkhbFIatm+Hd54\nA8LDXdsrxbvrLujf3zwP1mlJXuXgQWjcGLZujW+xadhwMKtXD6NTp0589tln+GomvFxBYSyS0nbv\nhtdfN5OzrjwpqXJlMyu6bVstTfJC//5rtv3etctcW5Z5KrFt2wlKlOjGxx9/rCCWZCmMRVLKrl1m\nZvQnnyTdKSs4GF56yfypSVlead8+E8R79phrX1+b8eOP8NRTBYiLGweAjw7nkKvQd4bI7YiNNYfQ\nPvCAWX708ceJg7hpU/PgcOVKCAlREHupPXvMWRzxQZwhQxyNG/fmjTeqcPToUXx8fBTEck367hC5\nFbGxMGmSefbbsiUsW5b49Xr1YP16c7Zw1arO1ChusWuX2Rht3z5z7ecXS8OG3Vm69EMeeeQR8ubN\n62h9kjYojEVu1po15tlv9+6un8Bger3Nm5vjDdeuhbp1HStRUl9EhJmfd++98M8/ps3fP4Z69R5n\n+fJPGDRoEG+99ZZOX5IbomfGIjdqyxZ47bXEOzgA5MkD3brBU0+ZXbPEq9m22Tq8f39XCIOZFB8a\n+hZTp05j2LBhDBw40LkiJc1RGItcS1QUzJsHEybAt98mfi1zZjMpq29f8754vehoaNcu4YlLxj33\nmE3Vypd/huDgO+jUqZMzBUqapTAWSU5kJIwbZzbrOHw46eudOsGbb0JgoPtrE8c891ziIM6bFwYP\njuS//4ZTseIAsmTJriCWW6JnxiIJ2bbpCZcrZ8YhEwZxhgzQvr2ZHf355wridGb8eDNAEq9HD9iy\nJYJFi1oxfPgwVq5c6VxxkuapZywS7+efTQCvXZu4PTDQ/OR94gntG51OLV9utg2PFxYGo0efp1Wr\nlqxZs4bJkyfTqlUr5wqUNE9hLOmbbZs1wG+9lXhHf4BcucxOWk89Zc68k3Rp+3bznDh+M7Vq1eC9\n987SrFlzvv/+e6ZOnaqhabltCmNJf86fh19+MT3h6dPN+wn5+kLv3jB4MOTO7UyN4riICDMs/eab\ncOaMaStSxEymP3PmMLt372bGjBmEhoY6W6h4BYWxpA//+x9Mm2bGG7duTbpnNJgQDg2FgQOhbFn3\n1ygeISrK7Gj6xhvm0Id4mTNDePh5ChbMjGXdzY4dO8isWfSSQhTG4r0OHTKHNUybZtYIX01AgHke\n3LcvFC/utvLE8+zdCy1aJDxxybjjDhg79hi9ezeiWbNmDB8+XEEsKUphLN7Fts0ErAkTYP78pEcX\ngtkpq1w5s01ltWrmBKX8+d1eqniWX381ZxD/95+rrVAhM1Dy4IOHadYshJ07d/Lmm286V6R4LYWx\npG3nz8OOHfDnn2amzdy55s8rZcoErVvDo4+afaOzZnV/reKxvvkGHn4Yzp0z1/7+MGwYPP00nDp1\nkODgYPbt28eiRYsIDg52tljxSgpjSXtiYkzojhkDmzZd+966daFrV9P7zZbNPfVJmjJ1qnlKEX/Y\nVs6cZpJW/foQHR1NSEgI+/fvZ9myZdSrV8/ZYsVrKYwl7YiIgClTzJnB8WfVJSdrVujcGXr2NPsU\nilxFeDh06eK6LlrUHLRVvry59vPzY+jQoRQuXJhatWo5UqOkDwpjSRtWrYLHHoMDBxK3+/qac4TL\nlIHSpaFCBXOkYfbsztQpaca335rf2eLdey8sWQKFC8OuXbvYunUrLVu25OGHH3auSEk3FMbi2aKj\nzUlJb79tJmfFy50b+vQxD/V0XqzcpO3boVUrs4wJzO9yq1ebb6u//vqLhg0bEhcXR3BwMFmyZHG2\nWEkXFMbiuXbvhg4dzF7Q8fLlg1deMQ/5NAlLbsF//5lZ0ydPmusCBczQdO7csG3bNho2bIht26xc\nuVJBLG6jMBbPNHOm2YYyfusjgMaNzWybggWdq0vStB07zL4ue/ea68yZYfFis7z8999/JyQkBF9f\nX9asWUNZbfwibqRTm8SznDsHjz8OHTu6gjhDBjNpa+lSBbHckshIs814hQrw22+mzccHZs+GKlXM\n9bx58/D392fdunUKYnE7y074HM6NgoKC7E3XW5Yi6csvv5jjcP7+29V2112ml1y1qnN1SZq2dq05\ndGvHDlebjw98/HH8kqYYMmTIgG3bHDt2jHz58jlWq3g3y7I227YdlNxr6hmL87ZuNTOlq1dPHMSd\nOpltkRTEcotmz4bg4MRBXK0abN5sgvj777+nXLly/PXXX1iWpSAWxyiMxRkXL8KaNWYZ0j33wOef\nu3ZdyJrVXH/+uTbqkFu2YoXZcC3+TJDs2WH8ePjhB7jvPli7di1NmjTBsiyyajKgOEwTuMQ9Ll6E\nOXPgu+/M0YVbtrjCN6GGDc3
44d13u79G8RobN8JDD5mVcWCWLq1cCYGB5nrlypW0bNmSEiVKsHLl\nSgoVKuRcsSIojCW1RUebXbOGDYN//03+Hssy+0YPGGCGqkVuw/btZunS+fPmumhRc3JmfBD/8MMP\ntGjRglKlSrFy5Ury65AQ8QAappbUERsL06eb05G6d08+iEuXNsuXtm2DefMUxHLb9u0zK+COHzfX\nefKYIC5a1HXPvffeS9euXVmzZo2CWDyGesaSsmzb7LI/aFDSQ2Hz54fevaF2bbOeJGdOZ2oUr3Tw\noJmstX+/uc6SxWxvWaaMuV6xYgU1atQgW7ZsTJgwwblCRZKhnrGkDNs259BVqwZt2iQO4ly54M03\nzY5ar71mfmIqiCUFHTlivq127TLX/v5msKVaNXM9c+ZMHnjgAQYNGuRckSLXoJ6x3L69e80e0YsX\nJ27PmhWeew769lX4Sqo5cQIaNTJHWoPZI2bOHDNcDTB16lS6du1K3bp1eeONN5wrVOQaFMZy66Kj\n4d13YcgQuHDB1R4QYIajBwwwe0mLpJKzZ6FJE/j9d3Pt42OmKrRsaa4nT55M9+7dCQ4O5quvviJz\n5szOFStyDQpjuTWrV5te75YtrjbLgiefhMGDzTl0IqnsxRchfiM/y4LPPoP27c31uXPneP3112na\ntCnz5s0jICDAuUJFrkNhLDfnxx/h1VfNhh0JVawIEydqRrS4zfr1Zkl6vA8+cJ1PbNs2WbNm5dtv\nv6VQoUJkzJjRmSJFbpAmcMmN2bIFmjeHWrUSB3HmzOYQh02bFMTiNhcvmkGYeC1bmlVyAG+99RbP\nPvsstm1TvHhxBbGkCQpjubYjR8xPufvuM+tE4vn6mp+Gf/4J/fqBn59zNUq6M2yYa7/pbNlMrxhs\nhg4dyssvv8yxY8eIjY11skSRm6JhaknemTMwaRIMHZr4TGHLMscbDhmiLSvFEf/7H4wc6boeORIC\nA20GDhzIiBEjeOyxx/jkk0/w9fV1rkiRm6QwFiM62jyEW7XKTM7atMnsopVQ48YwZow52EHEATEx\n8ccemuu6dc0Gb6+++ipvvvkmTz75JB999BE+Phr0k7RFYZzeHThgesATJ8KhQ8nfU6oUjB1rNvy1\nLPfWJ3JJXBw884xr9rS/v/nW9fGBmjVr8uyzzzJ27FgFsaRJCuP06pdfYMQIs3Vlcs/WLMs8J+7S\nBXr21DNhcVRcnPk2nDjR1TZwYBynTv0MVOfBBx/kwQcfdKw+kdulME5voqLM7Jc330wawoUKmXPn\ngoOhQQPInduREkUSiouDHj1g8mRXW2hoLLt2PUmtWlP59ddfqVixonMFiqQAhXF68ttv8Nhjru2K\n4tWvb3bMat1aPWDxKDEx5pnwZ5+52jp2jAG6MHXqdAYPHkyFChUcq08kpSiMvd1//5l1wStWwLRp\nrpkvAPXqwfjxoB9m4oE2bTJB/OuvrrZOnaKJiHiEuXPnMHz4cF555RXnChRJQQpjbxQRYfaM/uIL\nc1bwlTJlMsPUffqY2S8iHuTMGRg40KwdjotztXfrBvffP5dHH53D6NGj6du3r3NFiqQwhbE3sW2Y\nPRv694d//kn+npo1YcoUM0NaxMP88AO0a2fOJo4XEGC2O+/fHywrjOLFi1G7dm3nihRJBeoWeYvN\nm6FOHQgLSxzE/v7mmfDQofD99/Dddwpi8UjLlkFISOIgbtIENm2K4PffO7Jt2x9YlqUgFq+knnFa\nFxsLb78Nr72WeHZ03rxm1nTnzmb/aBEPNmcOPPKI2XsGzLfv++9DixbnadnyQdauXUvz5s25RxvO\niJe6oZ6xZVlNLcv6y7KsnZZlvXSVe9pblrXNsqytlmXNSNkyJVkHDphT1V991RXEfn7Qty/s3Gn2\nlFYQi4f75BMzoBMfxMWKmeHq5s3P0qzZA6xbt47PP/+cRx55xNlCRVLRdXvGlmX5Ah8AjYD9wM+W\nZS20bXtbgntKAi8DtW3bPmlZVv7UKlgwz4bnzTOLL48fd7XXrAlTp0LJks7VJnIT3nvPHIsdr0wZ\nM/E/W7bTNGnyABs3bmTGjBmEhoY6V6SIG9xIz7gasNO27d22bUcBs4BWV9zzJPCBbdsnAWzbPpKy\nZQpgliVNnw733gsPP+wKYsuCQYPM3tIKYkkjxoxJHMSVK5tv4SJFwN/fn1y5cjFnzhwFsaQLN/LM\nOBD4N8H1fuDKg2tLAViW9T3gCwyxbXtZilQoZn3H5MlmOdLevYlfCww0S5gaNHCiMpFb8vbb8FKC\nB161a8PixRAdfYyTJ33JlSsXixYtwtJe6JJOpNQErgxASaABUARYb1lWBdu2TyW8ybKs7kB3gGLF\niqXQp/Zyu3bB44/Dt98mbs+SxZwnPHAg5MnjTG0it2D4cPNtG69ePRPE588fJjg4mLx587JmzRoF\nsaQrNzJMfQAomuC6yKW2hPYDC23bjrZtew+wAxPOidi2PdG27SDbtoPy5ct3qzWnD3FxMGGCGZJO\nGMR58phlSv/8A++8oyCWNGXo0MRBfP/9sGQJnDlzkAYNGrBnzx5ee+01BbGkOzcSxj8DJS3LKmFZ\nlj8QBiy84p4FmF4xlmXlxQxb707BOtOXXbvM2cG9e8P586bN19fMmt63zzwf1iEOkobYtll9N3iw\nqy0kBBYtghMn/qV+/frs37+fZcuW0bBhQ+cKFXHIdYepbduOsSzraeAbzPPgT23b3mpZ1lBgk23b\nCy+91tiyrG1ALPCibdvHr/5RJVlRUTBqFLzxBly86GovW9bMkq5a1bnaRG6RbZve8IgRrrbGjc3p\nnZkyQYsWj3HkyBGWL19OzZo1nStUxEGWbduOfOKgoCB7U/wp4eldXJw5zKFPH9i+3dXu42PWDA8d\navYEFEljbBteftlM2Ir3wANmZV78t/Tu3bs5ceIEQUFBzhQp4iaWZW22bTvZb3Rth+mUqCj45htz\nYnrRombMLmEQV64MP/0EI0cqiCXNsW2zTOmBBxIHcfPmMH8+7N37Jy+99BJxcXHceeedCmJJ97Qd\nprv9/Td89JE5oPXkyaSvZ81qhql794YM+uuRtCUuDr7+Gt56CzZsSPxaq1YQHg5///0HISEh2LZN\n7969KVq0aPIfTCQd0U97d7Bts3bj/fdh+fLk78mTB9q0MbNcihRxb30iKeDcOejQwUzKSsiyoGtX\nszhg+/b/ERISgp+fH6tXr1YQi1yiME5tf/8NvXrBypVJXytWDNq2hdatoVYt9YQlzTp8GFq0gITT\nQPz94bHHoF8/c1DY5s2badSoEVmyZGH16tWU1G5xIpfpp39qiYw0z3uHDzfvx7Ms81OrVy8zpdRH\nj+0lbduxA5o2hT17XG29e5uVeIUKudqOHz9OgQIFWLJkCSVKlHB/oSIeTGGcki5cMGcGr14Nc+ea\nk5Pi+fiYAO7bF4oXd6xEkZS0ahWEhrq2SffxgQ8/hO7dXfccO3aMvHnz0rhxY7Zs2UIG
jQCJJKFu\nWUrYvNn0cnPlMn++9VbiIA4Kgp9/Ns+MFcTiBXbtMk9YQkJcQZw5M3z1VeIgXrt2LXfeeSdffvkl\ngIJY5CoUxrcjLs4cPVOzpjn3LSoq8evZssG4cWZaaeXKztQokoLOnoUBA6BcObNWOF6+fLB2rXkC\nE2/FihU0a9aMokWLUrt2bbfXKpKW6NfUW3X4MHTpAsuuOJyqXDkIDoaGDc1b9uyOlCeS0iIizOFg\nv/ySuP3RR81gUGCgq23JkiW0adOGMmXKsGLFCrQXvci1KYxv1qlTZmvKN980gRwvKMgcZVi6tHO1\niaSi559PHMQ1asC770L1Kw5U/fvvv2ndujUVK1Zk+fLl5NY+6iLXpTC+Ub/+ahZKzphhJmol1L8/\nDBtm1nKIeKHZs+Hjj13XI0eaJUvJHa5UsmRJPvzwQ9q2bUvOnDndV6RIGqa9qa/l4kUzK3rCBPjx\nx6SvFyxoesmNG7u/NhE32b0bKlWCM2fMdfv2MGtW0iCeNWsWpUqVorLmR4gk61p7U6tnnJy//4ZP\nP4XJk+HYsaSvV6hgFlI+8ojZvlLES0VFmaVL8UF8550wcWLSIJ4yZQpdu3alTZs2zJ071/2FiqRx\nCuN4x46ZjXOnTTMHNFzJzw8eftisFa5dO/nxOREv07+/a1ctPz/TI86RI/E9EydOpEePHjRq1IjP\nP//c/UWKeIH0HcbxR8t88IE5SiYmJuk9RYvCU09Bt25QoID7axRxyLvvwnvvua5Hjkx6pPb48ePp\n06cPzZs3Z+7cuQTohDGRW5I+w/jCBXNq0oQJsG1b0tf9/KBZM3j8cXPmmzYqkHRm2jQzezpe69bw\n7LOJ74mLi+Obb76hdevWhIeH468JjCK3LP2lzNat5nSkHTuSvlajBnTqZB6S5cnj/tpEPMDixeb3\n0Hi1a8P06YmfzERERJApUybmzJmDr68vfn5+7i9UxIukrx24Zs6EatUSB3HWrNCzJ/z+u5kx3auX\ngljSre+/h3btIDbWXFeoYM4nzpzZXNu2zZAhQ6hVqxanT58mICBAQSySAtJHGEdFmTG2jh1da4Qz\nZzYPxQ4cMMPVFSo4W6OIw/75B1q2NDttgdlGfdkys+U6mCB+9dVXef3117nvvvvIqpUEIinG+4ep\nL16EBx9MfJ5wqVJmY93y5Z2rS8SDREdDhw5w4oS5zp8fli+HwoXNtW3b9OvXj7Fjx9KjRw8mTJiA\nj47/FEkx3v2vKSbG9IYTBvFDD5kTlBTEIpcNHgw//GDe9/WFL7+EkiVdr48YMYKxY8fSp08fPvzw\nQwWxSArz3p6xbZslSfPnu9peew2GDNEaYZEEli83W63HGzYM6tRJfE/nzp3x9fVlwIABWPr3I5Li\nvPfX21degU8+cV2/8IKCWOQKhw6ZBQTxGjUyRyQCxMbGMnnyZGJjYylatCgvvfSSglgklXhnGI8b\nZ850i9e5M4wapSAWSSA21hx/eOSIuS5Y0Kwv9vGBmJgYOnfuzJNPPsmiRYucLVQkHfC+YerISBg0\nyHXdooXZY1rPuEQSGTECVq8271uWWUtcoABER0fzyCOPMGfOHEaMGEGrVq2cLVQkHfC+MD550rWr\nfY4c5uw3rYMUSWT9evPUJt7AgdCwIURGRhIWFsaCBQsYM2YML7zwgmM1iqQn3hfG0dGu97NmhUyZ\nnKtFxAMdPWqWMcXFmet69czcRoBt27axfPly3n//fZ5++mnnihRJZ7w7jNUjFkkkLg66dIGDB811\nnjwwYwZYVizgS6VKldi5cyeFChVyskyRdMf7HqQqjEWu6p13YMkS1/Xnn0OOHOdo1KgRH3/8MYCC\nWMQBCmORdCA62qz2e/FFV1u/flCnzhmaNm3KunXryJYtm3MFiqRzGqYW8XK7d5uN6H76ydVWvTq8\n+OIpGjduyubNm5k1axbt2rVzrkiRdE49YxEvFh4OlSolDuJGjWDu3EiaNQvhl19+Ye7cuQpiEYcp\njEW81McfQ1iYa6VfhgwwcqQ5ialIkYx06NCBBQsWaB2xiAfQMLWIF5o61WzNHu+uu8xx3kWL/sfm\nzf9StWpV+vbt61yBIpKIwljEy4SHQ9euruuqVWHFCjh37gD16zfk/Pnz7Ny5k4CAAOeKFJFEFMYi\nXuSrr+CRR1wbetx7rxmWPn36Hxo2bMiRI0dYunSpgljEw+iZsYiXmDUL2rUzB0AAlC1resSnT++h\nXr16HDt2jBUrVlC7dm1nCxWRJBTGImmcbZtDyTp0cH373303rFoF+fLB6NGjOXv2LKtWraJ69erO\nFisiyVIYi6RhsbHwzDPQv7+rrWxZcxpT/EZa77zzDhs2bKBKlSrOFCki16UwFkmjTp2Chx+G8eNd\nbfXqwfffw+nTfxAcHMzRo0fx9/enZMmSzhUqItelMBZJg5Ytg3vugQULXG3t28M338C+fb/RoEED\n/vzzT06ePOlckSJywxTGImnI2bPQvTs88AAcOOBqf+EFs474jz820bBhQzJnzsy6desoVaqUc8WK\nyA3T0iaRNGL7dmjWDPbudbXlywcffght28KmTZsIDg4md+7crFmzhuLFiztVqojcJPWMRdKArVuh\nQYPEQdy2Lfzxh/kToEiRItStW5f169criEXSGIWxiIfbssUE8ZEj5jprVpgxA+bMgfz54bfffiMm\nJoaCBQuyaNEiihYt6mi9InLzFMYiHux//4P774djx8x1tmxmklaHDmBZ8M0331CzZk0GDx7sbKEi\nclsUxiIe6vffoWFDOH7cXGfPDsuXQ61a5nrRokW0bNmSMmXK8PzzzztXqIjcNoWxiAfavRuaNIET\nJ8x1jhxma8saNcz1/PnzadOmDRUrVmTVqlXkzZvXuWJF5LZpNrWIh/nvP2jc2PwJpke8ciUEBZnr\n06dP061bN6pUqcKyZcvIkSOHc8WKSIpQGIt4kNOnoWlT2LXLXAcEwNdfu4IYIEeOHCxfvpzSpUuT\nLVs2ZwoVkRSlYWoRDxERAS1bmklbAL6+5mzievXM9aeffsp7770HQFBQkIJYxIsojEU8wPHj0KgR\nrF/vaps82YQzwEcffUS3bt1YunQpsfFnJIqI11AYizhszx6oXdsc8BBv1Cjo0sW8P27cOHr27Enz\n5s1ZsGABvr6+jtQpIqlHYSzioM2boWZN+Osvc21ZMHYs9OtnrkePHs2zzz7LQw89xLx58wgICHCu\nWBFJNZrAJeKQ1avNMPT58+Y6Y0aYNg3atXPdkzFjRkJDQ5k2bRp++n4W8VrqGYs44Ntv4cEHXUGc\nK5dZR9yuHdi2zd5Lm1D36dOHmTNnKohFvJzCWMTNNm6E5s3hwgVzHRhonhfXrWuC+OWXX+aee+7h\nr0tj15ZlOVitiLiDwljEjX77zeysdfasuS5Y0AxXly1rgrhv3768/fbbdOrUiZIlSzpbrIi4jcJY\nxE22bzfLl06dMtd585qdtUqVgri4OPr06cM
777zDM888w4QJE/Dx8b5/niKSPO/7164wFg909Cg0\na+Y6fSlnTnPoQ/ny5nrKlCl88MEH9OvXj3fffVdD0yLpjGZTi6SyyEho0wYuzckiSxZYtgwqVXLd\n07lzZzJnzkxoaKiCWCQdUs9YJBXZNjz1FHz3nbm2LJg5E6pXh5iYGPr378+hQ4fIkCEDYWFhCmKR\ndEphLJKKRo+GKVNc1yNHmiVNUVFRhIWFMWrUKBYvXuxYfSLiGTRMLZJKFiyAAQNc1127Qt++EBkZ\nSfv27Vm4cCFjx47liSeecK5IEfEICmORVLB+PYSFmWFqMCcvffghXLwYQdu2bVm6dCkffPABvXr1\ncrZQEfEIGqYWSWG//WaGoiMjzfVdd8GXX4K/P1y4cIH9+/czadIkBbGIXKaesUgK2rULmjaFM2fM\ndcGCZglTQMA5oqL8yZMnD5s2bcLf39/ZQkXEo6hnLJJCDh2Cxo3h8GFznSMHfPMN5MlzmiZNmvDo\no49i27aCWESSUBiLpIB//oEGDWD3bnMdEACLFkHRoidp3LgxGzdupH379lq6JCLJ0jC1yG366y+z\nzeW//5prX1+YOxfKlj1OcHAj/vjjD+bOnUurVq2cLVREPNYN9Ywty2pqWdZflmXttCzrpWvc19ay\nLNuyrKCUK/EmKYzFjX75xZy2FB/E/v4wezY0a2bTpk0btm3bxldffaUgFpFrum7P2LIsX+ADoBGw\nH/jZsqyFtm1vu+K+bMCzwE+pUegNUxiLm3z/vdlvOn6yVubMZm1xo0YAFiNHjuTs2bOEhIQ4WaaI\npAE30jOuBuy0bXu3bdtRwCwguV/zhwFvAxdTsL6bExvrWthpWWa8UCQV7Nhhli/FB3HOnOYEpnLl\nDjBx4kQAqlevriAWkRtyI2EcCPyb4Hr/pbbLLMuqDBS1bdvZff2iolzvq1csqeTkSRPEJ0+a6/z5\nYd06CAz8h/r169OvXz8OHTrkbJEikqbc9gQuy7J8gLFAlxu4tzvQHaBYsWK3+6mT0hC1pLLoaGjX\nzvSMwcyaXrwYsmbdTb16DTl16hQrVqygUKFCzhYqImnKjfSMDwBFE1wXudQWLxtwD7DWsqy9QA1g\nYXKTuGzbnmjbdpBt20H58uW79aqvRmEsqci24ZlnYNUqV9vnn0OOHH9Tv359zp49y+rVq6levbpz\nRYpImnQjYfwzUNKyrBKWZfkDYcDC+Bdt2z5t23Ze27aL27ZdHNgAtLRte1OqVHwtCmNJRWPHwkcf\nua6HDjW95A0bNhAViMGxQgAAG29JREFUFcWaNWuoXLmycwWKSJp13TC2bTsGeBr4BtgOzLZte6tl\nWUMty2qZ2gXeFIWxpIK4OHj5ZejXz9XWoQO8+KLZfLpTp07s2LGDihUrOlShiKR1N7TO2LbtJbZt\nl7Jt+y7btodfanvNtu2FydzbwJFeMSiMJcVFRkKnTvDWW662WrWgT59fKVWqJOvWrQMgR44cDlUo\nIt7Au7bDVBhLCjp1Ch54AGbMcLW1aAEjRmykWbOGWJZFkSJFnCtQRLyGd22HqTCWFHLhgtm8Y1OC\nMZ4ePaBjxx9o0aIpefPmZc2aNdxxxx3OFSkiXkNhLHKFuDh47LHEQTxiBLRqtZ1q1RpTuHBhVq9e\nrV6xiKQYDVOLXGHwYHPQQ7z33zcTuMqUKc3zzz/PunXrFMQikqK8N4x1Zqzcghkz4I03XNd9+kCZ\nMiv5559/8PHxYdiwYdrQQ0RSnPeGsXrGcpM2bICuXV3XTZpAw4Zf07x5c1544QXnChMRr6cwFgG+\n/dacwBRplg5Ttix07Pgl7dq14d5772XSpEnOFigiXk1hLOnenDlm5nT8wQ958kD37jPp2jWUatWq\nsWLFCnLlyuVskSLi1RTGkq698w6Ehrp6xPnzw5IlscyY8Q61a9dm2bJl2tBDRFKdljZJuhQdDX37\nmpnS8UqXhsWL47jrLl+WLl1KQEAAWbJkca5IEUk31DOWdOfQIQgOThzEtWtDt24TePbZlkRGRpIn\nTx4FsYi4jcJY0pVvv4XKlc2f8dq2hVat3qV//974+HjXPwkRSRs0TC3pgm3DuHFmaDo21rT5+Jg1\nxT4+I+nffwBt27ZlxowZ+GuNuoi4mXd1AxTGkozISHjiCXjuOVcQ580Ly5eDv/8YXnppAGFhYcya\nNUtBLCKOUM9YvNrRo9CmDXz3nautWjWz3WXRopAr1/307t2b9957D19fX+cKFZF0TT1j8VpbtkDV\nqomDuHNnWLfO5s8/VwBQuXJlxo8fryAWEUcpjMUrzZ4NNWvCvn3m+v/t3X+cjXXex/HXxzCspfzM\nbSkkkojcQm4bZvzKz70li7RItyJZbKLftkj2fpRtd0PK3EJkExqWGb8GtcWqtG201pAkNon1m/n1\nvf+4ZnaGjDnMmXOdc+b9fDzm8TjXda7HOW9fY96+3+vMdZnBb34D//d/jgkTxtCpUyeSkpL8DSki\nkk3L1BJV0tPh0Ufht7/N3VeunHcDiG7dshg5ciQzZsxg9OjRdO7c2b+gIiJ5qIwlahw8CH37nr8s\nfcMNsHQpNGyYxbBhw5g9ezaPPvooL7zwAmbmX1gRkTy0TC0R74svYNQoaNDg/CLu1Qs++ggaNYIP\nPviAhIQEnnrqKRWxiIQdzYwlYiUleeeBU1LO31+iBEye7C1X51zDo02bNnzyySc0bdo09EFFRAqg\nmbFEpNdegzvv/GER33ij9/vDEyZARkYaAwYM+PcHtVTEIhKuVMYScZYsgQcfzN2OifF+l3jtWm/J\nOj4ezp07R58+fVi4cCG7du3yL6yISAC0TC0RJSUF+veHrCxvu1kzWLbMu4BHjjNnztC7d2+SkpKY\nPn06w4cP9yesiEiAVMYSMT75xPtQVlqat12/Pqxa5d2DOMfZs2fp0aMH69ev5/XXX2fo0KH+hBUR\nuQwqY4kI+/ZBly5w4oS3/ZOfeOeG8xYxQGxsLPXr12fQoEHce++9oQ8qInIFVMYS9pzzbvTw3Xfe\ndsWKXhHXqpV7zLFjxzh69Ci1a9dm+vTp/gQVEblCKmMJe7NnwxrvUtKYeeeIb7459/mjR4/SuXNn\njh49yvbt23XnJRGJOCpjCWv79sHYsbnbY8fCHXfkbh8+fJhOnTqxfft2Fi9erCIWkYikMpaw5RwM\nG5Z7nrh+fXjuudznDx06RIcOHdi1axfvvvsuXbp08SeoiEghqYwlbCUkQHKy99jM2/7Rj3KfHzdu\nHKmpqaxYsYL4+Hh/QoqIBIEu+iFhaf/+85enR4+G//qv8495+eWXSUlJURGLSMRTGUtYGj8ejh/3\nHterB5MmeY/37t3Lfffdx5kzZ6hQoQItW7b0L6SISJCojCXsbNni3X84x6xZULYs7N69m7Zt27J0\n6VL27NnjX0ARkSBTGUtYcQ7GjMnd7t0b2rWDnTt3cscdd3Dq1CnWr1/PzXl/t0lEJMLpA1wSVhYt\ngg
8/9B7Hxnq3SNyxYwdxcXE450hJSaFx48b+hhQRCTLNjCVsnDnjnSvOMWoU1K0LWVlZVKtWjQ0b\nNqiIRSQqaWYsYWPaNO8iHwBVqsCQIftxrgaNGjVi27ZtlCgRXf93FBHJEV0/3VTGEevAAZgyJXd7\nyJAttG7diJdeeglARSwiUS26fsKpjCPSiRPQsyecPOlt16nzZ2bO7EjlypXp06ePv+FEREJAZSy+\nOnsWfvYz+Phjb9tsAwcPdqZ69eps2rSJWnlvzSQiEqVUxuKbzEwYOBDWr8/Z8z2lS/fk+utrsXHj\nRmrUqOFnPBGRkNEHuMQXzsGIEfDOO7n7Jk+uTOPGb9KqVSuqVq3qXzgRkRBTGYsvnnrKu7KW5116\n9IDHHuuFWQ8/Y4mI+CJ6lqkzM73pFni3+ImJ8TeP5Ovll2Hy5JytxZj14V//ehFwPqYSEfFP9JSx\nZsURYf587w5MnoWY9eP221uyYsUKzMzPaCIivlEZS8isXAlDhuRsvQEMpE2bNiQnJ3HVVVf5mExE\nxF/Rc85YZRzWUlKgTx/IyPC2K1f+lEaN4li58l3Kli3rbzgREZ+pjKXILVgAgwfn/BUdo3btq3n/\n/ZeoUiWN0qVL+5xORMR/WqaWIuOcd9ele+7J+euZRokSDZkzZx81apiKWEQkm8pYikRmJjz8cN67\nME0FxtKlS2tat67uYzIRkfCjMpagcw4eeABeeSVnz3PABO66qz/vvruQUvr7ERE5j8pYgm7SJJg9\nO2drNvA0AwcOYtGieZQsGT0fUxARCZbo+cmoMg4Lb7wBTz+du92/f1+aNj3CI4/8SrdBFBHJh8pY\ngmbtWrj/fvCupPUH2rUbwpw55YmNHedzMhGR8BY9UxWVsa/+9je46y7IyMgCRgCj6NlzPrGxficT\nEQl/mhlLoZ05A337wvHjmcAwIIGHHprA6NEP+B1NRCQiaGYshfb44/D3v2cAg4EEHnzwGX7/++d1\nrWkRkQBpZiyFkpICv/0twEFgLb16TWbGjMd9TiUiElmip4zT0nIfq4xD4tgxGDQoHe/b6Fo6dNjO\n0qWV/I4lIhJxtEwtV2zkyLN8/XVvYByVKsHcuZXQyrSIyOVTGcsVWbToDPPn9wJWAPWYOROq6yqX\nIiJXJHqWqVXGIbNz5ykGDuwBbAASGDBgCHff7XMoEZEIpjKWy5Ke7mjZshcZGRuBudSsOZA//MHv\nVCIikU3L1HJZJk40jh0bASwkJmYgCxdCxYp+pxIRiWyaGUtAjhw5wsyZf2HKlC5AbwCefRbatPE3\nl4hINFAZS4EOHz5M+/Yd2b59F859CVSlY0eYMMHvZCIi0UFlLJf07bff0r59PDt37sa5d4GqVKsG\n8+aBbsIkIhIcKmPJ14EDB4iLi2fXrn1kZf0JiMMM3nwTqlXzO52ISPRQGUu+5s1bSGrqfrKykoCf\nAt6lL+Pj/c0lIhJtAlpoNLMuZrbTzFLN7AdnCs1srJntMLPPzGydmdUKftQCqIyDxjlHejp88MFY\nMjM/I6eIX3gBRo3yN5uISDQqsIzNLAZ4BbgTaAj0N7OGFxy2DWjunLsFWAz8JthBC6QyDorU1FSa\nNGnBT3/6BYmJBtQB4JlnYPx4f7OJiESrQJapWwCpzrk9AGb2FtAL2JFzgHMuJc/xm4GBwQwZEJVx\noX322d9p0yaOEyfSgXP/3j9unFfGIiJSNAJZpq4BfJ1ne3/2vvwMBVYVJtQVyVvGsbEhf/tIt2DB\n5zRr1o4TJzKBFKApJUrAk0/C1KnoBhAiIkUoqB/gMrOBQHOgbT7PDwOGAVx33XXBfGvNjAth5sy/\nM3x4e6AUsB5owH/+J8yaBc2a+RxORKQYCGRm/A1wbZ7tmtn7zmNmHYAngJ7OuXMXPg/gnJvlnGvu\nnGtetWrVK8mbP5XxFVm4EEaOvA7oDGykfPkG/O53sGWLilhEJFQCmRlvBeqZWR28Eu4HDMh7gJnd\nCrwKdHHOHQp6ykCojC/bE09s4/nnrweuBuZTpw6sWQN16/qdTESkeClwZuycywBGAsnAF8AfnXPb\nzexZM+uZfdj/AuWAt83sUzNLLLLE+VEZX5YRI97n+efvAB4CoGFDeP99FbGIiB8COmfsnFsJrLxg\n39N5HncIcq7LpzIO2NixG5gxoxve2Yep3HYbrFoFlSv7nUxEpHiKnqsLq4wD8tRTa5g2rStQG9hA\n27Y1WLdORSwi4ieVcTGSlJTOpEkjgHrABm677T9YvhzKl/c7mYhI8aZrUxcTW7dCnz6lgCSgAjfe\nWJmVK1XEIiLhQDPjKJeVBUOHvk3r1qM4dcoBdalZszKrV0OVKn6nExER0Mw4qu3dC926vcmOHb8A\nWgNnqFSpLKtXQ7CvuSIiIldOM+MoNXcuNGgwhx077sW7INoqbr65LCkpcNNNfqcTEZG8VMZRaMYM\nGDTodc6dGwJ0wGwFEyaU4+OP4ZZb/E4nIiIX0jJ1lJkzB0aMAKgO/Df16i1g7twytGrlby4REcmf\nyjiKvPUW3HffF8BNQDdatOjGmjVw1VV+JxMRkUvRMnWUWLYMBgyYgnONgE00bQpJSSpiEZFIoDKO\nAps3O/r0+TXOPQ7056abWrN6NVSs6HcyEREJhJapI9zBg46OHZ8gM3MKMJi6dV9n3boYgn2HShER\nKTqaGUewtDTo0GENJ09OAR6gQoXZJCfHUL2638lERORyaGYcwX75S9ixoyOwHLNuLFpkugWiiEgE\n0sw4AmVlZdG58zhmzvwrYEB3pk41OnXyO5mIiFwJzYwjTGZmJl273s/q1XOAikAT+vWDRx7xOZiI\niFyx6CjjzExwzntsBjEx/uYpIhkZGXTvPojVqxcAE4HHaNIEZs/2/tgiIhKZoqOMi8GsOD09nZ49\n7yE5+W3geeAxataExEQoW9bvdCIiUhgq4wjx1VdZbNhwAngRGEulSujuSyIiUUJlHObOnj3L11+f\noVevipw9uwKI4cc/hpUrdfclEZFooTIOY6dPn6Zbt5/xl78c4/TpPwMlKVUKliyBli39TiciIsGi\nMg5TJ0+epGvXHrz33kYgASiJGcybh36FSUQkyqiMw9Dx48fp0qUrH374ITAfGABAQgL8/Oe+RhMR\nkSKgMg5D99//AJs3bwHeAu4GYPp0GDzYz1QiIlJUouMKXFFUxllZcObMCzi3jJwifuklGD7c31wi\nIlJ0NDMOE9999x0zZszg5MknWbGiFlALgEmTYMwYf7OJiEjRUhmHgX/+85/Ex8eza9ce0tN7A40A\neOghePxxf7OJiEjRUxn77JtvviEuLo6vvtpPRsZKcoq4Z094+WVd5lJEpDhQGfto3759xMXFceDA\nISAZ59oA0KIFLFwYtZfYFhGRC6iMfbR79x4OHTrF2bOrca4VAHXqwPLlut60iEhxojL2wcmTJ4mN\nLcf8+e04cWI34DXvtddCUhJcc42/+UREJLT0q00h9sUXX1C
/fgMaN55HQgLkFPHtt8PWrVC/vq/x\nRETEByrjEPr8889p27Ydhw9n8I9/NPv3/l/8AlJSoFo1H8OJiIhvVMYh8umnn9KuXTuOHy9JevpG\n4GYApk6FOXOgdGlf44mIiI90zjgEvv32W+Li4jh3rhznzq0HbgBg2jQYPdrfbCIi4j/NjEPgmmuq\n0aTJs5w+vYmcIn78cRWxiIh4NDMuQu+99x5mZXjjjdvYsGHkv/cPG+Zd5lJERARUxkVm3bp19OjR\nkxIlmnLq1PuAdymt3r29OzDpyloiIpJDy9RFIDk5ma5du5OWdj2nTi0hp4gHDIA339SVtURE5Hwq\n4yBbtmwF3br1JC2tAZmZKUA1SpaE3/8e5s+HMmV8jSciImFIy9RBtHcvDB36BpmZtwDJQCV+8hNY\nvNi7qIeIiMjFaGYcJAsWZNCkCRw5Mh9YC1QiPh4++URFLCIil6YyDoLevedzzz0tOH78e6A0MTFX\nM2UKrF6tq2qJiEjBtExdSH36JLB06f1AO6AMderAggXQqlVIY4iISATTzLgQ+vZ9lXfeGQp0BFbQ\nqdOP2bZNRSwiIpcnOmbGaWm5j0NUxkOGzOXttx8EugGLad++DMuWwY9+FJK3FxGRKKKZ8RV47TWY\nMyceGAUsoXXrMiQmqohFROTKqIwvg3Pw858vY9iwTKAG8DLNm8eyciWUK1dkbysiIlFOZRygtDTH\nrbdO5I9//G8gAYBbb4XkZLj66iJ5SxERKSZUxgE4ftzRoMHj/PWvvwaGAPfRuTNs3AiVKgX97URE\npJhRGRdgxw5HnTq/4ssvXwAeBF5nyJAYli+H8uWD+lYiIlJMqYwvYfFiuO22VI4ceRXvw1rTmTix\nBLNn+34JbBERiSLR8atNQS7j9HQYP94xbZoB9YBPKVPmBl57zRg4sNAvLyIich6V8QXOnoXevTNZ\ntep+4DZgBHXr1mPJErjllkK9tIiIyEVpmTqPM2egR48MVq26F5gDfE+PHvDRRypiEREpOirjbKdP\nQ/fuaaxd2w9YCLzAY489xbJlUKFCUFKKiIhclJapgVOnoHv3LDZsuBtIBF5i4sQxPPNM0BKKiIjk\nq1jPjJ2DFSugWTPYsKEE0Bb4A889pyIWEZHQKbYz4+3bYcwYWLPmNPAPoCkwlilTYMKEoggpIiJy\nccVyZjx5sveBrDVrTgJdgfaUL3+UV19VEYuISOgVu5nx9Onw5JMAx/GKeDPx8fNYuLAiVasWYUYR\nEZF8FKuZ8fLl8PDDAEeBjpht4cUXF7F2bX8VsYiI+Cb6ZsaxsRc9ZOtW6NcPsrIAXsRsG2+99Q59\n+/YMSUQREZH8FIuZ8ZdfQvfu3u8SA9Su/Qx/+tP7KmIREQkLUV/G3sU84NChg0BvKlT4lqSkUtx5\nZ4vQZhQREclH9C1TX1DGjzwCO3bsB+KAAzz//G5uvLFaSOOJiIhcSlTPjBMTYcaMr/Au5vFPxo9P\nZvjw1iGPJyIicimRPzPOzPQupQVgBjExABw8CIMHfwm0A47Tvv1apkzR0rSIiISfyJ8ZX2RWnJUF\ngwfD0aNlgWupWnUdixe3wMyXhCIiIpcU+TPji5Txk09+yerVNYFqwHssWmRUquRLOhERkQIFNDM2\nsy5mttPMUs3sBxeMNLPSZrYo+/ktZlY72EHzlaeMXalSDB36N6ZMaQmMBmD8eKN9+5ClERERuWwF\nlrGZxQCvAHcCDYH+ZtbwgsOGAkedczcA04CpwQ6arzxlvOkkJCS0B2KBX9K8OTz7bMiSiIiIXJFA\nZsYtgFTn3B7nXBrwFtDrgmN6AW9kP14MxJuF6AxtdhlvBXplHAN+DGyka9f6JCfne0EuERGRsBFI\nGdcAvs6zvT9730WPcc5lAMeAysEIWJAli9I5B/QGriIG2MSvf12X5cvReWIREYkIIf0Al5kNA4YB\nXHfddUF5zXq10ykNvA1kWE1OrqxFly5BeWkREZGQCKSMvwGuzbNdM3vfxY7Zb2YlgauB7y98Iefc\nLGAWQPPmzd2VBL5Q4zsqsq3rE+z8OJ0egytRSUUsIiIRJpAy3grUM7M6eKXbDxhwwTGJwCDgQ6AP\nsN45F5SyLVDVqjRdMYlGGQXeylhERCQsFVjGzrkMMxsJJAMxQIJzbruZPQt85JxLBGYD88wsFTiC\nV9ghY6YiFhGRyBXQOWPn3Epg5QX7ns7z+Cxwd3CjiYiIFA+RfzlMERGRCKcyFhER8ZnKWERExGcq\nYxEREZ+pjEVERHymMhYREfGZylhERMRnKmMRERGfqYxFRER8pjIWERHxmcpYRETEZypjERERn6mM\nRUREfKYyFhER8ZnKWERExGfmnPPnjc2+A74K4ktWAQ4H8fWKK41j4WkMC09jWHgaw8IL9hjWcs5V\nvdgTvpVxsJnZR8655n7niHQax8LTGBaexrDwNIaFF8ox1DK1iIiIz1TGIiIiPoumMp7ld4AooXEs\nPI1h4WkMC09jWHghG8OoOWcsIiISqaJpZiwiIhKRIq6MzayLme00s1Qzm3CR50ub2aLs57eYWe3Q\npwxvAYzhWDPbYWafmdk6M6vlR85wVtAY5jnuLjNzZqZPtV5EIONoZn2zvx+3m9mCUGcMdwH8e77O\nzFLMbFv2v+mufuQMV2aWYGaHzOzzfJ43M/td9vh+ZmbNiiSIcy5ivoAYYDdwPRAL/BVoeMExI4CZ\n2Y/7AYv8zh1OXwGOYXugbPbj4RrDyx/D7OPKA5uAzUBzv3OH21eA34v1gG1Axezta/zOHU5fAY7h\nLGB49uOGwF6/c4fTF3AH0Az4PJ/nuwKrAANaAVuKIkekzYxbAKnOuT3OuTTgLaDXBcf0At7IfrwY\niDczC2HGcFfgGDrnUpxzp7M3NwM1Q5wx3AXyfQjwHDAVOBvKcBEkkHH8H+AV59xRAOfcoRBnDHeB\njKEDrsp+fDVwIIT5wp5zbhNw5BKH9ALmOs9moIKZVQ92jkgr4xrA13m292fvu+gxzrkM4BhQOSTp\nIkMgY5jXULz/FUquAscweynrWufcn0IZLMIE8r1YH6hvZn82s81m1iVk6SJDIGM4ERhoZvuBlcDD\noYkWNS73Z+YVKRnsF5ToYWYDgeZAW7+zRBIzKwG8BAz2OUo0KIm3VN0Ob4Vmk5k1ds79y9dUkaU/\nMMc596KZ3Q7MM7NGzrksv4NJrkibGX8DXJtnu2b2voseY2Yl8ZZlvg9JusgQyBhiZh2AJ4Cezrlz\nIcoWKQoaw/JAI2CDme3FO8+UqA9x/UAg34v7gUTnXLpz7kvgH3jlLJ5AxnAo8EcA59yHQBm8ay5L\nYAL6mVlYkVbGW4F6ZlbHzGLxPqCVeMExicCg7Md9gPUu+yy8AAGMoZndCryKV8Q6R/dDlxxD59wx\n51wV51xt51xtvPPuPZ1zH/kTN2wF8u95Gd6sGDOrgrdsvSeUIcNcIGO4D4gHMLOb8Mr4u5CmjGyJ\nwC+yP1XdCjjmnD
sY7DeJqGVq51yGmY0EkvE+RZjgnNtuZs8CHznnEoHZeMswqXgn5fv5lzj8BDiG\n/wuUA97O/uzbPudcT99Ch5kAx1AKEOA4JgOdzGwHkAmMc85ppStbgGP4K+A1MxuD92GuwZqg5DKz\nhXj/4auSfV79GaAUgHNuJt559q5AKnAaGFIkOfR3IiIi4q9IW6YWERGJOipjERERn6mMRUREfKYy\nFhER8ZnKWERExGcqYxEREZ+pjEVERHymMhYREfHZ/wPNSlaT2CEdcgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = pd.concat(dfs)\n", + "import statsmodels.api as sm\n", + "grid = np.linspace(0, 1, 101)\n", + "fig = plt.figure(figsize=(8, 8))\n", + "plt.plot(grid, sm.distributions.ECDF(results['pivot'])(grid), 'b-', linewidth=3, label='Pivot')\n", + "plt.plot(grid, sm.distributions.ECDF(results['pvalue'])(grid), 'r-', linewidth=3, label='P-value')\n", + "plt.plot([0, 1], [0, 1], 'k--')\n", + "plt.legend(fontsize=15);" + ] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "all,-slideshow", + "formats": "ipynb,Rmd" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/source/algorithms/covtest.ipynb b/doc/source/algorithms/covtest.ipynb index 13ec59bfe..7a2aa98ed 100644 --- a/doc/source/algorithms/covtest.ipynb +++ b/doc/source/algorithms/covtest.ipynb @@ -416,5 +416,5 @@ } }, "nbformat": 4, - "nbformat_minor": 1 + "nbformat_minor": 2 } diff --git a/doc/source/algorithms/index.rst b/doc/source/algorithms/index.rst index 245c9e2eb..1f08e567d 100644 --- a/doc/source/algorithms/index.rst +++ b/doc/source/algorithms/index.rst @@ -10,3 +10,5 @@ post-selection inference. covtest.ipynb spacings + LASSO.ipynb + ROSI.ipynb \ No newline at end of file diff --git a/doc/source/conf.py b/doc/source/conf.py index addf6895c..5ab98cea8 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -44,7 +44,7 @@ 'sphinx_rtd_theme', 'texext.math_dollar', 'numpydoc', - 'nbsphinx' + 'myst_nb' ] # Current version (as of 11/2010) of numpydoc is only compatible with sphinx > @@ -118,16 +118,27 @@ # must exist either in Sphinx' static/ path, or in one of the custom paths # given in html_static_path. +# -- Options for HTML output + +html_theme = "sphinx_book_theme" html_theme_options = { - 'logo_only': True + "repository_url": "https://github.com/jonathan-taylor/selectinf.git", + "use_repository_button": True, +} +html_title = "Introduction to Statistical Learning (Python)" +html_logo = "logo.png" + +source_suffix = { + '.rst': 'restructuredtext', + '.ipynb': 'myst-nb', + '.myst': 'myst-nb', } -html_theme_path = ["../.."] -html_logo = "_static/logo.png" + html_show_sourcelink = True # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -html_title = 'Selection Documentation' +html_title = 'Selection Inference Documentation' # The name of an image file (within the static path) to place at the top of # the sidebar. 
diff --git a/doc/source/download.rst b/doc/source/download.rst index 6aef2651a..5858ba0ee 100644 --- a/doc/source/download.rst +++ b/doc/source/download.rst @@ -17,13 +17,20 @@ Selection depends on the following Python tools * `Pandas `_ +The package can be installed via pip + + pip install selectinf + +Development +~~~~~~~~~~~ + You can clone the selection repo using:: git clone https://github.com/selective-inference/Python-software.git Then installation is a simple call to python:: - cd selection + cd selectinf git submodule update --init pip install -r requirements.txt python setup.py install --prefix=MYDIR @@ -41,3 +48,10 @@ There is a small but growing suite of tests that be easily checked using `nose < cd tmp nosetests -v selectinf +Building documentation +---------------------- + + cd doc + make html + +To upload a fresh build of the documentation to your :code:`gh-pages` branch, use :code:`make github`. diff --git a/doc/source/learning/Basic_example.Rmd b/doc/source/learning/Basic_example.Rmd new file mode 100644 index 000000000..e57d8d571 --- /dev/null +++ b/doc/source/learning/Basic_example.Rmd @@ -0,0 +1,106 @@ +--- +jupyter: + jupytext: + cell_metadata_filter: all,-slideshow + formats: ipynb,Rmd + text_representation: + extension: .Rmd + format_name: rmarkdown + format_version: '1.1' + jupytext_version: 1.1.1 + kernelspec: + display_name: Python 3 + language: python + name: python3 +--- + +# Simple example + +Here we run a simple linear regression model (even without intercept) +and make a selection when the $Z$ score is larger than 2. + +The functions `partial_model_inference` and `pivot_plot` below are just simulation utilities +used to simulate results in least squares regression. The underlying functionality +is contained in the function `selectinf.learning.core.infer_general_target`. + + +```{python collapsed=TRUE} +import functools + +import numpy as np, pandas as pd +import matplotlib.pyplot as plt +# %matplotlib inline + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import partial_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler +from selectinf.learning.Rfitters import logit_fit +``` + +```{python} +np.random.seed(0) # for replicability +def simulate(n=20, p=1, s=1, signal=1, sigma=2, alpha=0.1, B=2000): + + # description of statistical problem + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + sampler = normal_sampler(S, covS) + + def base_algorithm(X, dispersion, sampler): + + success = np.zeros(p) + + scale = 0. 
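+        # with scale=0 the sampler presumably returns the observed score with no added randomization noise;
+        # the rule below keeps feature 0 only when its Z-statistic exceeds 2 and selects nothing otherwise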
+ noisy_S = sampler(scale=scale) + + Z = noisy_S / np.sqrt(np.linalg.norm(X)**2 * dispersion) + if Z > 2: + return set([0]) + else: + return set([]) + + selection_algorithm = functools.partial(base_algorithm, X, dispersion) + + # run selection algorithm + + return partial_model_inference(X, + y, + truth, + selection_algorithm, + sampler, + B=B, + fit_probability=logit_fit, + fit_args={'df':20}) +``` + +```{python} +dfs = [] +for i in range(1000): + df = simulate() + if df is not None: + dfs.append(df) +``` + +```{python} +fig = plt.figure(figsize=(8, 8)) +results = pd.concat(dfs) +pivot_plot(results, fig=fig); +``` + +```{python collapsed=TRUE} + +``` diff --git a/doc/source/learning/Basic_example.ipynb b/doc/source/learning/Basic_example.ipynb new file mode 100644 index 000000000..6b9989c17 --- /dev/null +++ b/doc/source/learning/Basic_example.ipynb @@ -0,0 +1,199 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Simple example\n", + "\n", + "Here we run a simple linear regression model (even without intercept) \n", + "and make a selection when the $Z$ score is larger than 2.\n", + "\n", + "The functions `partial_model_inference` and `pivot_plot` below are just simulation utilities\n", + "used to simulate results in least squares regression. The underlying functionality\n", + "is contained in the function `selectinf.learning.core.infer_general_target`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import functools\n", + "\n", + "import numpy as np, pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "from selectinf.tests.instance import gaussian_instance\n", + "\n", + "from selectinf.learning.utils import partial_model_inference, pivot_plot\n", + "from selectinf.learning.core import normal_sampler\n", + "from selectinf.learning.Rfitters import logit_fit" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/sklearn/ensemble/weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. 
It will be removed in a future NumPy release.\n", + " from numpy.core.umath_tests import inner1d\n", + "Using TensorFlow backend.\n", + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:455: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n", + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:456: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n", + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:457: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n", + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:458: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n", + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:459: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n", + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:462: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n", + "R[write to console]: Loaded gbm 2.1.5\n", + "\n", + "R[write to console]: randomForest 4.6-14\n", + "\n", + "R[write to console]: Type rfNews() to see new features/changes/bug fixes.\n", + "\n" + ] + } + ], + "source": [ + "np.random.seed(0) # for replicability\n", + "def simulate(n=20, p=1, s=1, signal=1, sigma=2, alpha=0.1, B=2000):\n", + "\n", + " # description of statistical problem\n", + "\n", + " X, y, truth = gaussian_instance(n=n,\n", + " p=p, \n", + " s=s,\n", + " equicorrelated=False,\n", + " rho=0.5, \n", + " sigma=sigma,\n", + " signal=signal,\n", + " random_signs=True,\n", + " scale=False)[:3]\n", + "\n", + " dispersion = sigma**2\n", + "\n", + " S = X.T.dot(y)\n", + " covS = dispersion * X.T.dot(X)\n", + " sampler = normal_sampler(S, covS)\n", + "\n", + " def base_algorithm(X, dispersion, sampler):\n", + "\n", + " success = np.zeros(p)\n", + "\n", + " scale = 0.\n", + " noisy_S = sampler(scale=scale)\n", + " \n", + " Z = noisy_S / np.sqrt(np.linalg.norm(X)**2 * dispersion)\n", + " if Z > 2:\n", + " return set([0])\n", + " else:\n", + " return set([])\n", + "\n", + " selection_algorithm = functools.partial(base_algorithm, X, dispersion)\n", + "\n", + " # run selection algorithm\n", + "\n", + " return partial_model_inference(X,\n", + " y,\n", + " truth,\n", + " selection_algorithm,\n", + " sampler,\n", + " B=B,\n", + " 
fit_probability=logit_fit,\n", + " fit_args={'df':20})" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jonathantaylor/git-repos/selectinf/selectinf/distributions/discrete_family.py:86: RuntimeWarning: divide by zero encountered in log\n", + " self._lw = np.array([np.log(v) for v in xw[:,1]])\n" + ] + } + ], + "source": [ + "dfs = []\n", + "for i in range(1000):\n", + " df = simulate()\n", + " if df is not None:\n", + " dfs.append(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfoAAAHpCAYAAABqV/58AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0\ndHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzdd3hUxdfA8e+kklClhCZVuliJiAUI\nhN6rIIK0nyDSXiwgIE1BsKIoxaBURZDeexUQAUWQJkU60jsJIWXeP24ISXaTbDbb93yeJw/ZuXPv\nHgzx7J05d0ZprRFCCCGEZ/JxdgBCCCGEsB9J9EIIIYQHk0QvhBBCeDBJ9EIIIYQHk0QvhBBCeDBJ\n9EIIIYQH83N2APaQN29eXbx4cWeHIYQQQjjEH3/8cUVrnc/cMY9M9MWLF2f37t3ODkMIIYRwCKXU\nqdSOydC9EEII4cEk0QshhBAeTBK9EEII4cEk0QshhBAeTBK9EEII4cE8sureErdu3eLSpUvExMQ4\nOxRhBX9/f0JCQsiRI4ezQxFCCJfmlYn+1q1bXLx4kcKFCxMUFIRSytkhiQzQWhMVFcW5c+cAJNkL\nIUQavHLo/tKlSxQuXJjg4GBJ8m5IKUVwcDCFCxfm0qVLzg5HCCFcmlcm+piYGIKCgpwdhsikoKAg\nmXoRQoh0eGWiB+RO3gPIz1AIIdLntYleCCGE8AaS6IUQQggPJoneTQ0fPpy8efM6Owy72b9/P0op\nNm3a5OxQhBDCrUmiF0IIITyYUxO9UmqKUuqSUmp/KseVUmqcUuqYUmqfUupZR8cokouJiSEuLs7Z\nYQghhNsaNw5Wr3bc+zl7wZxpwLfAjFSO1wdKJ3w9D0xM+FOk49q1a7z//vssXryYmzdv8uyzzzJ2\n7Fief/7hf74vvviC2bNnc+TIEbJkyULlypUZO3YspUqVSuwTFhZG3rx5qVOnDp988gknT57k5MmT\n/PDDD3z77besXbuWHj16sG/fPsqWLcu4ceOoWrVqsli+//57xo4dy7FjxyhQoAA9e/akf//+yfpM\nmDCB0aNHc+3aNWrWrEmfPn3s+x9I2NbZszB4MPz9N8THOzsaIVzWrdtQ7V/j+4sFIeSVGqivxtr1\nPZ2a6LXWW5RSxdPo0hSYobXWwA6lVC6lVEGt9X+2jMOVntLSOvPXiI6OplatWty4cYPPPvuMkJAQ\nJk6cSK1atTh69CgFChQA4OzZs/Tq1YtixYpx69YtJk2axIsvvsjRo0fJmTNn4vW2bdvG8ePH+eST\nTwgODk48FhkZSceOHenXrx8FChRgxIgRtGjRglOnThEcHAzAZ599xqBBg+jfvz9hYWH88ccfDBky\nhODgYHr16gXA4sWL6dmzJ2+++SbNmjVj8+bNdOnSJfP/IYRjXLwIL7xgJHshRJr8gdxAUYD/YOfc\nElQYCdmy2fFNtdZO/QKKA/tTObYMeDnJ6/VAaHrXrFSpkk7LwYMHk7020qtrfFlq2LBhOk+ePGaP\nff/999rf318fOXIksS0mJkaXLFlSv/vuu2bPiY2N1ZGRkTpbtmx6+vTpie3Vq1fXWbJk0RcuXDB5\nf0CvX78+sW3Pnj0a0CtXrtRaa33z5k2dNWtWPXz48GTnDhkyROfPn1/HxsZqrbV+7rnndL169ZL1\n+d///qcBvXHjxjT/O6T8WQoHi47W+uWXnf+LI1/y5QZfd0DXBF0U9MmEtgU00198kflfRWC31uZz\noscU4ymluimldiuldl++fNnZ4TjVunXrqFSpEiVKlCA2NpbY2FgAqlevzu7duxP77dixg9q1a5Mn\nTx78/PwIDg7mzp07HDlyJNn1KlWqRP78+U3eJyAggLCwsMTXFSpUAIyRAoDffvuNu3fv0rp168Q4\nYmNjqVmzJhcvXuTs2bPExsby559/0rRp02TXbtGihU3+Wwg769cPtm51dhRCuLzbGHPRG4DTQA3g\nFhASAn372ve9nT1Hn55zQJEkrx9NaDOhtY4AIgBCQ0O1/UNzXVeuXGHHjh34+/ubHHvssccAOH36\nNHXq1KFy5cp89913FCpUiICAABo2bMi9e/eSnWMuyQNkz54dH5+HnxUDAgIAEs+/cuUKAI8//rjZ\n88+cOUNgYCBxcXGEhIQkO5bytXBBU6bAhAnOjkIIl3cTI8n/lqTtDSAH8Fwo+Pra9/1dPdEvAXop\npWZjFOHd1Daen/dEuXPnJjQ0lIkTJ5ocCwwMBGDVqlVERkayePFismbNCkBsbCzXrl0zOcfapWZz\n584NwLJly8x+WChbtixBQUH4+vqabE4jm9WkY9EimDULnDl6tX27aZufH/z8MyQp6BTCkx0+DPPm\nwcmT5o9Hx95k/t63iOJgYlsh3mZJlg60mgGlQ3OaP9GGnJrolVI/A2FAXqXUWWAYRq0CWutJwAqg\nAXAMiAQ62yMO7WH3/+Hh4axZs4aiRYumemccFRWFj48Pfn4P/wn88ssvicP8tvDCCy8QFBTE+fPn\nadiwYar9nnnmGRYvXsybb76Z2LZgwQKbxeFxFi+G5s2dHYV5X34JrVo5OwohHOLUKXipJ5i5P0pw\nBSNtHUzS9g3n6cXY6VC6td1DBJxfdf9qOsc10NNB4bid+/fvM2/ePJP2+vXrM2nSJMLCwnj33Xcp\nWbIkV69eZefOnRQoUIB+/fpRs2ZN4uLi6Ny5M127duXAgQN8/vnn5MqVy2bx5cqVi+HDh9O3b19O\nnTpFtWrViI+P58iRI2zcuJGFCxcCMGjQIFq0aEGPHj1o3rw5mzdvZtWqVTaLw6No
DQMGODsK8zp2\nhIQnKYTwBp98klaSvwjUApIuE/Md0I0BA+CVV+wd3UOuPnQv0nD79m1atzb9SLhx40Y2btzI0KFD\nGTZsGBcvXiQkJITKlSvTpEkTAJ544gmmTZvG8OHDWbhwIU899RRz586lTZs2No2xf//+FCpUiLFj\nx/LFF1+QJUsWypQpk+x9mjdvzjfffMOYMWOYPn06YWFh/PDDD9StW9emsXiEX3+Ff/5xdhSmnnsO\nJk1yrWdVhbCju3fhxx9TOxoPNOJhklfAD0Bn6taFUaMcEGASSnvauDVGMV7S6vKUDh06RPny5R0Y\nkbAXr/tZtm8PP/3k7CiSe+opWL4cChd2diRCOMwPP8D//pdWjw1AQ+A+xppwr1G1qlFek1C+ZFNK\nqT+01qHmjskdvRDu4upVo+onpVGjjAVrnOGRR6BiRaMITwgvEhFh2taqFbz11oNXNdm1azF3794k\nLKw1ISFQvjz4OOGhdvntFMJdzJgB0dHJ2/LkgXfegYSnKYQQ9vfXX7BzZ8pWzYABitAk99Q1atRx\nZFip8pgFc4TwaFrDd9+ZtnfqJEleCAczvZv/h6xZXyZPnpNOiCZ9kuiFcAepFeF16+b4WITwYnfu\npCzCOwhU5+7d7YSH1+TMmTNOiix1kuiFcAfmJgTDwqBMGYeHIoQ3mzMHbt9+8GofxlIwFwG4ePEi\nJ1NbOceJZI5eCFeXWhFe9+6Oj0UID7N7N/zyC1y/bln/desefPcnUBswHqTPli0bK1asMNmm2xVI\nohfC1UVEmBbh5c3ruqvjCeEmFi82KuUzviDoTqAucAOAbNlysnbtKqpUqWLjCG1DEr0Qruy332DY\nMNN2KcITIlMOHjSWpch4kt+GsUWNMX7v6/sImzatpVKlSjaO0HZkjl4IV3X+PLRsCTExpsfeeMPx\n8QjhIW7cgGbNjMK6jNmEcSf/YJI+LwMHbnTpJA+S6N3W8OHDUUqZXSa2VatWyfaJT8/JkydRSrFs\n2TIbRigyJTraGFP8z8xmjW++KUV4QlgpLg5eew2OHs3omXsw9li7m/A6Py+9tInhw5+yaXz2IEP3\nbm7NmjXs2rWL5557zuprFCxYkN9++41y5crZMDKRKX36GMP2KT3/PHz1lePjEcJDDBsGK1aYtpcr\nB2+/nfp5sbEViYioy19/LSJXrkJMmLCBV14pa/e95G1BEr0by507N4ULF2bUqFEsWrTI6usEBga6\nbBGJV4qIMP84XYECMH++zM0LYaUFC8xvKJMjh7EGfdmyaZ3tT5cus+nduzf9+/enVKlS9grT5mTo\n3o0ppRg8eDBLlizh77//Ntvnv//+o0uXLpQsWZKgoCDKlCnDBx98wP379xP7pBy679Spk9kRgvHj\nxxMcHMzthIdI4+PjGTNmDKVKlSIwMJAyZcowffp0O/xNvcj27ea3evX3Nx6xk41jhLDKgQPw+uum\n7UoZ+0SlneQNgYGBREREuFWSB0n0bq9169aULl2aUanse3jlyhVy587Nl19+yapVq3jvvfeYOnUq\nvXv3TvWabdq0Yffu3Zw4cSJZ+5w5c2jQoAHZs2cHoHfv3owcOZJu3bqxfPlymjdvTpcuXWSu31pp\nFd+NGwcvveT4mITwANevG8V3d++aHhsxAho1Mm2fNWsWQ4YMwSN2eNVae9xXpUqVdFoOHjyYvMFY\nSdw1viw0bNgwnSdPHq211lOnTtU+Pj76n3/+0Vpr3bJlS129enWz58XExOiffvpJBwYG6ujoaK21\n1idOnNCAXrp0aWKfPHny6NGjRyeed/bsWa2U0nPnztVaa3306FGtlNLTpk1Ldv0OHTro0NBQi/8e\nmWXys3RX9+5pXaWK+X8Tb7yhdXy8syMUwi3Fxmpdv775X61mzbSOizM9Z9q0aVoppQE9YsQIxwdt\nBWC3TiUnyhy9B2jfvj0jRoxg9OjRTJ06NdkxrTVff/01ERERnDhxgnv37iUeO336tNkhKD8/P1q0\naMGcOXN4//33AZg7dy5Zs2alYcOGAKxfvx4fHx+aN29ObJIHUcPDw/n555+Ji4vD1x2qVBzl999h\n61bzd+tgFN7t2GHaXqUKfPONMb4ohMiwoUNh5UrT9vLljQ0hU24bO3nyZLp37554Jz937lzeeecd\nsmbN6oBo7UMSvQfw8/Ojf//+9OnTh+HDhyc79tVXX/Hee+8xYMAAqlevziOPPMKuXbvo2bNnsqSf\nUtu2bZk8eTJHjhyhTJkyzJkzhyZNmhAUFAQYUwJxcXHkzJnT7Pn//fcfjz76qM3+jm5t9GgYNCjj\n50nxnRCZsmoVfPyxafuD4ruEWchE48ePp1eSGpmnn36atWvXunWSB0n0HqNLly6MHDmSTz75JFn7\n3LlzadWqVbI5/IMHD6Z7verVq5M/f37mzJnD66+/zo4dOxg4cGDi8dy5c+Pn58e2bdvwSfmRGAgJ\nCcnE38aDzJ9vXZL39zfOLVTI9jEJ4SU++si07UHxXcqlKL788kveeeedxNehoaGsXr2a3Llz2zlK\n+5NE7yECAwN59913GThwIJUqVcLf3x+AqKgoAlPcEf7000/pXs/X15fWrVszZ84csmTJQq5cuahX\nr17i8Zo1axIXF8fNmzepXbu2bf8ynmL/fujY0bpzv/kGXnzRtvEI4UWuXzc/G2au+G7MmDHJbmSq\nVKnCqlWrUh2xdDdSdQ/OLr9L/pUJ3bt3J3v27Gzfvj2xrXbt2syZM4cJEyawevVqXn/9dY4dO2bR\n9dq0acOBAwcYO3YszZo1IyAgIPFY2bJlefPNN2nbti2ffPIJ69evZ/ny5Xz66af873//y9TfwyOk\nVeabnj59ZGc6ITJp40aIj0/eVrIkDB788LXWmg8//DBZkq9atSpr1qzxmCQPckfvUYKDg+nXrx+D\nk/xLHjp0KJcvX+aDDz4AoEWLFowbN47GjRune72XXnqJIkWKcObMGdq2bWtyfPz48ZQpU4bJkycz\ndOhQcuTIQYUKFejatavt/lLu4s8/ja+4OOP1L7/A8eOm/apVgxdeMH8Nf3/jWP369otTCC/xcDvZ\nh+rUSV58N2rUKIYl2TSqRo0aLF261O3n5FNSOpN3ka4oNDRU7969O9Xjhw4donz58g6MSNiLS/ws\nBw82X/GTUvnyRvV9ygogIYTNlS4NKQcv58+HFi0evt66dSv16tXj7t271KlTh4ULFxIcHOzYQG1E\nKfWH1jrU3DEZuhciM374wbIknzOn+TJfIYTNnTxpmuSVgho1kre9/PLLLFu2jNatW7N48WK3TfLp\nkaF7Iaz1++/w1lvp90utzFcIYRfr15u2hYbCI4+YtoeFhWVot093JHf0QljjwgVjDDDJngGpGjMG\nEhYaEkLY39q1pm3h4XH079+foxnfn9btyR29EJY4cAAOHXr4ZMTXXxtr06dUrZqx3yVAcDDUqwd1\n6zouTiG8XHy8uTv6WHbv7sy6dT/y888
/s2XLFkqUKOGM8JxCEr0Q6RkwAD79NP1+oaGwejVkyWL/\nmIQQZu3bB1euJG2Jwde3PevW/QLA2bNniYiIYPTo0U6Jzxm8NtFrrVGyfrhbc8gTI4cPW5bkQ0KM\nza4lyQvhVMmH7e8DbYmLW5jY0r1791R3+/RUXjlH7+/vT1RUlLPDEJkUFRWVuAKg3Sxfnn4fPz+Y\nOxeKFLFvLEKIdD18fv4e0AJ4mOR79+7NxIkTzS7b7cm862+bICQkhHPnzhEZGemYu0JhU1prIiMj\nOXfunP3X1DdX1ZPSuHHG3LwQwqnu3YNffwWIBJoCDz+ov/vuu3z99ddeOZLrlUP3OXLkAOD8+fPE\npLZtqHBp/v7+5M+fP/FnaRfR0bBli2l7w4YQFGQ8G9+ypaxkJ4QD3b8PO3caST2lgwchKuou0BjY\nmNg+aNBgRo78yCuTPHhpogcj2ds1SQj399tvkHKKJ29eWLLEdBNrIYTd7dhhPMRy61ZqPe4A9YGt\niS0VK45g1KihDojOdXltohciXeYfxpUkL4QTxMTAK6+kleQBsgCFk7wezf/93/v2DcwNSKIXIjXm\ndsWoVcvxcQghWLYMzpxJr5cfMBOIAV4G+smvLJLohTDv+nUwtzGS/F9DCKf47jtLe/oDcwEfmjaF\nYsXsF5O7kEQvhDnmNrN+7DEoXtwp4QjhzU6cgDVrTNtDQy9y48ZMihZ9J1mhnZ+fD88/b6x1JSTR\nC2GeuWH72rUdH4cQgu+/f7j69ANFipznzp1wjh07TMuWVxg9erTXVtWnR6qKhDBH5ueFcAkxMTBl\nSsrWM0RGVufw4cMAfP755/z9998Oj81dSKIXIqVTpyDlDlfmNrMWQtjd0qXGZpEPnQCqcfWqseG8\nn58fs2fP5sknn3RGeG5Bhu6FSMnc3XxoKOTO7fhYhPByERFJXx0DagJG+b2/vz9z586ladOmTojM\nfUiiFyIlGbYXwmni4+HsWYiLg4sXkxbhHQbCAWN76MDAQBYuXEh9WZkyXZLohUgqPl4SvRBOsn49\nvPaakeCT2w/UAowDQUFBLFmyhFrye2kRSfRCJLVnT8rNrI117V980TnxCOEl9u6Fxo1NV52GvRhJ\n3vi9DAjIyooVywgLC3NsgG5MEr0QSU2datpWtarsMy+EHV29Cs2amUvyYOwpfz/h++zMnbuSsLCX\nHBecB5CqeyEeiIyEmTNN25s1c3wsQniJ2Fho2xZOnkytx3PASuBRWrdeS5MmkuQzSu7ohXhgzhzT\nHTOCg6FdO+fEI4QXGDTIfFlMjhwPH3TJnv1F6tY9xqhRgY4NzkNIohfigeTP8RhefdXYd14IYXNz\n5sBnn6Vs3USOHCH8+WcFHnssabskeWvJ0L0QAPv2GZtdp9Stm+NjEcIL3LoFPXqkbF0D1MfXN5y4\nuCNOiMozSaIXAszfzT/9NDz3nONjEcILzJplbBL50HKgMXCP69cv8Prrr6NTLnAvrCKJXojUivC6\ndTOWvhVC2JTWKbedXQQ050F1fdGiRfnpp59kkxobkUQvRGpFeK+95px4hPBwu3fDX389eDUXaA3E\nAFCiRAm2bNnCY8kn6EUmSKIXIrUivBw5HB+LEF7g4d38T0BbIBaA0qVLs2XLFooVK+akyDyTJHrh\n3ZYtM1+E172742MRwgvcugU//wwwFegAxANQsGB5Nm/ezKOPPurE6DyTJHrhvY4cMT88//TTxm51\nQgib++kniIz8DugCGMV2Pj5PsG3bJgoWLOjU2DyVJHrhnW7dMla8Szk3D9CnjxThCWEHWsOkSRr4\nPUnr03TqtIESJUKcFZbHkwVzhPeJj4eOHeHQIdNj9evD6687PiYhvMCuXbBvnwImA9HAUWA1ffs+\n4tzAPJwkeuF9Ro2CRYtM20uVMh7u9fV1fExCeIGHda++wHQgiipVsvPkk86LyRvI0L3wLnv3wrBh\npu1ZsxrJP1cux8ckhAfTWrN8+XIOH9bMmpX0iB+QXRafdABJ9MK7jBtnTBSmNH06PP644+MRwoNp\nrRk8eDCNGjXixRf/j6io5L97OXNCmzZOCs6LSKIX3uPmTZg927R94EBo2dLx8QjhwbTWvPvuu4we\nPRqA69fHAVOS9enSxVibStiXzNEL72E815O8LWdO+OAD58QjhIeKj4+nb9++fPvtt0laGwEPH2ct\nVQqGDnV4aF5JEr3wDqaLaxvat5dbCiFsZPduGDIknu3b3+TWrclJjrQAfgYCAMiWTUpiHEkSvfAO\nO3caW9GmJCvgCWETe/ZA1apx3Lv3P2BakiNtgJmAf2KLlMQ4lszRC+9gbj37KlXgiSccH4sQHubK\nFWjWLJZ7914neZLvAPxI0iQ/eDC0aOHY+LydJHrh+VIrwpO7eSEyLTYWWreO4fTpdkDS5+e6YKxn\n/3DguEEDGDHCwQEKSfTCC6RWhPfKK86JRwgPMmAAbNp0EliXpPVNjNXvHi4+1bChrEflLDJHLzxb\nakV4HTpIEZ4QGbRzJ4wfD4cPG6/j4uCPPwBKA2uBcKAjjzzyFStWKHLnNvrlzg158zolZIEkeuHp\n1qwxX4Qny3EJkSH//gt168KNG6n1qAT8hVLF+OUXRZUqDgxOpMnpQ/dKqXpKqX+UUseUUu+bOV5U\nKbVRKbVHKbVPKdXAGXEKN3TqlPH4XEovvCBFeEJk0GefJU3ydwAzm0JRnE8/VdSq5bi4RPqcmuiV\nUr7AeKA+UAF4VSlVIUW3D4BftNbPAG2BCY6NUrilyEho3twoB06pRw/HxyOEG7tzxyh1MdwC6gHV\ngP3J+r36KrzzjmNjE+lz9h19ZeCY1vpfrfV9YDbQNEUfDeRI+D4ncN6B8Ql3pLUxNL9nj+mx6tWh\nXTvHxySEG5s9G27fBrgB1AG2AVeAWsB1AJo0gcmTQSlnRSlS4+w5+sLAmSSvzwLPp+gzHFijlOoN\nZMX4lyXEQ1u2wA8/wKVLxus7d2DrVtN+RYrAL79I2a8QGWQsQ3EVI8n/mdjer99A2rR5hAIFoGhR\nSfKuytmJ3hKvAtO01l8opV4AZiqlKmqt45N2Ukp1A7oBFC1a1AlhCqdYssRYfSMuLu1+gYGwcCGE\nhDgmLiE8xJ49sGvXZYx7rIeFre+/P4HRo2UazB04e+j+HFAkyetHE9qS6gr8AqC1/g3IApg8qKG1\njtBah2qtQ/Ply2encIVLOXzYKLZLL8mDcUtSqZL9YxLCw3z11QUgjIdJXlGs2PeS5N2IsxP9LqC0\nUqqEUioAo9huSYo+pzEezkQpVR4j0V92aJTC9dy8CU2bPpg4TFufPvD66/aPSQgPc+TIOWbOrA4c\nTGjxAaYzeHBXJ0YlMsqpiV5rHQv0AlZjPKvxi9b6gFLqQ6VUk4Ru7wBvKKX2Ymx/1ElrrZ0TsXAJ\n8fHGnfyRI+n3bdsWPv/c/jEJ4WFOnz7Nyy9XR+sHv2e+wE9ky9aBtm2dGZnIKKfP0WutVwArUrQN
\nTfL9QeAlR8clXNiIEbBsmWl7mTIwduzDiqASJaBsWakQEsJC+/bB1Klw8SLs2/c9ly8fTzjih/FQ\nVEteew2yZ3dikCLDnJ7ohciQxYvhww9N27NnNza4Ll/e8TEJ4QF+/dVY+S4q6kHLcIySqR+BeUBj\nQBaVdEfOnqMXwnKHDplf6Q5g5kxJ8kJY6cwZaNUqaZIHIz1EADt4kORDQ+HZZx0fn8gcSfTCPdy8\nCc2aGc/IpzR0qFGYJ4TIsHv3oGVLuHTpFBCf4qgv8EziqzffdGRkwlYk0QvXl1bxXaNGMGyY42MS\nwgNobawIvWvXHoxNabpjmuwNTZtC586OjE7YiszRC9eXWvFd2bLw44/gI59XhbDG+PEwbdoujBXv\nbgDfA49QqNCnfPLJw1+txx+HJ5+UulZ3JYleuLYtW9IuvsuZ0/ExCeEB/vwT/u//fsPYoOZWQmsu\n/Pxas2ABPJ9yMXLhtuRWSLi2r74y3/7jj1CunGNjEcKD9O27hbi4OjxM8nmADUya9JwkeQ8jiV64\nLq1h+3bT9mHDjK2yhBBWmTNnPVu31sfYVx4gH7CRHj2eoasseudxJNEL13X+vLFyR1IBATBokHPi\nEcIDrF69mvbtGwGRCS0FgE0ULvxEqgNowr1Joheu648/TNuefNJI9kKIDFu6dClNmjQhNvZeQkth\nYDNQga5d5VfLU0miF65r927TNtmBTgirREVF0aNHD+7fv5/QUgzYApTBxwcZsvdgkuiF6zJ3Ry+J\nXgirBAUFsWLFCvz9cwMlMe7kSwJQvz4ULerM6IQ9yeN1wjVpLYleCBvLm/dJ4uLWYxTfFU5sl/Xr\nPZvc0QvXlFohXsWKzolHCDd07dq1ZK+nTIH4+KdJmuQLF4YGDRwcmHAoSfTCNZmbn5dCPCEsNmnS\nJEqXLs2ePXsAiIuD77837de1K/jJ2K5Hkx+vcE0ybC+E1caNG0ffvn0BqF27Nlu2bOHEiQqcOpW8\nnxTheQe5oxeuSRK9EFb57LPPEpM8wGOPPYavb0GzO89JEZ53kDt64Xq0Nj90Hxrq+FiEcCMjR45k\nyJAhia9ffPFFlixZSYsWOTh71rS/FOF5B0n0wvWcOweXLiVvCwgwttASQpjQWjNs2DA++uijxLZq\n1aqxfPlyBg7MxpYtpudUrgwNGzowSOE0kuiF65EV8YSwmNaagQMH8sknnyS2hYeHs3jxYubOzcq3\n35qekzcvzJ0Lvr4ODFQ4jSR64Xpkfl4Ii2itefvtt/kqySL19erVY8GCBezfH2R2Xt7X10jyMjfv\nPaQYT7gemZ8XwiILFy5MluSbNGnCokWLuHUriObNITra9JwvvoCwMMfFKJxPEr1wLbIinhAWa968\nOT179gSgZcuWzJ07F6UCaTVJS4YAACAASURBVN3aKHVJ6fXXoU8fBwcpnE6G7oVrkUI8ISymlGLc\nuHE888wzdOzYET8/P3r3hl9/Ne1bqRJMmgRKOT5O4VyS6IVrkUI8IVIVGxsLgF+Spex8fHzomrDq\nzdSpmC2+y5cPFi6EoCCHhClcjAzdC9ci8/NCmBUTE0Pbtm3p0qULcXFxJsd37iTV4rtffoEiRRwQ\npHBJckcvXIvMzwthIjo6mldeeYUlS5YA4O/vz+TJk/HxMe7VLlyAFi0gcav5JL78UorvvJ0keuFa\n9u83bXv2WcfHIYSLiIqKomXLlqxcuTKxLUeOHKiEyfb790mz+K53b0dFKlyVDN0L13HnDpw5k7xN\nKShf3jnxCOFkkZGRNGnSJFmSHzBgAF9++WViou/XD7ZuNT1Xiu/EA5Lohes4fNi0rXhxqSASXunO\nnTs0aNCAdevWJbYNHTqU0aNHJyb5KVNgwgTTc6X4TiQlQ/fCdRw6ZNomd/PCC928eZMGDRqwffv2\nxLaRI0cyePDgxNe7d0OPHqbnPlj5TorvxAOS6IXrkEQvBNevX6du3brs2rUrse2zzz7j3XffTdbv\n//4v9eK76tXtHaVwJ5LoheuQRC8EHTp0SJbkv/76a/qkWM5u3z7Yts303I4dpfhOmJI5euE6JNEL\nweeff05ISAgAkyZNMknyABERpueVLy/Fd8I8uaMXriEmBo4fN22XRC+8TLly5Vi/fj179uyhQ4cO\nJscjI2HmTNPzeveGLFkcEKBwO5LohWs4dgwSlvdMlD8/PPKIc+IRwkHi4+MTF755oGLFilSsWNFs\n/zlz4Nat5G3BwfDaa/aKULg7GboXrkGG7YUXOnXqFJUqVWLHjh0Wn2Nu2P7VVyFHDhsGJjyKJHrh\nGiTRCy/z77//Uq1aNf766y/q1avHH+aWf05h3z4w95mge3c7BCg8hiR64Rok0QsvcuTIEapVq8bp\n06cBY5nbixcvpnueubv5p5+WfZ9E2mSOXrgGSfTCSxw8eJDw8HAuXLgAQJYsWVi0aBF169Y12//a\nNYiONr7MFeF17y6V9iJtkuiF88XHm1/+VhK98DD79u2jVq1aXL58GYDg4GCWLl1KzZo1TfoeOABt\n25rf5+mBrFmhXTt7RSs8hSR64XxnzhjPDCWVPTsUKuSceISwgz///JPatWtz7do1ALJly8aKFSuo\nWrWqSd8LF6BOHTh/Pu1rShGesITM0QvnS23YXsYjhYfYuXMn4eHhiUk+R44crFmzxmySv38fWrVK\nP8kDdOtm60iFJ5I7euF8Mj8vPNipU6eoVasWt2/fBiBXrlysXbuW0FQq6Pr2Nb+8bUp160oRnrCM\n3NEL55NELzxY0aJF6Z7w/FuePHlYs2YjFSuGcu8eJl8REcYytikFBkKBAsZXsWLQpQvMmiWDXsIy\nckcvnE8SvfBgSik+/fRTrl4NYvPmV6hc2fyKd6nx84O1a8HMKL8QFpFEL5xPEr3wcIcOKebO/ZA7\ndzJ+7ldfSZIXmSND98K5Ll+Gq1eTtwUEQIkSzolHiExaunQp7du3JzZh74YbN6BZM6xK8p07w1tv\n2ThA4XXkjl44l7m7+TJljPFKIdzMggULaNOmDbGxsWitmTZtBu3b+3L0aMavVbkyTJgg8/Ai8+T/\npsK5ZNheeIjZs2fTvn174uLiAPj9998ZMOAqy5eHmPT18QF/f/PXCQqC2rVh4kTZdlbYhiR64VyS\n6IUHmDFjBp07dyY+Ph6AsmXL8s476+nWzTTJZ88OO3dCuXKOjlJ4K0n0wrkk0Qs398MPP/DGG2+g\ntQagQoUKTJiwnkaNCpjt/+OPkuSFY0miF85z7Rps3mzaLoleuIkJEybQs2fPxNdPPvkk8+evo0GD\nfGaL74YPhyZNHBefECBV98KZZs40tuRKKnduqFDBOfEIkQFfffVVsiT/7LPPsnbtBvr2zWe2+K5p\nUxgyxIEBCpFAEr1wDq3hu+9M2zt2TL1KSQgXMXHiRPr165f4+vnnn2f9+vWMG5eHFStM+5crBzNm\nGEV4QjiaDN0L59i2zfz8vOzSIdxAnTp1yJevMJcvnyM
g4CVOnVrB44/nMLsRTY4csGiR7DInnEc+\nXwrnMHc3X62aVCkJtxAc/BgxMeuBNty/v4oLF8wneTCK78qWdWh4QiQjd/TC8a5dg7lzTdsTNv4Q\nwtVNmgQ3bpQFZqfZb8QIaNzYMTEJkRqrEr1SqgJQDSgK5AWigEvAX8AWrfVtm0UoPM+MGaZFeHny\nQIsWzolHiDRorRk4cCANGjSgWrVqxMbC99+nf16zZvDBB/aPT4j0WJzolVKPAt2ALkDBB80pumkg\nTim1DpgILNMPHi4VAowivIgI0/aOHWUZMOFy4uPj6dWrFxMnTmT8+PGsWbOGy5dfSHWYHoyCuxYt\nYOpUKb4TriHdRK+Uyg0MB7oD/sBJYBawC7gAXAOCgDxAOeAFIAyoC/yjlHpHa73S9qELtxAVBcOG\nwYYNxl18bCwcPmza7403HB+bEGmIi4uje/fu/PDDDwDcuXOH77//ngsXXjDp26jRw33kc+aEbNkc\nGakQabPkjv4YEAh8D0zXWu9M7wSlVA6gLcYIwDKlVD+t9bhMRSrcT1ycMX65Zk3a/apXlyI84VJi\nY2Pp0qULM2fOTGxr164dgwd/R6lSpv179oTChR0YoBAZYMnA0kygpNa6pyVJHkBrfUtrHaG1DgVa\nYszfC28zaFD6SR7kkTrhUmJiYujQoUOyJN+pUydmzJjBtGl+pJyMLFYM6tRxcJBCZEC6d/Ra676Z\neQOt9aLMnC/c1Jw58Omn6fcrVEiK8ITLuH//Pq+++ioLFixIbOvWrRsTJ04kPt6HhFH8ZN54Q+bi\nhWvL8D9PpVTRhKH5tPpkV0oVtT4s4db27oUuXdLvV6gQzJ8vRXjCJURHR9OqVatkSb5Xr15MmjQJ\nHx8fli/HpAjP19eyf+pCOJM1j9edwCjO+yiNPn2ADwFfK64v3NnVq8a8fGSk6bEhQ6B1a+P7gAAo\nUwZUygc3hLCPFSuMx+KuXDE9pnUsBw404/r1VYlthQu/w19/fUb16sa/0ePHTc9r0gQKFjRtF8KV\nWJPoFaaP1Qlh6NEDTp40bW/Vylg9RBK7cIJZs+C119Lq4Qc8BzxI9IM4d24k586l/e9V1ngS7sBe\nK+MVAO7a6drCVf37r/kV7ypWNB4qliQvnODPP6FrV0t6jgCigWBgKOndzxQvDrVrZzY6IezPokSv\nlHo9RdPTZtrAGKovCrQH/s5kbMLdTJ5s2pYrl7GjhzxYLJzg8mVo3hzu3bOktwLGYOmA5VtvSRGe\ncA+W3tFPw1j1joQ/myZ8pfTgNyQS4+Ox8Bb378OUKabtAwfCY485Ph7h9WJjoU0bOH3a3NFrwKcY\npUQBSdotS/JNmkCSXWqFcGmWJvrOCX8qYAqwCFhspl8ccBX4TWt9w5ILK6XqAV9jjAZ8r7UeY6bP\nKxgFgBrYq7VuZ2HcwlGWLIFLKZZL8PeHTp2cEo7wfAsWwMKFcDuVnTUuXoQdO0zbH3/8CtHRtTl2\n7C+qVTvKsGGz8fPzt/h9ixUzvoRwFxYleq319AffK6U6Aou01jMy++ZKKV9gPFAbOAvsUkot0Vof\nTNKnNDAQeElrfV0pFZLZ9xV2YG79+ubNIUR+XML2Pv0UBgzI+Hm5cl0kJqYWx47tB+DXXxcSH7+Z\natVq2ThCIVxHhovxtNY1bPj+lYFjWut/AZRSszGmBA4m6fMGMF5rfT3h/WWVPVdz/DisXWvaLiXJ\nwg5WroT338/4eUqdJ3v2cI4cOZzwWjFlyhRq1ZIkLzyb1VX3SqlgoAXwDJALuAn8CSzUWltacV8Y\nOJPk9Vng+RR9yiS83zaM4f3hWutVCNdhbs/OUqWghi0/EwoBR49Cu3aYLEObvjPkyVOTM2eOAeDj\n48OMGTN4Le1n7oTwCNbuR98AmA7kJnn1igbGKqU6a62X2SA+MGIsjbEj3qPAFqXUEylrAJRS3TA2\n0aFoUVmUz2FSK8Lr1k0epxM2dfu2sRbTDYuqf5I6SY4cNbly5QQAfn5+zJo1i9YPFm8SwsNlONEr\npZ4FFmDcXf8EbAD+w9ijvibwKjBPKfWS1vqPdC53DiiS5PWjCW1JnQV+11rHACeUUkcwEv+upJ20\n1hFABEBoaGiGP+8LK6VWhNexo3PiER5j4UJjRigqynh98KDxlVKtWsbuceZcuHCcjz6qyfnzRum9\nv78/c+fOpWlTcw8NCeGZrLmjH4xx515Va52ypnWaUmo8sAkYhLFzXVp2AaWVUiUwEnxbIGVF/SKM\nDw9TlVJ5MYby/7UibmEP331n2taihRThiUwZMgRGjky/X8mSxv5JuXObHjty5Ag9e9bgfMIC9YGB\ngSxYsIAGDRrYOFohXJs1yz1UBeaaSfIAaK1/B+Yl9EuT1joW6AWsBg4Bv2itDyilPlRKNUnothq4\nqpQ6CGwE3tNaX7UibmFrx4/DunWm7bLtrMiEOXMsS/JZsxprMZlL8gBZsmQhICAg8fslS5ZIkhde\nyZo7+pwkL6Az5zSQ5g53D2itVwArUrQNTfK9Bt5O+BKuxNxKeFKEJzLB0o0PwVhV+YknUj9etGhR\nNmzYQIMGDZgwYQI15N+l8FLWJPrzGI/FpSUUY95eeKr7943/06YkRXjCSlevGksvmNv4MKUxYx5u\nhJiWEiVK8Pfff+PnZ69tPYRwfdb8618BvKmUeh/4TGsd9+CAUsoH6AfUAibZJkThkhYvlpXwhNW0\nhmXLYNs2iIkx2rZuhRMnTPs2amSUfQD4+UGlSlChgmm/nTt3cuvWLZPn4iXJC29nzW/AR0AzYBTQ\nXSn1K8bdewHgZaA4cAGwYJZNuC1zK+G1aAH58jk+FuF2+vWDr79Ov1/FivDzz+nvibRt2zbq169P\nbGwsK1asICwszCZxCuEJrFkZ74JS6iXgO4yla1Ou+rwWeFNrLUP3niq1IjxZCU9YICLCsiRv6caH\nmzZtolGjRty9a6zT1blzZ/7555/EQjwhvJ1VY1pa65NAXaVUYYyV8XJirIy3R2ud8jl44WnMFeGV\nLg1yFyXS8dtv0KtX+v2UMu7k09v4cN26dTRp0oSohIft8+fPz9KlSyXJC5FEpiavEpK6JHZvIkV4\nwkrnzxuzOw/m5FOjlHHHX69e2v1WrFhBixYtiI6OBqBgwYJs2LCBcuXK2ShiITyDNSvj/QJMBVZr\nreNtH5JwaeaK8AICZCU8kaboaGjVCi5cMD3WtClUTVh1IzAQXn4Znn467estXryY1q1bE5PwqaFI\nkSJs2LCBUqVK2ThyIdyfNXf0rTBWvLuklPoRmK613m/bsITLkiI8YYU+fYxh+5Sefx5mz4YsWSy/\n1rx583j11VeJjY0FoHjx4mzcuJHixYvbJlghPIw1K+NVwSjECwDeAfYqpXYrpXonLFErPNV//8lK\neCLDIiLMfz4sUADmz89Ykp81axZt27ZNTPKPPfYYW7ZskSQvRBoynOi11ju11m9hbGLzCsZz9U8C\nXwPnlFILlF
LNlFLy8KqnMZfkpQhPpGH7dvPFd/7+MG8eFC6cseudOnWKuDhj6Y6yZcuyZcsWihQp\nks5ZQng3q5Ox1vo+xpr285RS+YD2QEeMZ+ybAlcB2dnEk5hL9E2bShGeMOv8eWjZ0nzx3bhx8NJL\nGb/mwIEDiY6OZt68eaxfv578+fNnPlAhPJwylpK30cWUUhgr440G/LTWvja7eAaEhobq3bt3O+Ot\nPZfW8Oijxv+9k1q1CurWdU5MwmVobdy9//03xCeU6E6fDjt3mvb93/+MoXxrPx9qrYmMjCRr1qzW\nByyEh1FK/aG1DjV3zCbD60qpshh38+2BwoACjtri2sJFHDpkmuQDAh6WSwuvpTX06GF+x+KUqlSB\nb7+1PMnPnz+fhg0bkiXJRL5SSpK8EBlgTTEeAEqpXEqpHkqpHcBB4H2MHet+wNirvqyNYhSuwNyw\n/UsvQXCw42MRLuWzzyxL8g+K7wIDLbvumDFjaNWqFa1ateL+/fuZC1IIL5bhRK+UaqyUmouxvv23\nGDvVrcO4my+gte6mtd5m2zCF05lL9Ck2DxHeZ80aGDgw/X7+/kaSL1Qo/b5aaz788EMGJlx4+fLl\njBo1KpORCuG9rBm6X5zw5xFgOjBDlr31cDExsGmTabskeq92/Di0bftwTj41SsHEifDii+lfU2vN\nBx98wMcff5zYVqNGDfr375/JaIXwXtYk+u8wFsnZYetghIvauRNu307eliuXsV+o8Apaw549cOTI\nw7aPP4br10371qsHJUoY32fPDo0bG6vdpf8emv79+/P5558nttWpU4eFCxcSLFNEQljNmt3retgj\nEOHCzA3b16wJvk55qEI4WFwcdO4MM2em3zc8HJYuNfaNzwitNf/3f//HuHHjEtsaNmzIvHnzkhXi\nCSEyTha1Eelbu9a0TYbtvcaQIZYl+eLFYc6cjCf5+Ph43nrrLb5LUtHXvHlzZs+eLbvQCWED6f5K\nKqU2ABroqLU+m/DaElprHZ6p6ITz3boFO8zM0tSu7fhYhMPNmwejR6ffLyjI2Ds+T56MXT8uLo43\n3niDqUl2RHzllVf48ccf8ff3z2C0QghzLPnsHYaR6IOTvLaE7VbiEc6zZYsxdptUsWLpbxQu3N7f\nf0OnTun38/Exdi5+6qmMv8f169f59ddfE1+3b9+eqVOn4pfRYQEhRKrS/W3SWvuk9Vp4uNSG7WXZ\nW4927Ro0awZ375oeCw+HvAnbV+XODa++av26SXnz5mXDhg1Ur16dsLAwJk+ejK/UfghhU/KxWaTN\nXCGeDNt7tLg4aNcO/v3X9Fjz5sZwvo8NP+4XKVKEHTt2kDdvXnxseWEhBJCJlfGEFzh/Hg4eNG2v\nWdPxsQiH+eADWL3atL1CBWP9+szk4nv37vGbmY3pQ0JCJMkLYSeZWQL3NaXUeqXUNaVUbMKf65RS\nr9kyQOFE5u7mn34a8uVzfCzCIebOhTFjTNtz5jSK7bJnt/7akZGRNG3alLCwMFab+yQhhLALa5bA\n9VdKLQZmADWA7MDlhD9rAjOUUouVUlIy6+5k2N6rpFZ8pxTMmgWlS1t/7bt379KoUSPWrFnD/fv3\nadasGcePH7f+gkIIi1kzRz8QaAzsSPh+q9Y6TinlC1TF2KK2ETAAGGmrQIWDaS3r23sQreHsWbhw\nwfzxmBjo0AEiI02PffQRNGhg/Xvfvn2bBg0asHXr1sS2QYMG8Zg8uSGEQ2R4P3ql1DEgHqiotTbZ\nUkopFQjsT7h2KZtEmUGyH70NHDgAFSsmbwsIMNY8leVI3Up0NLRpA4sXp983pRYtjOI7ax+yuHHj\nBvXr12dHkrUYxowZw4ABA6y7oBDCrLT2o7dmjv5RYLG5JA+gtY7G2PimsBXXFq7C3N38yy9LkndD\n06dbl+QrVIBp06xP8teuXaNWrVrJkvyXX34pSV4IB7Nm6P48kN78u39CP+GuZNlbj7FgQcbPyWzx\n3eXLl6lduzZ79+5NbPv222/p2bOndRcUQljNmjv6WUArpVQOcweVUrmAVsBPmQlMOJFsS+sxoqON\nxQ0zwscHfv7Z+uK7CxcuUKNGjcQkr5QiIiJCkrwQTmLNHf2HQEVgp1LqQ2ALcBHID1QHhgA7gY9s\nFaRwsN9/N10S7ZFH4NlnnROPsNr27RAVlbwtSxbT8osHChWCvn0zt1TCqlWrOHDgAAA+Pj5MmTKF\njh07Wn9BIUSmWJPoH/xvQwHm9rRSQGngnko+uae11rISnzswN2wv29K6JXOlFk2bwuzZ9nvPTp06\ncenSJQYNGsTMmTN59dVX7fdmQoh0WZN4f0U2rPFs8lidx3DWj7J///40adKEcuXK2f/NhBBpynCi\n11qH2SEO4Spu3TKG7lOSRO92rl8Hc0+Z2vpHefLkSUJCQghO8USGJHkhXIMsLi2S27TJdFva4sVl\nW1o3tHEjxMcnbytVyvhx2srhw4d56aWXaNq0Kffu3bPdhYUQNiOJXiSX2livbEvrduz9hOT+/fsJ\nCwvj/PnzrFu3jvbt29vu4kIIm0k30Sul3lVKZbH2DZRSzyil6lt7vnAweX7eY9hzq4K9e/dSo0YN\nLl68CEDWrFnp1auXbS4uhLApS+7oRwHHlVIDlFKFLLmoMtRVSi0EdgNPZSZI4SCbN8Phw6bt4eGO\nj0VkysmTcOxY8jaloEaNzF/7jz/+oEaNGly5cgWA7Nmzs3r1asLCwjJ/cSGEzVlSjPcE8CXGZjUj\nlVLbga0YCfw/4DqQBcgDlAOqAOFAAeAq0Av4zuaRC9s6cwZatzZtf+YZyJvX8fGITDF3Nx8aaiyH\nkBk7duygXr163Lx5E4CcOXOyevVqnn/++cxdWAhhN+kmeq31EaCRUupFoCfQEmOXOnOP2D2YyP0H\n+ASYqrW+baNYhb1ERRm7l1y+bHqsQwfHxyMyzR7D9lu3bqVBgwbcvm38SufOnZs1a9ZQqVKlzF1Y\nCGFXFj9ep7XeDmxXSr0JVANeBopi3MlHAZeAfcAmrfUBO8Qq7EFr6NHD/HNYL78Msmyp24mPh/Xr\nTdszU2qxadMmGjVqxN2EFRPz5s3LunXreOopmZUTwtVZ8xz9bWB5wpdwd99+a2xvllLhwjB3rrE1\nrXAre/dCwvR5oqAgePFF664XHx/Pe++9l5jk8+fPz/r163n88cczGakQwhHk8TpvduEC9O9v2h4Q\nAPPnQ4ECjo9JZNqUKaZt1apBYKB11/Px8WHJkiWUKVOGQoUKsXnzZknyQrgRi+7olVKvA39prffZ\nOR7hSAsXgrlFTiZNAimucks//2wM0qSU2SckCxYsyIYNG4iKiqJUqVKZu5gQwqEsvaOfBjRL2qCU\n6qiU2mDziITjmKvY6tIFOnd2fCwi0/buha5dTdv9/Mw/UJGW//77z6StcOHCkuSFcEOZGbovjrEt\nrXBHcXGwwczntC5dHB+LyLSrV6FZM9MtaQE++giKFbP8Wj/++CMlS5Z
k6dKltgtQCOE0sm2st/rj\nD7hxI3lbtmxQubJz4hGJIiPh7bdh2TJIqH9LV3S0+STfujUMGGD5e0+dOpWuXbuitaZVq1asXLmS\nmpnZnF4I4XSS6L2VuWH7sDDw93d4KOKhuDho1QpWrsz8tZ54wijMs3Sbgu+++44333wz8XXZsmWp\nWLFi5gMRQjiVVN17K9lz3iUNHWqbJP/II0atZbZslvX/5ptvkiX5p59+mg0bNhASEpL5YIQQTpWR\nRG9uJTzhjiIjYds203Zb7XgirDJ/Pnz8ceav4+NjVN9burPwF198QZ8+fRJfP/fcc2zYsIG8svSx\nEB4hI4l+uFIq7sEXMBQgaVuKr1j7hCwy7ddf4f795G0FC0L58s6JR7B/P3TsmPnr5M9vJPm6dS3r\n//HHH/Puu+8mvn7hhRdYu3Ytj2R2UXwhhMvIyBx9Rjcklw3MXZXsOe9UkZEwZAisXm18D8ZKduYK\n74YNg759LbuuUpAzp2U/Rq01I0aMYMSIEYlt1apVY9myZWTPnt2yNxRCuAWLEr3WWubyPYm5Pedl\n2N5hevaEadPS79esmTFn72OH375du3YlS/I1a9ZkyZIlZM2a1fZvJoRwKkng3ubSJWNllZRkz3mH\nOHHCsiRfvjzMmGGfJA9QuXJlxo0bB0C9evVYtmyZJHkhPJQ8XudtzC2SU6ECFCrk+Fi80OTJ6ffJ\nmRMWLQJ7j6D37t2bRx99lAYNGhBo7UL4QgiXl+FEr5R6CmgHVAbyYVTjXwZ+B2Zprf+2aYTCtmTY\n3mliYsxvOJPUo4/C7NlQpoxt3zs+Pp6oqCiTu/bmzZvb9o2EEC7H4kSvlPIFvgG6YRTapSz5qQ68\np5SaAPTVWsvjeK5Ga/OJXp6fd4glS+DixeRt/v7w++/GXby/v5HobV0TGRcXR9euXTl+/DirVq2S\nIXohvExG7ug/B94E7gO/AJuAcxgJvxBQE2gF9ATuAWb2PxVO9eefcOZM8jZfX6guWxY4QkSEaVuz\nZvDMM/Z7z9jYWDp27MisWbMAaNy4McuXLycoKMh+byqEcCmWblNbCugNnALqaa3/MdNtilJqJLAK\n6KeUmqS1/td2oYpMuX4d2rQxba9Sxf6TwYJ//4U1a0zbu3e333vGxMTQrl075s2bl9hWsmRJAgIC\n7PemQgiXY2lNbweMO/dOqSR5ALTWh4GOgC/QPvPhCZuIi4N27eD4cdNjMkfrEN9/b9r22GNQo4Z9\n3i86OprWrVsnS/I9evQgIiICX19f+7ypEMIlWZroXwQOaa03p9cxoc9B4OXMBCZsaMgQWLXKtL18\neUiyvrmwj9SK8Lp1s8/jc/fu3aNFixYsXrw4sa1v376MHz8eH3s9ryeEcFmW/taXw6iqt9TvCecI\nZ5s3D0aPNm1/8AyXFGbZXWpFeJ062f69IiMjady4MStWrEhs69+/P2PHjkXJyodCeCVLE30u4FIG\nrnsRkMWyne3cOejc2bRdKfjpJ9s/wyXMMleE17w52HpjuDt37tCwYUPWJVnieMiQIYwZM0aSvBBe\nzNKq+6xAVAauGw0EZzwcYVPjx8OdO6btH30EDRs6Ph4vdP26+a0FunWz/Xu98847bNq0KfH1Rx99\nxAcffGD7NxJCuBWZsPNUqU0Mt2gBgwY5Ph4vtWEDxMcnbyte3D5FeCNHjuTxxx8H4NNPP5UkL4QA\nMvYcfTOlVHEL+9rxyWBhkdQmhidOlF3qHMjc3Xz9+vYpwsuXLx/r169n5cqVdLJHAYAQwi1lJNE/\nnfBlKVkZz5kcNTEs0mQu0dtqxeHY2Fj8/JL/CufPn1+SvBAiGUsTvZmKLuGyUludxR4TwyJVJ0/C\nsWPJ23x8ICws89e+cOECdevW5YMPPqB169aZv6AQwmNZuh/9dHsHImzI0auzCLPM3c2HhsIjmXwe\n5dy5c9SsWZMjR47QyKGyQwAAIABJREFUrl07/P39adasWeYuKoTwWE4vxlNK1VNK/aOUOqaUej+N\nfi2VUlopFerI+NyOo1dnEamyx0aBp0+fpnr16hw5cgQArTX37t3L3EWFEB7N4v/zK6XeUkoNVEr5\np9EnIKFPDwuv6QuMB+oDFYBXlVIVzPTLDvQlY4v2eCdHrs4iUhUfD+vXm7ZnZqPAEydOUL16dY4n\nLGXs5+fHnDlzaNu2rfUXFUJ4PEs3tXkRY4vaj7TWMan101rfV0oFAN8qpf7UWqeXmCsDxx5sfqOU\nmg00xVhCN6mPgE+A9yyJ16ucPAkzZsB//xmvkzxHnUiK8Bxu7164ejV5W3AwvPCCddc7evQoNWvW\n5OzZswAEBAQwb948GjdunMlIhRCeztJivI7AHYytatPzOdAP6EL6d+CFgaT7pp4Fnk/aQSn1LFBE\na71cKSWJPqmzZ409Tm/cSLufFOE5nLlh+2rVIDAw49c6dOgQ4eHh/JfwYS4wMJBFixZRr169TEYp\nhPAGlib6qsB6rbWZZdaS01rfVUqtTzgnU5RSPsCXQCcL+nYDugEULVo0s2/tHsaPTz/JSxGeU5gr\nxLNm2H7//v2Eh4dz6ZKxAnVQUBBLly4lPDw8kxEKIbyFpXP0RYGjGbjusYRz0nMOKJLk9aMJbQ9k\nByoCm5RSJ4EqwBJzBXla6witdajWOjRfvnwZCNWNmXuELqXu3aUIz8Hu3YNffzVtz2iiv3nzZrIk\nnzVrVlauXClJXgiRIZZmAF8ytgCOtvDau4DSSqkSCXP7bYEliRfR+qbWOq/WurjWujiwA2iitd6d\ngVg805UrsGdP2n1efBH69HFMPCLRtm1Gsk8qJASeeCJj18mZMycjR44EIHv27KxZs4bq1avbKEoh\nhLewdOj+MvBYBq77GHAlvU5a61ilVC9gNcaHiSla6wNKqQ+B3VrrJWlfwYtt2AA6xWevYsVgwADj\n+7JloWpVo+Je2FRcHMyfD/v2ma5jD7Bjh2lbeLh1AytvvPEGAE899RSVK1fO+AWEEF7P0kS/C6it\nlMqptb6ZVkelVE6gNmBmltKU1noFsCJF29BU+oZZFK03MDcJ3Lgx9LDoyUaRCa+/DrNmZewcS4ft\ntdYmW8o+SPZCCGENS+8xfgZyYDzznp5vMebWf7Y2KJEOrc2XdWfmIW1hkYMHM57kwbIfzfr16wkP\nD+fWrVsZfwMhhEiFpYl+PrAdY0GbzUqpWglz6kDiQjm1lFKbgHbANq31fNuHKwBjLfuTJ5O3+fra\nZhF1kabVqzN+ToUKkN6DIKtXr6ZRo0Zs3LiR+vXrc+dOug+4CCGERSxd614rpVpizKVXTfgzVin1\nYEmQPAnXUsBeoJUdYhUPmBu2r1wZcuZ0fCxextxASlr8/GDUqLT7LFu2jJYtW3L//n0ATp06xaVL\nl8iWLZuVUQohxEMWb1Ortb6olHoBeBt4A+PxuQJJupwGIoCxWusom0YpkrPVQ9oiQ+7fh82bTdvf\nfhvy5DFtDw42ivDSqrZfuH
Ahbdq0ISbGWHCyaNGibNiwgZIlS9ooaiGEt8vIfvQkJPBRwCil1KNA\nwYRD/2mtz9o6OGFGXJztF1EXFtmxAyIjk7flzg2ffmrMnGTUnDlzeO2114iLi4P/b+/O46Oq7v+P\nvz4JS0AQUChFKeACCGpxQYt+rUAAQUQQpQUUZNPw00Jb0baiDxXxW1vFilpRUUQWAQUEBATZF/cK\n8vtRZZNFBVdEBEG2kPP74w7JJDOTTDL75P18PPJI5tw7Nx+vIe+ce889BzjzzDNZvnw5DRs2jEK1\nIiKeUgW9P1+wK9zjbd062Lu3cNtJJ0GrVomppxwJdtm+XbuyhfzkyZPp378/eb7n85o0acKyZcuo\nX79+hFWKiBQW1mA8M7vSzMKeV9bMfm1mN5e9LAkp2GX71q2hUqXAdomqaN0xGT9+PP369csP+ebN\nm7Ny5UqFvIjERLij7ldQZL55M/ub32C8oroDL0VQl4Six+oSYt8++M9/AttLe+pnzpzJoEGDcL7J\njs4//3xWrFhBvXr1SniniEjZhBv0FqQtC6gZxVqkJD//DG+/HdjeoUP8aylnVqwInAXvjDOgtGPm\n2rVrx4UXXgjARRddxIoVK/iFlhAWkRjSaiep5J13vKHf/urWhXPPTUw95Uiwy/Zl+fuqVq1aLF68\nmP79+7Ns2TJODTZcX0Qkiso8GE8SYN68wLb27cGCXXCRaIrmE421a9fmpZd0Z0tE4kM9+lRx6BC8\n/HJguy7bx9zOnbB5c+E2M8jOLv59zjkeeOABJk2aFLviRERKoB59qpg5M/Cxuqws6No1MfWUI8F6\n8xddFHySnBOccwwfPpxHHnmEjIwMKlWqRK9evWJXpIhICKXp0ZdmPXqJtuefD2zr2RNq1Yp/LeVM\naS/bO+cYNmwYjzzyCAB5eXlMnTo1f6S9iEg8laZHP8LMRhRtNLPj0StHgtqwIfho+5yc+NdSzuTl\nlS7o8/LyGDp0KM8880x+W9euXZk+fXrA8rMiIvFQmqAv7W8pdV+iJVhv/rzz4LLL4l9LOfPRR/Dd\nd4XbsrLgiisC983Ly2Pw4MGMGzcuv+2GG25g6tSpVNKERiKSIOGuXqdBe4ly6BAEG8yVk6PR9nHg\nl9n5rrjCC3t/x48fZ+DAgYUG3vXu3ZtJkyZRoYKGwohI4ijAk12oQXh9+yamnnLkp59gypTA9p49\nC7/Ozc2lb9++hUK+X79+TJ48WSEvIgmnoE92oQbh1dSkhLH2yitw4EDhturVwX/wvHOOPn36MG3a\ntPy2W265hfHjx5NZltVuRESiTEGfzD75JPggvMGD419LOTR2bGDbTTdBtWoFr82MLl265A+0u/32\n2xk7diwZGfqnJSLJQdcVk9kLLwS2nXeelqSNg7VrvY+igj3o0KdPH44dO8bHH3/MY489ptH1IpJU\nFPTJ6tAhmDgxsH3wYA3Ci4Ngd0wuuQR869EEGDBgQGwLEhEpI11fTFYzZ8KPPxZuq1IF+vRJTD3l\nyE8/wdSpge05OXDgwAH+8Ic/8MMPP8S/MBGRMlDQJ6tgN4g1CC8upk0LPgivc+f9dOrUiWeeeYaO\nHTuyb9++xBQoIlIKCvpk9Mkn3pK0RWkmvLgIdtm+R4+9dO/egXd8/1/WrFnD/Pnz41yZiEjp6R59\nMgo1E54G4cVc8EF4e3jvvQ5s2rQuv+XJJ5/kpptuimttIiJloaBPNqFmwtMgvLgI/BvrO6pUac+m\nTf/Nb3nuuecYrEccRSRFKOiTzYwZGoSXIIGD8L4G2nHo0EbAe2Z+3LhxDBw4MBHliYiUiYI+2Wgm\nvIQpPAjvSyAb2AJARkYGEydOpI/+4BKRFKOgTyYahJdQBQ867ATaANsByMzMZMqUKfQsOsm9iEgK\n0Kj7ZPLSS4Ft55+vQXhxsHattyStpwZQB4AKFSoyY8YMhbyIpCwFfTIJNq+9lqONi8LTFpwMvEm1\napcze/YsunfvnqCqREQip0v3ycI52LgxsP2aa+JfSzkTfCa8mowe/TZduuiPLBFJbQr6ZPH117B/\nf+G2KlWgYcPE1JPi8vK8JxXD8Y9//JeDB9cC/fPbvOVoFfIikvoU9MkiWG++aVPQcqelkpcHDzwA\n//43hDdD7TqgA7DH97o/4D3N6L8crYhIqlKKJItgQd+sWfzrSHEPPgj/+7/hhvx/8B6hOxHyw4C9\ngB50EJH0oaBPFgr6iM2ZAyNHhrv3u0B74MTkRDWBxUAtLrkELrggBgWKiCSAgj5ZKOgjsnEj9O0b\n7t6rgauAn3yvTwVWAC0BGD482tWJiCSO7tEnCwV9me3bB9ddF7i0LEBWVuFhDsePL+PIkWuBEyP1\nfkFW1jIyMs6jYUO47TbQ03Qikk7Uo08GP/4I33xTuC0zExo3Tkw9KSQvzxs4t2VL4LZrr4WDBws+\nXnvtTcy6cCLk69Wrx8aNqzh06DwOHoQNG2Do0PjWLyISawr6ZBCsN3/WWVCpUvxrSTEjRkCwZeGb\nNoXJkwt68/PmzaNbt24cPnwYgPr167Nq1SrOOeec+BUrIpIACvpkoMv2ZTJ7Njz0UGB79erewLwa\nNQrajhw5wvHjxwFo1KgRq1evprGumIhIOaCgTwYK+lLbsAFuvjn4tpdfhqId9R49ejBp0iSaNGnC\nqlWrOOOMM2JfpIhIElDQJwMFfan8+GPowXcPPABduwZ/34033sj69etp0KBBbAsUEUkiCvpkoKAP\n24nBd59+Grita1e4/37v6+nTp7N79+6AfSpXrhzjCkVEkouCPtEOHYIdOwLbNUgsqHHj4I03Atv9\nB9+NGTOGnj170r59e/bs2RO4s4hIOaKgT7QtW7yV6/zVr++NKJNCnIMnnghsPzH47uSTYfTo0QwZ\nMgSA9evX89e//jXOVYqIJBcFfaLpsn3Y3n47+Ok6MfjukUceYdiwYfntrVq14l//+lccKxQRST4K\n+kRT0Ift+ecD21q39u7NP/TQQ9x999357VdccQWLFi2iZs2acaxQRCT5KOgTTUEflj17YMaMwPac\nHMd9993H/SdG4QFt27Zl4cKFnHzyyXGsUEQkOWmu+0RT0Idl8mQ4cqRw2ymnONau/RuPPz4qv61D\nhw7MmTOHqlWrxrlCEZHkpB59IuXmBp+kXUFfiHMwdmxAK40a3VEo5Dt37szcuXMV8iIifhT0ibRj\nBxw9WrjtlFOgTp3E1JOk3n4bNm0q2nqEjIx1+a+6devGrFmzyMrKimttIiLJTpfuEynUZXuz+NeS\nxAJ789CmTRZz586nY8eO1K9fnylTplCxYsX4FycikuQU9Imk+/MBnIMXX4TXXoP9+722Dz8M3C8n\nB6pXr86iRYuoUqUKFSroR1lEJBj9dkwkBX2ABx4IviIdHAOWAldz6qlw/fVea3VNLCQiUizdo0+k\ndesC28px0M+aFSrkjwK9gM7Ac/TvD5qyXkQkPAr6RPn2W1i/PrD9ggviX0sS+OSTUMv
OHgF6ALN8\nr2+jZct341aXiEiq06X7RFm+PLDt3HOhXr3415Jge/d6y84ePFh0yyGgO7Aov6VDh2H07HlZHKsT\nEUltCvpEWbo0sK19+/jXEWW5ufD00/DWW4FPDoayfTts3Vq09SC1a3fl++8L/iC6667hPPro3zE9\nlSAiEjYFfSI4B0uWBLZ36BD/WqIoLw969vTutUfmJ6pUuYbvv38rv2XEiBHcf//9CnkRkVJS0CfC\np5/Czp2F2ypUgCuvTEw9UfLQQ9EI+X1kZl7NoUPv5bc8/PDDDB8+PNIDi4iUSwr6RAh22b5Vq5Re\ng37uXBgxItKj7AU6cvx4wYPzjz32GHfeeWekBxYRKbcU9ImQZvfnN22CPn0iP05GxkfAOvLyvNdP\nPfUUQ4cOjfzAIiLlmII+3nJzg4+4T9H78/v2eSPmf/opcNuQIXDVVeEdJzMTLrigHe+99wq9e/fm\n6aefJicnJ7rFioiUQwr6eFu71ktHf9WrwyWXJKaeCOTlQd++sHlz4LZrroEnn4SMUs7UcMMNN/Dp\np5/SsGHD6BQpIlLOacKceAt22b5NG0jBBVlGjoR58wLbGzeGl18uOeR37tzJ119/HdCukBcRiR4F\nfbwFe6wuBe/Pv/46PPhgYHu1ajBnDtSsWfz7P/vsM1q3bk27du347rvvYlOkiIgo6OPq4EF4N8j0\nrSl2f37TJu+SfTCTJ0Pz5sW/f9u2bbRu3ZodO3awceNGOnfuTN6JEXgiIhJVCvp4eustOHascNtp\np8E55ySmnjLYtw+6dQs++O6++7yBecXZvHkzV155JV988QUAlStXZuTIkWSU9ma+iIiEJeG/Xc2s\nk5ltNrOtZnZ3kO3DzGyDma03s2Vmlro3cEM9Vpcis72dGHy3ZUvgti5dSn6OfsOGDbRu3ZqvvvoK\ngKysLObOnUvnzp2jX6yIiAAJDnozywTGAFcDzYHeZlb0wu86oKVz7tfATODR+FYZRSk+7e2DDwYf\nfNekScmD79avX0+bNm349ttvAahatSoLFizgqnCfvxMRkTJJdI/+UmCrc267c+4o8ArQzX8H59wK\n59zPvpfvA/XjXGN0hFqWNjs7/rWUweuve6Psi6pe3Rt8V6NG6Pd+9NFHtG3blt27dwNQrVo1Fi1a\nRNu2bWNUrYiInJDooD8d8J/0fZevLZRBwMKYVhQroZalPe20+NdSSiUNvmvWLPR7P/jgA7Kzs/nh\nhx8AqFGjBkuWLOGKK66IQaUiIlJUykyYY2Z9gJZA6xDbc4AcgAYNGsSxsjClyGN1x4/D7NmwYYO3\nyB7AlCnBB9/df783MK84o0aNYp9vgqBatWqxZMkSLr744ihXLSIioSQ66L8EfuX3ur6vrRAzaw/c\nC7R2zh0JdiDn3PPA8wAtW7Z00S81As4FH4iXhPfne/aE114reb9rr4UHHih5v0mTJrFnzx4+/vhj\nli5dSosWLSIvUkREwpbooP8QaGxmZ+AFfC/gRv8dzOxCYCzQyTmXmjOrpMiytGvXhhfyTZt6l+zD\neSKuatWqzJs3jy+//JKmTZtGXqSIiJRKQu/RO+dygSHAImAjMN0594mZjTSzrr7dRgHVgBlm9n/N\nbG6Cyi27YJftk3BZ2oVhjH4oafDd9u3bA9qqVaumkBcRSZBED8bDObfAOdfEOXeWc+7vvrb7nXNz\nfV+3d87Vdc5d4PvoWvwRk1CKXLYPVqa/rCyYOjX0/D6vv/46zZo1Y/To0dEvTkREyiTRl+7TX24u\nrFgR2J5kA/FCzc47bJg3f/3JJ3sr0oUK+RkzZnDjjTeSm5vLsGHDqFWrFv37949pzSIiUjIFfayt\nWZMSy9KGmp33scdKnrhv6tSp9O3bN3+++rPOOovsFJkfQEQk3SX80n3aC3Y9vG3bpFuWNtSkfSWF\n/MSJE+nTp09+yDdt2pTVq1cn5yOOIiLlkII+1kLNb59kylLmCy+8wIABA3C+B+7PPfdcVq1axWkp\nMAmQiEh5oaCPpQMHgt/4TrKgDzU7b7t2od8zZswYcnJy8kO+RYsWrFixgrp168aoShERKQsFfSwF\nu/F9+ulJtyztsmWBbeedB/XqBd//8ccfZ8iQIfmvW7ZsyfLly6lTp06MKhQRkbJS0MdSiixLW5rL\n9p999hn33HNP/utWrVqxdOlSTjnllBhVJyIikVDQx4pzMH9+YHuSPT8fanbeUEHfqFEjZs2aRcWK\nFfntb3/L4sWLqVHc0nUiIpJQerwuVt5+G7ZsCWxPssfOtmwJPjtv66BLB3k6d+7MkiVLaNmyJSed\ndFJsCxQRkYioRx8rY8cGtrVpE/rGd4IE681fdpk3SQ6Acy5/iVl/rVu3VsiLiKQABX0s7NkDM2cG\ntg8eHP9aSlDcZXvnHH/+85+59NJL+eqrr+JbmIiIRIWCPhYmTYIjRVbTrV0bundPTD0hFDc7b15e\nHrfddhtPPfUU27ZtIzs7m++//z7+RYqISER0jz7anIPnnw9s798fKleOeznFGT06+Oy8F198nFtu\nuZWXXnopv71FixYadCcikoLUo4+2t96CTZsC22+9Nf61FGPJErj77sD21q1zueWW/oVCvk+fPkyZ\nMoWKSTZtr4iIlEw9+mgL1ptv0waaNIl7KaFs3w49e4Jveno/x/jppz7Mnz89v2XAgAG88MILZGZm\nxrVGERGJDvXooykFBuEdPAjXXQd79xbdcpSzz+7JqlUFIT948GDGjRunkBcRSWEK+mhK8kF4zsGg\nQfDf/xbdcphTT72erVtn57cMHTqUZ599lowM/YiIiKQy/RaPpgkTAtuSaBDeqFHw6quB7TVqjGLP\nnjfyX9911108+eSTWJJN1SsiIqWnoI+Wn38O1lVOmkF4ixfD8OGB7VWqwKJFf6Fjx44A3HPPPTz6\n6KMKeRGRNKHBeNGyebN3bdxfw4ZJMQhv2zbo1SvY4DsYNw5+85ssZs+ezYwZM+jbt69CXkQkjSjo\no2XjxsC2Zs3iXwewfz98/bX3dW4u9O5ddPDdYSCLO++EG2/0WqpUqcLNN98c50pFRCTWFPTRkgRB\nf/w43HEHjBkTvPfu+QG4irPOup5//vOeUDuJiEia0D36aEmCoL/vPvj3v4sL+d1ANrCWbdvu5Ykn\nHotfcSIikhDq0UdLgoN+5kz4xz+K2+MboD3wCQBmRs2aNeNQmYiIJJKCPhpyc+HTTwPb4xT0H3/s\nPcUX2ld4PfnNAGRkZDB+/Hj69esX++JERCShFPTRsG0bHDtWuK1OHTj11Kgc/siRwMVnTjgx093B\ng4HbGjUCs53s2pXNsWNbAcjMzGTy5Mn07t07KrWJiEhyU9BHQ4wu2+flwe23w+TJ3mP6pdG9O4wa\ntYP27bM5duwzACpUqMC0adPo0aNHxLWJiEhqUNBHQ4yCfvRoGDu29O9r3hzuv38rbdtms3PnTgAq\nVqzIjBkz6NatW8R1iYhI6lDQR0MMgv74cX
jiidK/r0YNmDkzl65dr84P+cqVKzN79myuvvrqiGoS\nEZHUo8froiEGQb9wIezaVbr3mMHUqdCsWQWeffZZKleuTJUqVZg/f75CXkSknFKPPlLOwaZNge0R\nBn2wZe2rVIFq1YLv36iR9xx9587e6/bt2zNnzhyysrJo06ZNRLWIiEjqUtBHatcuOHCgcFu1alC/\nfpkPuXMnvPFGYPvMmQVBXlReXl7AkrKdOnUqcw0iIpIedOk+UsEu259zjncdvYzGjw+c3a5BA/At\nMBfg/fff58ILL+Tzzz8v8/cUEZH0pKCPVJTvz+fmeivKFXXLLZCZGdj+1ltv0aFDB9avX092dja7\nSntjX0RE0pqCPlJRDvo33wwchJeZCQMHBu67fPlyOnXqxAHfrYP9+/ezt/AydSIiUs4p6CMV5aAP\n9tx8ly5w+umF2xYvXsw111zDz76ZdOrWrcvKlSs5//zzy/y9RUQk/SjoIxXFoN+5ExYsCGwfPLjw\n6zfeeINrr72Ww4cPA3DaaaexatUqzj333DJ9XxERSV8K+kjs2QO7dxduq1gRzjqrTId78cXgg/Cu\nuqrg9ezZs+nevTtHjx71bW/A6tWradq0aZm+p4iIpDcFfSSC9eYbN4YKpX9qMTfXC/qibr21YBDe\n9OnT+d3vfscx3wI6Z5xxBqtXr+asMv5hISIi6U9BH4koXrYPNhNeZiYMGOB9vXLlSnr37s3x48cB\naNy4MatXr6Zhw4Zl+n4iIlI+KOgjEcWgDzYT3rXXFgzCu/zyy+nSpYvvWzRj1apV1I9gUh4RESkf\nFPSl8f33cPPNcPbZ0LBh8CHyZQj6UIPwcnIKvq5UqRLTp0/nT3/6EytXrqRevXql/j4iIlL+aArc\n0ujVC5YtK36fMgR9sEF4DRsWHoQH3ip0T5RlSTsRESm31KMP1/fflxzyZlDK0e+hZsJr1uxfjBhx\nX6mOJSIiUpR69OHaurXkfa68EqpWLdVhFy6EL78s3Gb2MG++eS9vvuldsr/vPgW+iIiUjXr04dq+\nvfjtzZrBs8+W+rCFb/M7YATO3ZvfsnTp0vxn5kVEREpLQR+uYEE/aBB89hl88w1s2FDq+/NffOH1\n6D0OuBd4MH97dnY2CxYsoFKlSmUsWkREyjtdug9XsKBv0cIbNVdGBYPwHHAX8Hj+to4dOzJ79myq\nVKlS5uOLiIioRx+uYEF/5pllPlzBTHh5wB/xD/kuXbowZ84chbyIiERMPfpwbdsW2BZB0C9YAF9+\nmQfcBhTMltO58/W89to0Xa4XEZGoUI8+HIcPBw6NB2jUqMyH9GbC+xP+IX/66b2YM+cVhbyIiESN\ngj4cn38OzhVuO+00KOOl9YJBeL8HTjyO15exYydTsWLFCAoVEREpTJfuwxHl+/MFg/B+C8wDXqNB\ng6fo1CmzzMcUEREJRkEfjmBBX8alYQNnwssGssnJKViOVkREJFp06T4cUejRHz58mIEDB/Lii1v5\n6qvC2/yXoxUREYkm9ejDEWHQ//zzz1x33XUsWbKEadOWAquBRvnbu3b1bvmLiIhEm4I+HGE+Wucc\nTJjgrX1z+LDXlpt7gHffvZbdu1cCcPjwTmA2cEf++/yXoxUREYkmBX1JnAu7R/+HPxSd7n4/0Bl4\nx69tJP4hH2w5WhERkWhR0Jdk9244eLBwW9WqULduoaaxY4uG/I9AJ+ADv7ZHgL8Wet+tt0KGRkqI\niEiMKOhLEqo3b5b/8p13YOhQ/x32AFcBH/m1jQb+XOgwlSrBwIHRK1VERKQoBX1JSrhs/9VX0KMH\nHDt2ouU7oAOw3u8Nz+BNdVsgIwOeew7q1YtuuSIiIv4U9CUpJuiPHIEbbvBWqfXsAdoCGwAwMwYP\nfoF27QYVenuFCvA//wN16sSsahEREUBBX7IQQe8cDBkC77/vv6EGcC6wgYyMDCZMmEDfvn3jU6eI\niEgQCvqShHi0buzYojPcAVSgbt0pXHIJ3HTT9fTq1SseFYqIiISkoC9JkB79R/vO4o9/DNy1YkWY\nNasil132KuY3WE9ERCRR9GBXcUIsT9v9jka+wXefAn8HvJXtnn4aLr8chbyIiCQN9eiLE2R52u8q\nns4X32UBG/EWpPkGOMitt/6dnBwFvIiIJBf16IsT5LL9pmNnAv8FWuOFPGRkPMEdd+yIa2kiIiLh\nUNAXJ0jQr6AG3iN0uwEwO4np0xfSrFnZ16cXERGJFV26L06REfcfAv9kKeBbsYaTee65hdxww+Xx\nrkxERCQsCe/Rm1knM9tsZlvN7O4g2yub2au+7R+YWaO4FefXo38PaA8czg/5mgwfvoScHIW8iIgk\nr4QGvZllAmOAq4HmQG8za15kt0HAXufc2XgTxj8Sr/rytnlBvxpv5vr9+VtO4frrl/Hww5fGqxQR\nEZEySXSP/lJgq3Nuu3PuKPAK0K3IPt2Aib6vZwLtLA7Pr7k8x9FN21mO91fIAV+7cSotWqxk2rSL\nYl2CiIhIxBJmlPJhAAAJKElEQVQd9KcDO/1e7/K1Bd3HOZcL7ANOjXVhE0btJiv3INWATF9bXaB2\n7VUsXHg+lSrFugIREZHIJTroo8bMcsxsjZmt2b17d0THOnoUFo7xLttfCrwJnAOMozFz552rFedE\nRCRlJDrovwR+5fe6vq8t6D5mVgFv5Zg9RQ/knHveOdfSOdeyToTLwlWqBC/eWzAQ73LgY+C8C5rT\nqlVEhxYREYmrRD9e9yHQ2MzOwAv0XsCNRfaZC/TDG/jeA1juXJHp6mKgerdsjv/ideb/ezufr9hO\nmwbb+XVvjbAXEZHUktCgd87lmtkQYBHerfDxzrlPzGwksMY5Nxd4EZhsZluBH/D+GIi9X/6SzO5d\n6dYdFi6Ec9oBui8vIiIpxuLQOY67li1bujVr1iS6DBERkbgws7XOuZbBtiX6Hr2IiIjEkIJeREQk\njSnoRURE0piCXkREJI0p6EVERNKYgl5ERCSNKehFRETSmIJeREQkjSnoRURE0piCXkREJI0p6EVE\nRNKYgl5ERCSNKehFRETSmIJeREQkjSnoRURE0lharkdvZruBz6N4yNrA91E8Xnml8xg5ncPI6RxG\nTucwctE+hw2dc3WCbUjLoI82M1vjnGuZ6DpSnc5j5HQOI6dzGDmdw8jF8xzq0r2IiEgaU9CLiIik\nMQV9eJ5PdAFpQucxcjqHkdM5jJzOYeTidg51j15ERCSNqUcvIiKSxhT0fsysk5ltNrOtZnZ3kO2V\nzexV3/YPzKxR/KtMbmGcw2FmtsHM1pvZMjNrmIg6k1lJ59BvvxvMzJmZRj8HEc55NLPf+34ePzGz\nqfGuMdmF8e+5gZmtMLN1vn/TnRNRZ7Iys/Fm9p2ZfRxiu5nZU77zu97MLopJIc45fXi3LzKBbcCZ\nQCXg/wHNi+xzO/Cc7+tewKuJrjuZPsI8h22Bqr6vb9M5LP059O1XHVgNvA+0THTdyfYR5s9iY2Ad\nU
Mv3+heJrjuZPsI8h88Dt/m+bg58lui6k+kDuBK4CPg4xPbOwELAgFbAB7GoQz36ApcCW51z251z\nR4FXgG5F9ukGTPR9PRNoZ2YWxxqTXYnn0Dm3wjn3s+/l+0D9ONeY7ML5OQR4CHgEOBzP4lJIOOfx\nVmCMc24vgHPuuzjXmOzCOYcOONn3dQ3gqzjWl/Scc6uBH4rZpRswyXneB2qaWb1o16GgL3A6sNPv\n9S5fW9B9nHO5wD7g1LhUlxrCOYf+BuH9NSsFSjyHvst7v3LOvRHPwlJMOD+LTYAmZvaOmb1vZp3i\nVl1qCOccjgD6mNkuYAEwND6lpY3S/s4skwrRPqBIOMysD9ASaJ3oWlKJmWUAjwP9E1xKOqiAd/m+\nDd6VpdVmdr5z7seEVpVaegMTnHP/MrPLgMlmdp5zLi/RhUkB9egLfAn8yu91fV9b0H3MrALepao9\ncakuNYRzDjGz9sC9QFfn3JE41ZYqSjqH1YHzgJVm9hnefb25GpAXIJyfxV3AXOfcMefcDmALXvCL\nJ5xzOAiYDuCcew/IwpvDXcIT1u/MSCnoC3wINDazM8ysEt5gu7lF9pkL9PN93QNY7nwjKgQI4xya\n2YXAWLyQ1z3RQMWeQ+fcPudcbedcI+dcI7xxDl2dc2sSU27SCuff8xy83jxmVhvvUv72eBaZ5MI5\nh18A7QDMrBle0O+Oa5WpbS5ws2/0fStgn3Pu62h/E12693HO5ZrZEGAR3mjT8c65T8xsJLDGOTcX\neBHv0tRWvAEWvRJXcfIJ8xyOAqoBM3zjGL9wznVNWNFJJsxzKCUI8zwuAq4ysw3AceAvzjldofMJ\n8xzeCbxgZnfgDczrr85PATObhvfHZG3fOIYHgIoAzrnn8MY1dAa2Aj8DA2JSh/6fiIiIpC9duhcR\nEUljCnoREZE0pqAXERFJYwp6ERGRNKagFxERSWMKehEplpn1962S1z/RtYhI6SnoRSQpmdlKM9Pz\nvyIR0oQ5IlKS2Xgz8EV9xi4RiT0FvYgUyzm3D2+lRhFJQbp0L1LOmFkj3z33CWZ2jpnNMbMfzOyg\nmb1tZlcV2b/QPXozyzKzH83sO9/iTsG+x7O+93Qp0t7OzN70fb8jZrbFzP5pZjWK1odvZUPfcU58\nrIzy6RBJewp6kfLrDOA94BS8hYZmABcDC82sZ6g3OecOA68CdYCri243s8pAT+Bb4E2/9sHAEuB/\n8BaUGY23ZsTfgHfNrKZv1x+BB4HPfa8f9PuYUKb/UpFyTHPdi5QzZtYI2OF7+Zhz7i9+21rihf8B\noKFzbr+vJ/8SMMA5N8G332XAu8BrzrkeRY7/O7ylSx93zt3pa2uItwzsEeBS59wmv/2fAW4DXnDO\n5fi1rwRaO+csWv/tIuWRevQi5dc+YKR/g2+52ylATaB7qDf61h7fAlxrZqcU2XxiKeeJfm19gErA\n0/4h73Mv8BPQ13c1QESiSEEvUn595Jz7KUj7St/nC0t4/0S88M5frtnM6gIdgXXOufV++17k+7y8\n6EGcc3uBdXhrmZ8TVuUiEjYFvUj59W2I9m98n2uE2H7CJCCPgh48wE14T/NMLLLviWOFekTvRHvN\nENtFpIwU9CLlV90Q7b/0fS72kTrn3C68HvqlZnaiJ94POAZMLbL7iWP9kuDqhfM9RaT0FPQi5ddF\nZlY9SHsb3+d1YRxjgu9zPzO7APg1sNA5t7vIfieO1aZIO77R9hcAh4GNfpuO+7ZnhlGHiISgoBcp\nv2oA9/s3+Ebd34TXs54dxjFmAfvxBtv197VNCLLfy3g9/aFmdnaRbQ8BJwMvO+eO+LXv8X1uEEYd\nIhKCZsYTKb9WA7eY2W+Ad/Aun/fE6wAMds7tL+kAzrlDZjYDGATcjhfObwTZ7zMz+zMwBvjIzKYD\nu/EmxbkM2IT3PL2/ZcDvgFlmtgA4BHzunJtclv9YkfJKPXqR8msHcDmwF/g/wO+Bj4DOzrlXS3Gc\nCb7PFYFpzrmjwXZyzj2DNyL/feAGYBjwC2AUcJlz7ocibxkH/APvysNf8Xr+g0pRl4igCXNEyh2/\nCXMmOuf6J7QYEYk59ehFRETSmIJeREQkjSnoRURE0pju0YuIiKQx9ehFRETSmIJeREQkjSnoRURE\n0piCXkREJI0p6EVERNKYgl5ERCSN/X+cKf/vaklWVQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = plt.figure(figsize=(8, 8))\n", + "results = pd.concat(dfs)\n", + "pivot_plot(results, fig=fig);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "all,-slideshow", + "formats": "ipynb,Rmd" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/source/learning/Full_model_LASSO.Rmd b/doc/source/learning/Full_model_LASSO.Rmd new file mode 100644 index 000000000..bbbe6bf63 --- /dev/null +++ b/doc/source/learning/Full_model_LASSO.Rmd @@ -0,0 +1,141 @@ +--- +jupyter: + jupytext: + cell_metadata_filter: all,-slideshow + formats: ipynb,Rmd + text_representation: + extension: .Rmd + format_name: rmarkdown + format_version: '1.1' + jupytext_version: 1.1.1 + kernelspec: + display_name: Python 3 + language: python + name: python3 +--- + +# Inference in the full model + +This is the same example as considered in [Liu et al.](https://arxiv.org/abs/1801.09037) though we +do not consider the special analysis in that paper. We let the computer +guide us in correcting for selection. + +The functions `full_model_inference` and `pivot_plot` below are just simulation utilities +used to simulate results in least squares regression. The underlying functionality +is contained in the function `selectinf.learning.core.infer_full_target`. + +```{python} +import functools + +import numpy as np, pandas as pd +import matplotlib.pyplot as plt +# %matplotlib inline +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance # to generate the data +from selectinf.learning.core import normal_sampler # our representation of the (limiting) Gaussian data + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.fitters import gbm_fit_sk +``` + +We will know generate some data from an OLS regression model and fit the LASSO +with a fixed value of $\lambda$. In the simulation world, we know the +true parameters, hence we can then return +pivots for each variable selected by the LASSO. These pivots should look +(marginally) like a draw from `np.random.sample`. This is the plot below. + +```{python collapsed=TRUE} +np.random.seed(0) # for replicability + +def simulate(n=200, + p=20, + s=5, + signal=(0.5, 1), + sigma=2, + alpha=0.1, + B=6000, + verbose=False): + + # description of statistical problem + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + + # this declares our target as linear in S where S has a given covariance + sampler = normal_sampler(S, covS) + + def base_algorithm(XTX, lam, sampler): + + p = XTX.shape[0] + success = np.zeros(p) + + loss = rr.quadratic_loss((p,), Q=XTX) + pen = rr.l1norm(p, lagrange=lam) + + scale = 0. 
+ noisy_S = sampler(scale=scale) + loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) + problem = rr.simple_problem(loss, pen) + soln = problem.solve(max_its=100, tol=1.e-10) + success += soln != 0 + + return set(np.nonzero(success)[0]) + + + lam = 3.5 * np.sqrt(n) + selection_algorithm = functools.partial(base_algorithm, XTX, lam) + if verbose: + print(selection_algorithm(sampler)) + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + sampler, + success_params=(1, 1), + B=B, + fit_probability=gbm_fit_sk, + fit_args={'n_estimators':500}) +``` + +Let's take a look at what we get as a return value: + +```{python} +while True: + df = simulate(verbose=True) + if df is not None: + break +df.columns +``` + +```{python} +dfs = [] +for i in range(30): + df = simulate() + if df is not None: + dfs.append(df) +``` + +```{python} +fig = plt.figure(figsize=(8, 8)) +results = pd.concat(dfs) +pivot_plot(results, fig=fig); +``` diff --git a/doc/source/learning/Full_model_LASSO.ipynb b/doc/source/learning/Full_model_LASSO.ipynb new file mode 100644 index 000000000..fbceea950 --- /dev/null +++ b/doc/source/learning/Full_model_LASSO.ipynb @@ -0,0 +1,244 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inference in the full model\n", + "\n", + "This is the same example as considered in [Liu et al.](https://arxiv.org/abs/1801.09037) though we\n", + "do not consider the special analysis in that paper. We let the computer\n", + "guide us in correcting for selection.\n", + "\n", + "The functions `full_model_inference` and `pivot_plot` below are just simulation utilities\n", + "used to simulate results in least squares regression. The underlying functionality\n", + "is contained in the function `selectinf.learning.core.infer_full_target`." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [] + } + ], + "source": [ + "import functools\n", + "\n", + "import numpy as np, pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import regreg.api as rr\n", + "\n", + "from selectinf.tests.instance import gaussian_instance # to generate the data\n", + "from selectinf.learning.core import normal_sampler # our representation of the (limiting) Gaussian data\n", + "\n", + "from selectinf.learning.utils import full_model_inference, pivot_plot\n", + "from selectinf.learning.fitters import gbm_fit_sk" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will know generate some data from an OLS regression model and fit the LASSO\n", + "with a fixed value of $\\lambda$. In the simulation world, we know the\n", + "true parameters, hence we can then return\n", + "pivots for each variable selected by the LASSO. These pivots should look\n", + "(marginally) like a draw from `np.random.sample`. This is the plot below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "np.random.seed(0) # for replicability\n", + "\n", + "def simulate(n=200, \n", + " p=20, \n", + " s=5, \n", + " signal=(0.5, 1), \n", + " sigma=2, \n", + " alpha=0.1, \n", + " B=6000,\n", + " verbose=False):\n", + "\n", + " # description of statistical problem\n", + "\n", + " X, y, truth = gaussian_instance(n=n,\n", + " p=p, \n", + " s=s,\n", + " equicorrelated=False,\n", + " rho=0.5, \n", + " sigma=sigma,\n", + " signal=signal,\n", + " random_signs=True,\n", + " scale=False)[:3]\n", + "\n", + "\n", + " XTX = X.T.dot(X)\n", + " XTXi = np.linalg.inv(XTX)\n", + " resid = y - X.dot(XTXi.dot(X.T.dot(y)))\n", + " dispersion = np.linalg.norm(resid)**2 / (n-p)\n", + "\n", + " S = X.T.dot(y)\n", + " covS = dispersion * X.T.dot(X)\n", + " \n", + " # this declares our target as linear in S where S has a given covariance\n", + " sampler = normal_sampler(S, covS) \n", + "\n", + " def base_algorithm(XTX, lam, sampler):\n", + "\n", + " p = XTX.shape[0]\n", + " success = np.zeros(p)\n", + "\n", + " loss = rr.quadratic_loss((p,), Q=XTX)\n", + " pen = rr.l1norm(p, lagrange=lam)\n", + "\n", + " scale = 0.\n", + " noisy_S = sampler(scale=scale)\n", + " loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0)\n", + " problem = rr.simple_problem(loss, pen)\n", + " soln = problem.solve(max_its=100, tol=1.e-10)\n", + " success += soln != 0\n", + " \n", + " return set(np.nonzero(success)[0])\n", + "\n", + " \n", + " lam = 3.5 * np.sqrt(n)\n", + " selection_algorithm = functools.partial(base_algorithm, XTX, lam)\n", + " if verbose:\n", + " print(selection_algorithm(sampler))\n", + " # run selection algorithm\n", + "\n", + " return full_model_inference(X,\n", + " y,\n", + " truth,\n", + " selection_algorithm,\n", + " sampler,\n", + " success_params=(1, 1),\n", + " B=B,\n", + " fit_probability=gbm_fit_sk,\n", + " fit_args={'n_estimators':500})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's take a look at what we get as a return value:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{19}\n" + ] + }, + { + "data": { + "text/plain": [ + "Index(['B', 'alpha', 'coverage', 'id', 'length', 'lower', 'nfeature',\n", + " 'nsample', 'pivot', 'pvalue', 'target', 'upper', 'variable',\n", + " 'bonferroni_coverage', 'bonferroni_length', 'bonferroni_lower',\n", + " 'bonferroni_pvalue', 'bonferroni_upper', 'naive_coverage',\n", + " 'naive_length', 'naive_lower', 'naive_pivot', 'naive_pvalue',\n", + " 'naive_upper'],\n", + " dtype='object')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "while True:\n", + " df = simulate(verbose=True)\n", + " if df is not None:\n", + " break\n", + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [] + } + ], + "source": [ + "dfs = []\n", + "for i in range(30):\n", + " df = simulate()\n", + " if df is not None:\n", + " dfs.append(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAfoAAAHpCAYAAABqV/58AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0\ndHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzdd3hURRfA4d+kFyB0kCC9qIiKRCwI\nhN4hNAVURJSmIPKBKCBVFCmKonQUQVEQpHcIRUEQUcSCSpEivfdA2nx/3CUku5tkk2zf8z5PHtiz\nd+8eIOTsnTtzRmmtEUIIIYR38nN1AkIIIYRwHCn0QgghhBeTQi+EEEJ4MSn0QgghhBeTQi+EEEJ4\nMSn0QgghhBcLcHUCjlCwYEFdqlQpV6chhBBCOMXPP/98TmtdyNpzXlnoS5Uqxa5du1ydhhBCCOEU\nSqkj6T0nQ/dCCCGEF5NCL4QQQngxKfRCCCGEF5NCL4QQQngxKfRCCCGEF/PKWfe2uHLlCmfOnCEh\nIcHVqYhsCAwMpHDhwuTJk8fVqQghhFvzyUJ/5coVTp8+TWRkJKGhoSilXJ2SyAKtNXFxcRw/fhxA\nir0QQmTAJ4fuz5w5Q2RkJGFhYVLkPZBSirCwMCIjIzlz5oyr0xFCCLfmk4U+ISGB0NBQV6chcig0\nNFRuvQghRCZ8stADciXvBeTfUAghMuezhV4IIYTwBVLohRBCCC8mhd5DDR8+nIIFC7o6DYf5448/\nUEqxefNmV6cihBAeTQq9EEII4cVcWuiVUp8ppc4opf5I53mllJqolDqglPpNKfWws3MUaSUkJJCU\nlOTqNIQQQtjI1Q1zPgc+Aeak83xjoLzp61FgiulXkYkLFy7w5ptvsnTpUi5fvszDDz/MhAkTePTR\nO39977//PvPmzWPfvn2EhIRQrVo1JkyYQLly5VKOiY6OpmDBgjRo0IAxY8Zw+PBhDh8+zKeffson\nn3zC+vXr6dmzJ7/99hsVK1Zk4sSJ1KhRI00uM2fOZMKECRw4cICiRYvyyiuvMGDAgDTHTJ48mdGj\nR3PhwgXq1KnDq6++6ti/ICGE650+DUOGwM8/g5deQMTdNP6Yt25af/7UvbWp9csEh+bg0kKvtf5O\nKVUqg0NaAnO01hrYoZTKq5S6S2t90p55uNMqLa1zfo5bt25Rr149Ll26xLhx4yhcuDBTpkyhXr16\n7N+/n6JFiwJw7NgxevXqRcmSJbly5QpTp07liSeeYP/+/URERKScb9u2bRw8eJAxY8YQFhaW8tyN\nGzd4/vnn6du3L0WLFmXEiBG0bt2aI0eOEBYWBsC4ceMYNGgQAwYMIDo6mp9//pkhQ4YQFhZGr169\nAFi6dCmvvPIKPXr0ICYmhi1bttClS5ec/0UIIdzXjRtQuzb89ZerM3GoUKBUqsdxwFmghOnxxVOl\nHZ+E1tqlX6a/gz/SeW4F8GSqx7FAVGbnrFq1qs7I3r170zw2yqt7fNlq2LBhukCBAlafmzlzpg4M\nDNT79u1LiSUkJOgyZcro/v37W31NYmKivnHjhs6VK5eePXt2SrxWrVo6JCREnzp1yuL9AR0bG5sS\n2717twb06tWrtdZaX758WYeHh+vhw4enee2QIUN0kSJFdGJiotZa60ceeUQ3atQozTEvvfSSBvSm\nTZsy/Hsw/7cUQniIN990/Q9cJ39dA10HdAnQh02x7XfF2OWvE9il06mJXjMZTynVTSm1Sym16+zZ\ns65Ox6U2bNhA1apVKV26NImJiSQmJgJQq1Ytdu3alXLcjh07qF+/PgUKFCAgIICwsDCuXbvGvn37\n0pyvatWqFClSxOJ9goKCiI6OTnl83333AcZIAcD27du5fv067dq1S8kjMTGROnXqcPr0aY4dO0Zi\nYiK//PILLVu2THPu1q1b2+XvQgjhhn7/HcaPd3UWTnUV4170RuAoUBu44qT3dvU9+swcB+5O9bi4\nKWZBaz0dmA4QFRVlhwFwz3Xu3Dl27NhBYGCgxXNly5YF4OjRozRo0IBq1aoxbdo0ihUrRlBQEE2b\nNuXmzbQ3k6wVeYDcuXPj53fns2JQUBBAyuvPnTsHQKVKlay+/r///iM4OJikpCQKFy6c5jnzx0II\nL5GcDN27g+kCxBdcxijy21PFugLO2o7L3Qv9MqCXUmoexiS8y9rO9+e9Uf78+YmKimLKlCkWzwUH\nBwOwZs0abty4wdKlSwkPDwcgMTGRCxcuWLwmu61m8+fPD8CKFSusflioWLEioaGh+Pv7W2xOI5vV\nCOGlZsyA7dst4/36wbPPOj8fB3j3XfhmgfH7RC7zLy8Tx96U59947n+0bvoc/wCRd0VYP4kdubTQ\nK6W+BqKBgkqpY8AwIBBAaz0VWAU0AQ4AN4AXHJGH9rLr/7p167Ju3TpKlCiR7pVxXFwcfn5+BATc\n+Rb45ptvUob57eHxxx8nNDSUEydO0LRp03SPq1KlCkuXLqVHjx4psUWLFtktDyG81uXL8MMPcO2a\nqzOxTWIivPGGZbxCBXjnHTBdiLhSXJzxV2rlmscmZ8/CWwvBKCvnMMrWnSL/8ccfp0xEdhZXz7rv\nkMnzGnjFSel4nPj4eBYuXGgRb9y4MVOnTiU6Opr+/ftTpkwZzp8/z86dOylatCh9+/alTp06JCUl\n8cILL/Diiy/y559/Mn78ePLmzWu3/PLmzcvw4cPp06cPR44coWbNmiQnJ7Nv3z42bdrE4sWLARg0\naBCtW7emZ8+etGrVii1btrBmzRq75SGEV9q5Exo2hEuXXJ1Jzk2b5hZF/vRpqFUL/vnHLmcD6gF3\n2sRMmzaNbt262ePkWeLuQ/ciA1evXqVdu3YW8U2bNrFp0yaGDh3KsGHDOH36NIULF6ZatWq0aNEC\ngMqVK/P5558zfPhwFi9ezIMPPsiCBQt4+umn7ZrjgAEDKFasGBMmTOD9998nJCSEChUqpHmfVq1a\n8fHHH/Pee+8xe/ZsoqOj+fTTT2nYsKFdcxHCa1y/Du3aeUeR79wZUk3qdaUxY+xV5JOBZtwu8kop\nPv30U154wSGD0plS2tvGrTEm46WeXW7ur7/+4t5773ViRsJR5N9S+KT+/eH9912dRc4VLAh//w0F\nCrg6E7SGu++G41ane2fHRqApEM+cOXN47rln7HViq5RSP2uto6w9J1f0QgjhSXbvhg8/dHUWOefn\nB1OmuEWRB6M5n/2KPEAdQkOXMmLEZZ57znLk1Zmk0AshhKdISjKWppm3iw0JgWbNXJNTduTPD089\nBXXrujqTFEuWWMZKloRHHrHt9VrrNCuUiheHLl0aULmynRLMASn0QgjhKSZPhp9+sowPHQoDBzo/\nHy9irdC/8Qb07Jn5a//55x+6dOnC3LlzKVWqlN1zyykp9EII95eUBCNHwvLlnrOUzBGOHrWMVapk\nrEEX2bZ/P/z5p2XcNHc5Q3v37k3p9lmnTh22bNnC3Xff
nfkLnUgKvRDC/fXubdzPFZamTQNTV0qR\nPdau5qtVg8jIjF/322+/Ua9ePW63XT99+jSHDx92u0LvNb3uhRBe6rvvpMinp1s3qF7d1Vl4PGuF\nPiYm49f88ssv1K5dO6XI58qVizVr1lhs0+0OpNALIdzXrVvG5DNhqXBheO89V2fh8U6dst6RN6NC\nv3PnTurWrZvSMjwiIoL169e7ZZEHKfRCCHc2bpyxzlqkVbw4rFoF+fK5OhOPt3y5ZRv0ihUhvfYc\n27Zto169elwyNSvKly8fsbGxPPbYYw7ONPvkHr0Qwj3t2wejRlnGH30UZs+GbG625PGCgox1X776\n57ezrAzbb968mWbNmnH9+nUAChYsyIYNG3jwwQcdmGHOSaH3UMOHD2fEiBE0aNCAtWvXpnmubdu2\nnDt3js2bN9t0rsOHD1O6dGmWL19OM09aiys8y8GD8OOPkJBg2/EzZxpD96n5+8P06cYllxA5dPUq\nbNhgGbdW6Hfv3k2TJk2Ii4sDjO27Y2Nj092G251Iofdw69at46effuIRW7s6WHHXXXexfft27rnn\nHjtmJkQqU6fatiA5M/36wQMP5Pw8QgBr1kB8fNpY0aLGjHtz999/Pw0bNmTJkiUUK1aMjRs3UtFD\nPnDKPXoPlj9/fipXrsw777yTo/MEBwfz2GOP2XXnOiFS7NkD9tiWs1QpGDYs5+cRAkhOhgkTLOMt\nWxrdec0FBgYyb948unbtypYtWzymyIMUeo+mlGLw4MEsW7aM33//3eoxJ0+epEuXLpQpU4bQ0FAq\nVKjAW2+9RXyqj7GHDx9GKcWKFSsA6Ny5s9URgkmTJhEWFsbVq1cBSE5O5r333qNcuXIEBwdToUIF\nZs+e7YA/qfBY6bVszY7JkyEsLOfnEQLjDlBWZ9sHBwczffp0ypUr57jEHEAKvYdr164d5cuXT/eq\n/ty5c+TPn58PPviANWvW8PrrrzNr1ix69+6d7jmffvppdu3axaFDh9LE58+fT5MmTcidOzcAvXv3\nZtSoUXTr1o2VK1fSqlUrunTpkvKBQQimTjXuy+dU9+7QuHHOzyMEcPIkvPmmZfyee6BePeP3X331\nFUOGDMEbdniVe/TgXrNXs/hN5efnx8CBA3nxxRcZOXIkFSpUSPN85cqVGT9+fMrj6tWrEx4eTpcu\nXfj4448JstJRq379+hQoUID58+fzpul/w/Hjx9m6dSvffPMNAAcOHGDKlCnMmjWL559/HoB69epx\n8uRJRowYIZP6BJw4Yb3/esmSUKuWbecICoInn4TnnrNvbsKn9e0Lly9bxqdNg4AAmD17Ni+88AJa\nawIDAxk6dKjzk7QjKfRe4Nlnn2XEiBGMHj2aWbNmpXlOa81HH33E9OnTOXToEDdv3kx57ujRo1aH\noAICAmjdunWaQr9gwQLCw8Np2rQpALGxsfj5+dGqVSsSExNTXlu3bl2+/vprkpKS8Pf3d8QfV3iK\nPn2Mac3mvvzSKN5CuMDq1TB/vmW8SxeoWRNmzJhB9+7dU67kFyxYQL9+/QgPD3dypvYjQ/deICAg\ngAEDBvDll19y5MiRNM99+OGH9O/fn1atWrF06VJ27tzJpEmTANIUfXPt27fn119/Zd++fYAxbN+i\nRQtCQ0MB45ZAUlISERERBAYGpnx17tyZxMRETp486aA/rfAIK1bAwoWW8a5dpcgLh7l+3fh8WaEC\n3H239a82bSxfV7AgjB1rzEPq1q1bSpF/6KGH2LRpk0cXeZAreq/RpUsXRo0axZgxY9LEFyxYQNu2\nbdPcw9+7d2+m56tVqxZFihRh/vz5dOrUiR07djAw1TBs/vz5CQgIYNu2bfhZmaJauHDhHPxphEdL\nTIRXX7WMS8tW4WBvvAGm65gsmTABZs/+gH6pdgGMiopi7dq15M+f344ZuoYUei8RHBxM//79GThw\nIFWrViUwMBCAuLg4goOD0xw7d+7cTM/n7+9Pu3btmD9/PiEhIeTNm5dGjRqlPF+nTh2SkpK4fPky\n9evXt+8fRni2778Hs4mcgPHT1At+aAr3FBcHZncubVK3Lvz333sMGnTnQuaxxx5jzZo1RERE2DFD\n15GhezAmwLnLVw50796d3Llz88MPP6TE6tevz/z585k8eTJr166lU6dOHDhwwKbzPf300/z5559M\nmDCBmJiYNBP3KlasSI8ePWjfvj1jxowhNjaWlStXMnbsWF566aUc/TmEh7PWU7R2bejQwfm5CJ+x\nYQPcuJG11wQFaSpVGpmmyNeoUYN169Z5TZEHKfReJSwsjL59+6aJDR06lA4dOvDWW2/RoUMHgoKC\nmDhxok3nq169OnfffTcnT56kffv2Fs9PmjSJIUOGMGfOHJo0aULnzp1ZuXIlNWvWtMufR3ggra0X\n+i5d3Gt1i/A61r7tMlKkCLRv/w4TJ95pwlS7dm1Wr16dsoTYWyhvWCNoLioqSu/atSvd5//66y/u\nTW9rIuFR5N/SzezeDQ8/nDYWEABnzshOa8JhkpKM1rXnzqWNf/bZnXXxqfn5QbFisG3bVho1asT1\n69dp0KABixcvJsxDmzIppX7WWkdZe07u0Qsh7MfaZVV0tBR54VA//GBZ5END4emnM26m+OSTT7Ji\nxQomT57MnDlzCAkJcWyiLiKFXghhP4sXW8Yy6ikqhB1Y+3zZoIFtHZOjo6OJjo62e07uRO7RCyHs\n4+BBsLbnQosWzs9F+Iz0poWYf75MSkpiwIAB7N+/3zmJuREp9EII+1i61DIWFWV0KRHCQX7/Hf79\nN23Mzw9Sd+FOTEykc+fOjBs3jjp16ljs4+HtpNALIezDlssqIezM2rddjRpGtzuAhIQEnnnmGb78\n8ksAjh07xvTp052Yoev57D16rTVKlvt4NG9cMeKxzpyBbdss41LohYNl9PkyPj6e9u3bszjV3JHu\n3bunu9unt/LJK/rAwEDi4uJcnYbIobi4uJQOgMLFli+H5OS0sXLl4L77XJOP8AlHjhgrOs3FxBh7\nebRu3TpNke/duzdTpkyx2rbbm/nkFX3hwoU5fvw4kZGRhIaGypW9h9FaExcXx/HjxylSpIir0/FM\nX38N48YZPynt4fp1y1hMjDTJEQ5lbVrIQw9B4cI3aNmyFevWrUuJ9+/fn7Fjx/rkz3ufLPR58uQB\n4MSJEyQkJLg4G5EdgYGBFClSJOXfUmTBypXQsaPj36dVK8e/h/Bp1obtmzS5TrNmzdm0aVNKbPDg\nwbz99ts+WeTBRws9GMVeioTwOdeuwcsvO/59ihSBRx91/PsIn3X+PHz3nXn0GqtXN2b37q0pkREj\nRjB06FCn5uZufOtGhRC+bvhwOHrU8e/z7LPg7+/49xE+a8UKo/VtaiVLhlChQmTK49GjR/t8kQcf\nvqIXwufs3g0ffujY9/Dzg+bNYcgQx76P8HnWhu1btQpg7NgvSEhI4Mknn7TY5MtXSaEXwhckJUG3\nbpaXQCEhsGM
HFC9un/cJCzOajAvhQDduwNq1lvGYGGP+zoIFC3xuZn1GpNAL4YlOnIB16+DCBduO\n/+svsLaj49Ch8OCD9s1NCAdbvx6MFdKngS+AfhQooKhe3XheinxaUuiF8DS7dxt7b9pa5NNTqRL0\n62efnIRwImPY/gRQF/gbOEezZqMJCPDNWfWZkY89QniS+HhjoltOizzA9OkQFJTz8wjhRImJsGTJ\nf0AtjCIPMJ6HH7ayoZIApNAL4VnGjYO9e3N+nu7d4Ykncn4eIZxs4cJDXLpUEzhgigQQFDSPrl0f\ncGVabk2G7oXwFAcOwNtv5/w8998Po0fn/DxCONmBAwfo2rUO8J8pEggsoGnTljIHNANS6IXwBFpD\njx5w61bauL+/0QAnwMb/yhUqwNNPQ7589s9RCAf6+++/qVu3LteunTBFgoHFQGPZOykTUuiF8ARz\n50JsrGW8b19jOF8IL/bHH39Qr149Tp8+bYqEAsuAevj7p917XliSQi+Eu7lyBXr3Nlp/3bhhxMyv\n5AFKlDA63Qnhxfbs2UO9evU4d+6cKRIOrACiAahVC/Lnd1FyHkIKvRDu5s03Yc6czI+bPBnCwx2f\njxAuFB8fT3x8PAB+frlJTl4NVE95XobtMyez7oVwJzdvwhdfZH5cu3bQtKnj8xHCxR555BFWr15N\nRERxkpPXk7rIA7Rs6Zq8PIlc0QvhTjZuNHaYy0hEhON71gvhRiIjnyA+/gDGBLw7qlc37mCJjMkV\nvRDuxNpOHandf7/R+rZYMefkI4STbd68mb2pekVoDb16QVxc2iKvFLz/vrOz80xyRS+Eu0hKgqVL\nLeNffWXciPTzg+Bgy+eF8BLr1q2jZcuW5M2bly1btlChQgUWLTLmpZp7+WV49FHn5+iJ5IpeCHfx\n449w5kzaWHCwse1raKgUeeHVVq5cSfPmzbl58yanTp2iU6dOXLqk6d3b8ti77oJ33nF+jp5KCr0Q\n7sLasH39+pArl/NzEcKJlixZQqtWrVJm15coUYK5c+cyZIji5EnL4ydONKaqCNtIoRfCHWgNixdb\nxmXtkPByCxYsoF27diQkJABQunRpvvvuO86fL8ukSZbHN2sGbdo4OUkPJ/fohXAHe/cavexT8/Mz\nhu2F8FJz586lU6dOJCcnA1C+fHk2btxIkSLFadnS+PybWlgYfPKJMRFP2E6u6IVwB9aG7atXh8KF\nnZ+LEE4wa9YsnnvuuZQif++997JlyxaKFy/ORx/Bnj2Wrxk5EkqWdHKiXkAKvRDuwFqhl2F74aWm\nTZtGly5d0KZL9sqVK7N582buuusuDh+GYcMsX/PQQ9Cnj3Pz9BZS6IVwtf/+g127LOPS8kt4Ia01\nP/74Y8rjhx56iNjYjRQqVJjkZHjllTtbPNymFEyfbvsmjSIt+WsTwtWWLbOMVa4MZcs6PxchHEwp\nxYwZM7h16xa//rqfxMS1FCmSz+J+fGq9esEjjzgvR28jhV4IV5PZ9sLH+Pv7M3XqbCpUiOPUqdwZ\nHhsZCaNGOSkxLyVD90K40sWLsHmzZbxVK6enIoQjaK1ZuXJlyv3422JjAzIt8gAffwx58jgqO98g\nhV4IV1q50mh9m1qJEsbMIyE8nNaawYMH06xZM1577bU0xT6zbR3AmKYig1s5J4VeCFdKb7a9LBQW\nHk5rTf/+/Rk9ejQAEydO5LPPPgMgMRGWL0//tblzQ8eOMHeu/FewB7lHL4SrxMXBmjWWcbmEER4u\nOTmZPn368Mknn6TEmjVrxjPPPAPA1q1w4ULa14SFwblzxrYOwr6k0AvhKrGxcP162li+fFCjhmvy\nEcIOkpOT6dGjBzNmzEiJtW7dmq+//pqgoCDA+kBWo0ZS5B1Fhu6FcBVrP+2aN5fFwsJjJSUl8eKL\nL6Yp8k8//TTz5s1LKfJaS38oZ5NCL4QrJCVZXz8vP+2Eh0pMTKRTp058/vnnKbHnnnuOL7/8ksDA\nwJTYnj1w5Eja1/r7Q9OmTkrUB0mhF8IVtm+Hs2fTxkJCoEED1+QjRA4kJCTQsWNHvvrqq5RYly5d\nmDVrFgFmI1TWruZr1YL8+R2dpe+SQi+EK1j7adegAYSHOz8XIXLo8OHDbNiwIeXx7Xv0/v7+FsdK\nfyjnk0IvhLPJ3vPCy5QvX57169cTERHBq6++yuTJk/Hzsywv//4Lv/1m+XrZ1sGxZNaPEM5y65Zx\nb37vXuMnXmqy97zwcFWrVuXXX3+lZMmSqHQWvy9dau11Ro8o4Tguv6JXSjVSSv2jlDqglHrTyvMl\nlFKblFK7lVK/KaWauCJPIbLt0CF44gljoXB4uPXdOWrUgIIFnZ+bENlw7do1/vrrL4t4qVKl0i3y\nILPtXcWlhV4p5Q9MAhoD9wEdlFL3mR32FvCN1roK0B6Y7NwshciBuDjj3vv27ZCcnP5x8tNOeIgr\nV67QqFEjatasyR9//GHz6w4eNBrlmJNvfcdz9RV9NeCA1vpfrXU8MA8wv1ujgdtbGkQAJ5yYnxA5\nM2oUHDiQ+XFyk1J4gEuXLtGgQQO2bdvGuXPnqFevHhcvXsz0dVrDyy9bftYtWxYqVXJQsiKFq+/R\nRwL/pXp8DHjU7JjhwDqlVG8gHKjnnNSEyKE//4SxYzM/rm5dKF3a8fkIkQPnz5+nQYMG/PLLLymx\ngQMHki9fvkxfO28erFtnGX/2Well7wyuvqK3RQfgc611caAJ8IVSyiJvpVQ3pdQupdSus+brk4Vw\ntuRk6N7d2L3DXGio8RURYUzAmz3b+fkJkQVnz56lTp06aYr85MmT6dOnT6avvXgRXnvNMl6iBPTv\nb88sRXpcXeiPA3enelzcFEvtReAbAK31diAEsJi1pLWerrWO0lpHFSpUyEHpCmGjTz+Fbdss40OH\nwo0bxtelS0Z3vMhI5+cnhI1OnTpFdHQ0v5nWxSmlmDlzJj179rTp9W+8AWfOWMYnTYJcueyZqUiP\nq4fufwLKK6VKYxT49kBHs2OOAnWBz5VS92IUerlkF+7r9GkYMMAyXr48DBzo/HyEsNGRI/DNN3DC\nNBPq2rXjfPttHS5e3AeAUn40aPA5f/zxHH37Zn6+mzchVdv7FG3aQLNmdkxcZMilhV5rnaiU6gWs\nBfyBz7TWfyqlRgK7tNbLgH7ADKVUX4yJeZ211tp1WQuRib59jat1c1OnGm1uhXBD//xjrPK8c+fz\nKFAHOGh67I/WX7J2bXvWrs3+++TODRMn5ihVkUWuvqJHa70KWGUWG5rq93uB6s7OS4hsWbsWvv7a\nMv7881CnjvPzEcIGycnQpYv59gszuVPkAzAWRbXJ8XuNHg3FiuX4NCILXH2PXgjvceMGWLtvWaAA\njB/v/HyEsNHMmfDDD+bR4UAXIAhYhD2KfLVq0KNHjk8jskgKvRD2MmqU0QXP3Pjx0vVOuK1Tp4wJ\nc5b8gOnADiDn7ZmLFjUWmFjZ50Y4mMuH7oXwCr//DuPGWcajo41heyHc
1J0pJUcwFkEZ139DhkC+\nfP5AlRy/R6FC0LCh8atwPin0QuRUemvmg4KMCXjSEUTY0dWrxv5I9rB1q9HMBnYD9YFWwDSef96P\nkSPt8x7C9aTQC5FTM2YYvezNDRoEFSs6Px/hlQ4fhg4dYMcOe5/5J6ABcAmYSUhIPsaPt6Gjo/AY\nco9eiJw4edL6Dc4KFeBNi80Yhci2Xr0cUeS3Y3QVv70cNC8DBrSTKSVeRgq9EDnRty9cvmwZnzYN\ngoOdn4/wSnFxsGaNvc/6HcaV/BXT4wJUrbqR4cOtbKMsPJoUeiGya/VqmD/fMt65szEJTwg72b0b\nkpLsecZYjN3Br5keFyIkZBNz51aRKSVeSO7RC5EdN24Y+26aK1hQ1swLu/vpJ8tYcHD2esXHx6/l\n6tUY4CYAShWlatVYJky4T6aUeCkp9EJkx4gRxuwoc++/bzTIEcKOdu60jI0Ykd769/QtX76ctm3b\nAvEAREZGsnHjRipUqJDzJIXbkqF7IbLqt9+Mgm6uTh147jnn5yO8nrUr+keyeCs9Li6Onj17Eh9v\nFPmSJUvy3XffSZH3AVLohciK22vmzW+YBgfDlCmyZl7Y3cWLsH9/2phSULVq1s4TGhrKqlWryJ8/\nP2XKlGHLli2UKVPGfokKtyVD90JkxbRp1tc4DR5sLKkTws527bKMVawIERFZP9cDDzxAbGwshQoV\nIjIyMufJCY8ghV4IW504YTbIkZMAACAASURBVH1t/D33WN9/Xgg7sDZsX62aba+9cOEC+fPnTxN7\n6KGH7JCV8CQydC+ErV57Da5csYzLmnnhQNYm4tlyf37q1KmUL1+e3bt32z8p4VGk0Athi5UrYcEC\ny/iLL0LNms7PR/iM7EzEmzhxIj179uTChQvUr1+fvXv3OiY54RGk0AuRmevX4ZVXLOOFCsFY6Qku\nHOf4ceOOUWqBgfDgg+m/Zty4cfTp0yflcdmyZbnrrrsclKHwBFLohcjM8OFw5IhlfMIEMLv/KYQ9\nWbuaf+ABCAmxfvyoUaMYkGq+yBNPPMH69evJly+fgzIUnkAKvRAZOX4cPvzQMl6/PnTs6Px8hE+x\ndn/e2kQ8rTVDhw5lyJAhKbGaNWuydu1a8uTJ48AMhSeQWfdCZOTbby33mQ8JkTXzwilsuT+vtWbg\nwIGMGTMmJVa3bl2WLl1KeHi4gzMUnkAKvRAZWbLEMvbaa1C2rPNzET4lOTnzQq+15n//+x8fphp1\natSoEYsWLSI0NNQJWQpPIEP3QqTn/Hn47jvLeIcOzs9F+JwDByx3QA4Ph3vvvfN48eLFaYp8ixYt\nWLJkiRR5kYYUeiHSs3KlZavb0qWhcmXX5CN8irX781Wrgr//ncetWrXiFdOKkDZt2rBgwQKCpaeD\nMCND90KkZ/Fiy1hMjNybF05hS0c8pRQTJ06kSpUqPP/88wQEyI90YUm+K4Sw5sYNWLvWMh4T4/xc\nhE+ydkX/8MOJJCaSpqD7+fnx4osvOjEz4Wlk6F4Ia9avh7i4tLGCBeGJJ1yTj/ApW7da2zspgTlz\n2tOlSxeSzG8pCZEBuaIXwhprs+2bNwcZGhUOFh9v7ISc1i1CQp5izZplAAQGBjJjxgz8/ORaTWRO\nfmoJYS4xEZYvt4zLsL1wgvHjIW1r+jigDTdvrk6J5MmTByVzRYSN5OOgEOa2bjWW1qUWFmZ0wxPC\ngQ4cgJEjU0duAC2AO0X+jTfe4IMPPpBCL2wmV/RCmLM2bN+oEcjaZGFnyclw6NCd5ou9esGtW7ef\nvQY0A7akHD906FCGDx8uRV5kiRR6IVLT2nqhl2F7YWdbtkC7dnD2rLVnLwNNgB9SIqNGjWLw4MFO\nyk54Eyn0QqQWG2u5U52/PzRt6pp8hFc6cQJatIArV6w9exFoCNxZSD9u3Dj69+/vpOyEt5FCL8Rt\nN2/Cyy9bxmvVku1ohV316ZNekQd4jtRF/qOPPuLVV191RlrCS8lkPCFuGz0a9u+3jL/wgvNzEV5r\nxQpYuDCjI8YDhQGYOnWqFHmRY3JFLwTAX38Zhd5c9eqy77ywm2vXwNSaPo2wMChe/Pbv7+Hhh2N5\n8sndvPDCc85NUHglKfRCJCcbHUoSEtLGAwNh2jSQpiTCToYPh6NHzaPJTJ/uxzPPpI7db/oSIuek\n0Avx+efw/feW8QEDoFIlp6cjPNOFC/DJJ/D778biDXNaw9Kl5tEj5M4dQ5kyU4DHnJCl8EVKW/uO\n9HBRUVF6165drk5DeIIzZ+Cee+DixbTxsmWNn9iydl7Y4Pp1ePxx41vGdv8CtYGjREREEBsbS9Wq\nVR2ToPB6SqmftdZR1p6TMUnh2/r1syzyAFOmSJEXNhs+PKtFfh9QEzDG8ePi4jh9+rT9ExMCGboX\nvmzDBvjyS8v4M89Iu1ths19/hQkTsvKKvUBd4BQAISEhLFmyhIYNGzogOyGk0AtfFRcHPXpYxvPl\ngw8+cH4+wiMlJRnzOG3fNfY3oB5gtMMLCwtj+fLl1KlTx0EZCiGFXviqd96Bgwct4+PGQeHCzs9H\neKSpU2HnTst4z55gXrv//fcX3n67PteuXQAgV65crFq1iho1ajghU+HLZDKe8D1//glVqlgup6tR\nAzZvluV0wiYnThjzOK9eTRu/915jOD8o6E5s586dNGzYkEuXLgHGNrNr1qzh8ccfd2LGwptlNBlP\nruiFb5E188JOXn3VssgDTJ+etsgfOXKEevXqcdV0cN68eVm/fj1RUVZ/Jgthd/JTTfiW1ath2zbL\n+JtvGpdiQthg+XL49lvLeNeu8OSTaWMlSpSge/fuABQoUIBNmzZJkRdOJVf0wrd8841lrFw5GDTI\n+bkIj3TtmrFvvLnCheG99yzjSinGjh1LaGgoTz31FPffLx3vhHNJoRe+IzHRuBQzN2YMhIQ4Px/h\nkYYNs9bG1lhil94mh0opRo4c6djEhEiHDN0L3/H995bNccLDoXFj1+QjPM7u3fDhh5bxBg2gQwfj\n98uXL+fZZ58lMTHRuckJkQ65ohe+Y8kSy1ijRtIBT9gkKQm6dTPmc6YWEgKTJ4NSsGjRIp5++mkS\nExPRWjNnzhz8/f1dk7AQJnJFL3yD1tYLfUyM83MRHmnSJLC2anfIEGNrhHnz5vHUU0+lXMn/+OOP\nnD9/3slZCmFJCr3wDbt3W95Y9feHpk1dk4/wKMeOweDBlvFKlaB/f5gzZw7PPPMMSaYWeRUrVmTL\nli0UluZLwg1IoRe+wdrVfHS00fJWiEy8+qox297ctGnwxRef0rlzZ5JNY/r33XcfmzdvJjIy0slZ\nCmGdFHrhG6wV+latnJ+H8DhLl8LixZbx7t1hz57JvPTSS9zuMPrAAw+wefNmihYt6uQshUifFHrh\n/Q4etL6HaIsWzs9FeJSrV62vmS9SBEqU+JBXXnklJfbwww+zceNGChUq5MQMhcicFHrh/axdzUdF\nwd13Oz8X4VGGDjXuz5tr2nQ
Kgwf3TXn86KOPEhsbS4ECBZyYnRC2keV1wvNoDQcOwJkzth0/f75l\nTGbbCyv+++/OnM0TJ2DiRMtjGjaEgQMbsHZtJMePH6d69eqsWrWKPHnyODdZIWwkhV54lkuXoHlz\n2Lo1Z+eRQi9SSUiATp1g3ryMjwsNhSlToHTpssTGxjJs2DBmzpxJrly5nJOoENkghV54lpdfznmR\nL18e7rvPPvkIr/Duu5kXeYDhw6F0aeP3FStWZJ4tLxLCxbJV6JVS9wE1gRJAQSAOOAP8Cnyntbay\neaMQObR2LXz9dc7PExNjtDETAvjnH6PQp08DAylTpgl9+9Z0UlZC2I/NhV4pVRzoBnQB7rodNjtM\nA0lKqQ3AFGCFvr3uRIicuHEDevbM+XmCgox1UUJgTPfo0QPi49M7IhnoBUzh1KlJ7Nq1jscff9x5\nCQphB5kWeqVUfmA40B0IBA4DXwE/AaeAC0AoUAC4B3gciAYaAv8opfpprVfbP3XhU95+Gw4dsow/\n/LDtO88VLw59+xr9SoUAZs+GzZst4/feC3nzJnHwYHfOnPkUgBs3rjFz5kwp9MLj2HJFfwAIBmYC\ns7XWOzN7gVIqD9AeYwRghVKqr9bayvxVIWzw++8wfrxlvHZtiI2VYXiRLefOGe1rzZUuDTt2JNKr\nVxe2b/8iJd6xY0emTZvmxAyFsA9bCv0XwLta69O2nlRrfQWYDkxXSsUAstm3sI3WMGcOLFxodCsB\no+GN+ZafQUEwdaoUeZFt/fuDtT1nPv44ge7dO6WZaNe5c2dmzpwpO9EJj5Rpodda98nJG2itrXQr\nESIdM2cae4FmZvBgqFDB8fkIr7RxozFsb+6pp+L57LMOLFq0KCXWrVs3pkyZgp+f9BcTninL37lK\nqRKmofmMjsmtlCqR/bSEz/rgg8yPqVgR3njD8bkIr3TzpjEBz1xExC0uXWqbpsj36tWLqVOnSpEX\nHi07372HgMyu8l81HSeE7f7+2/jKzLRpEBzs+HyEVxo9GvbvN48mUrx4DOvWLU+J9OvXj4kTJ6Lk\n9pDwcNkp9ArLZXVC5Jy1nvSpKWVc8deq5Zx8hNf5+2+j0JurXj2AVq0eSXk8aNAgxo0bJ0VeeAVH\ndcYrClx30LmFt7JW6Hv3htatjSJfuTLkz+/8vIRXSE42WigkJKSNBwYag0T33TeC+PhbhIWFMXTo\nUCnywmvYVOiVUp3MQg9ZiQH4Y3TLexawsi+oEOk4cQJ+/NEy3quXTLoT2ZKcDL/9ZiyjA9i+Hb77\nzvK4AQOgUiUAxXvvvScFXngdW6/oP8foeofp15amL3O3/4fcAEbkKDPhW5Yts4zde68UeZEtZ89C\ngwbw66/pHXEBGEuZMiMZPDgoJSpFXngjWwv9C6ZfFfAZsARYauW4JOA8sF1rfcmWEyulGgEfYYwG\nzNRav2flmKcwuvNpYI/WuqONeQtPYW3YXnaYE9mgNXTpklGRPwfUB36lWLH9BATMw2j6KYR3sqnQ\na61TVpwqpZ4Hlmit5+T0zZVS/sAkjP91x4CflFLLtNZ7Ux1THhgIVNdaX1RKFc7p+wo3c/mysbDZ\nnBR6kQ2LF8OKFek9exqoB/wBwLZti9myZQv16tVzUnZCOF+WJ+NprWvb8f2rAQe01v8CKKXmYdwS\n2JvqmK7AJK31RdP7n7Hj+wt3sHq15QypYsUgKso1+QiPdfmyMX/TuhNAXcBYwqmU4rPPPpMiL7xe\ntmfdK6XCgNZAFSAvcBn4BVistbZ1xn0k8F+qx8eAR82OqWB6v20Yw/vDtdZrspu3cEPpDdtLkxKR\nRW+9ZczrNPfII//xxx91iIs7AICfnx9z5szhmWeecXKGQjhfdvejbwLMBvKTdk29BiYopV7QWqc7\neJZFAUB5jB3xigPfKaUqm88BUEp1w9hEhxIlpCmfx7h1C1atsozLsL3Iop07YdIky3jt2oc5fLgO\ncXFGD6+AgAC++uor2rVr5+QMhXCN7LTAfRhYhHEVPxdjf/rGpl/nmuILlVJVbTjdceDuVI+Lm2Kp\nHQOWaa0TtNaHgH0YhT8NrfV0rXWU1jqqUKFCWfxTCZfZuPHO5jW3RURIUxyRJYmJxhYJWqeNh4Qc\n5J9/anHItMVxYGAgCxculCIvfEp2xkYHY1y519Bad9Jaf661Xmv6tRPwpOn5QTac6yegvFKqtFIq\nCGNrW/N1VkswruZRShXEGMr/Nxt5C3e0YIFlrGlTY3c6IWw0bRrs2WMe3UdQUE1OnDgKQHBwMEuW\nLKFlS2srg4XwXtkp9DWABVrrHdae1Fr/CCw0HZchrXUi0AtYC/wFfKO1/lMpNVIp1cJ02FrgvFJq\nL7AJeF1rbWVzSeFxdu82tqQ1J8P2IotmzLCM3XtvCPnzGx8YQ0JCWLZsGU2aNHFyZkK4Xnbu0UeQ\ndgKdNUeBDHe4u01rvQpYZRYbmur3Gvif6Ut4i6QkY6w1KSltPFcuaNTINTkJj3TokLWrefj88xIU\nKrSRJk2aMHnyZGrXtueCISE8R3YK/QmMZXEZiQJOZuPcwldMngy7dlnG33wTcud2fj7CY1lbtPHw\nw1CtGkBpfv/9dwICHLWthxDuLztD96uAOkqpN00Nb1IopfyUUv0wOlJYmUotBHDsGAwebBmvVAle\nf935+QiPdqfQ7wQ2AGnv/kiRF74uO/8D3gZigHeA7kqp7zGu3otiTMQrBZwCRtkpR+HpLl821j7F\nxRmPp061nGkPxowqmYQnsuDsWdi6FWAbxuKfRGAVMTHRrkxLCLeSnc54p5RS1YFpGK1rS5odsh7o\nobWWoXsBmzdD8+Zw7VrGx3XrBtWrOyUl4T1WrIDk5M1AM27vjB0Q8AIVKvwDyIdGISCbDXO01oeB\nhkqpSIzOeBEYnfF2a63N18ELX3X+PLRrl3mRL1wY3rPYy0iITE2fvgFoAZhGiyjCM88sJzhYirwQ\nt+Xo5pWpqEthF9YNGHBnM/CMfPgh5Mvn+HyEV/n221Xs2NEauGWK3AVs5KWX7nFhVkK4n+x0xvtG\nKdVYKSWNyEX6tmyBzz7L/Lg2baB9e8fnI7zK0qVLad8+hjtF/m7gOwoVuofHH3dhYkK4oexc0bcF\n2gBnlFJfArO11n/YNy3h0W7dgu7dLeO5c0N0tPH7wEDjnnzv3qCU5bFCpGPhwoV06NCBxMREU6QU\nRi+tUrRsCf7+6b9WCF+UnUL/GNAZeBroB/xPKbUbY5Obr7XWNozVCq82Zgz884/1eM+ezs9HeI2v\nvvqKTp06kZTSaKksRpE3tsyQpopCWMrOrPudwE6l1GsYs2CeBxoCHwHjlVIrgTnAClOLW+HNrlyB\nQYPghx/u7Clvrcg/9pj1q3whbJCUZHxOnDr1SKoiXxHYCBQDIDwc6tZ1VYZCuK9sT8bTWsdj9LRf\nqJQqBDyLUfRjgJbAeaCwPZIUbiopCRo3Nop8RgICYPp02V9eZNv//gcTJwIMxLgvvxCI
BYqkHNO4\nMYSEuCQ9IdyaXX7yaq3Paq0nYCy164/RtaKAPc4t3NikSZkXeYB+/aByZcfnI7zSDz/cLvK3DQN+\nJHWRBxm2FyI9din0SqmKSql3gSPAOCAQOGCPcws3lV4bW3OlS8PQoZkfJ4QV8+d/S9euN82iCghP\nE8mTx9jdWAhhKduFXimVVynVUym1A9gLvImxY92nGHvVV7RTjsId9e6deSOckiVh+XIIC3NOTsKr\nvPfee7Rv35a9e9sC8ekeFx4On34KefM6LzchPEmW79ErpZoDnTB6TgYBGmMnidnAIq21+cdv4W2W\nLLG+ZVjnzsbNVIDQUChbVpbOiSzTWvP2228zbNgwU2QlxtYaIwCIijIKu1LGtI9y5SA42FXZCuH+\nsjMZb6np130YxX2OtL31IVevGlfz5ooUgQkT5LJK5IjWmrfeeot33303VbQ2MAAwCvv06fDAAy5J\nTwiPlJ1CPw2jSc4OeycjPMDQocb9eXMffSRFXuSI1poBAwYwfvz4VNEGwGLAuP3z2mtQpYorshPC\ncymttatzsLuoqCi9a9cuV6fhfc6dg2LF7qyXv61RI1i1SobpRbZprXnttdeYmGZ6fVOMZXTGmrkS\nJeDPPyFXLldkKIR7U0r9rLWOsvZcjja1ET5mxQrLIh8aCpMnS5EX2ZacnMzLL7/MtGnTUkVbAfNI\nvdXspElS5IXIjkwLvVJqI8aEu+e11sdMj22htdbSp8qbWJuA162bsYROiGxISkqia9euzJo1K1X0\nKeBLjFW6htatoVkzZ2cnhHew5Yo+GqPQh6V6bAvvuyfgy27cgHXrLONt2jg/F+E1Ll68yPfff5/y\nOCLiWS5fnkXqH025c5s3zBFCZEWmhV5r7ZfRY+Ej1q2DuLi0sYIF4YknXJOP8AoFCxZk48aN1KpV\ni3z5ovnllxlA2u3n3n0XIiNdk58Q3kDu0QvbWBu2b9FC9gQVNjl3Dl5/HbZvt5zmAXeTnLyDPXsK\nYt7Dq1o12fBQiJySQi8yl5hodLgzJ83FhQ3i46FBA9i9G+AmsBt43Owoy/2v/P1h2jT5LClETuWk\nBe4zSqlYpdQFpVSi6dcNSqln7JmgcANbt8KFC2ljYWFQr55r8hEe5f33bxf5GxgbW0YDazN9Xd++\n8NBDDk1NCJ+Q5UKvlApUSi3F2HO+NpAbOGv6tQ4wRym1VCkVmMFphCdZvNgy1qiRsbROiAwcPAgj\nRwJcx+iavQ6jb30McDDd15UsCcOHOyFBIXxAdq7oBwLNMfaJrA2EaK3vwuhqUQfYifE/+g17JSlc\nSGvr9+dl2F5kQmvj/vrNm1eBRsCmVM8OAspafV25ckb/pfBwq08LIbIoO/foO2FsQRuttU7ZUkpr\nnQRsVkpFA38AnYFRdshRuNKvv8LRo2lj/v6yqFlk6uuvYf36S0Bj4E7H7Mcff48vvrB+HRASYjRf\nlP5LQthPdgp9ceDj1EU+Na31LdPQ/is5yky4htZGcd+zx/j9Wiv3UqOjIV8+p6cmPMeFC9CnzwWM\nXvU/p8Tz5v2Adev6Soc7IZwoO4X+BKlbVlkXaDpOeBKtjRlQH32U8XEybC8y8eqrZzl3rj6wJ1X0\nE7744hUp8kI4WXbu0X8FtFVK5bH2pFIqL9AWmJuTxIQLLFqUeZEHaNnS8bkIj7VkySnmzq3NnSKv\ngOm0bfuK3PERwgWyU+hHAruAnUqpjkqp4qaZ+MVNS+t2YEzIe9ueiQoHu3zZ+j7z5qpWhbvvdnw+\nwiPFx8PLL68B/jRF/IBZ5M7d1abPkEII+8vO0P3tPqgK+MLK8wooD9xUaWfUaK21NOhxV4MHw8mT\nmR/31luOz0V4rLFj4eTJzsAZjJn1XwAdePddY5KdEML5slN4v0c2rPEuP/5obDVr7sEHoUoV4/d5\n8kCrVsZEPCGs2L8fRqWssxkAtADu4dFHpY2tEK6U5UKvtY52QB7CVRISjK1mtdlnt/BwWLYMSpRw\nTV7CYxw+fJhChQrTs2cYt26lfuYeaWMrhBuQneh8zaZN8OSTxjjq7a/ffrM8buRIKfIiXXv2QOPG\nULjw35QtW518+VoSG3vT4rj//c8YGBJCuI7cM/clx49D06aW282aq1IFXn3VOTkJj3P8ONSuDRcv\n/gHUA06TnHwCeBZYmHJcyZIwbJiLkhRCpMj0il4p1V8pFZLdN1BKVVFKNc7u64Udffll5kXezw+m\nT4cA+QworOvTBy5e3IPRAfu0KRoO9Epz3JQp0sZWCHdgy9D9O8BBpdQbSimb5s0qQ0Ol1GKMpXgy\neOcOrG1OY65XL4iKcnwuwiMtXw7ffvszRpE/Z4rmxtiNLjrluKeeMob2hRCuZ8tlW2XgA2A0MEop\n9QOwFaOAnwQuYmxoUwC4B3gMqAsUBc5jfMyfZvfMRdacOGHMrk9PYCC0bw/jxjkvJ+FRrl2Dl17a\ngbFBzWVTNAKjyD8KGANCTZvCp5+6JkchhKVMC73Weh/QTCn1BEb/+jZADawvsbu9cP4fYAwwS2t9\n1U65ipxYtswyds89EBtr/D5vXmOPeSHS8dJLWzlzpglw+790fmAdM2ZUpUkTI5I7t/ElhHAfNt+I\n1Vr/APyglOoB1ASeBEpgXMnHYXTI+A3YrLX+M90TCdewttVs69bSxURYlZAAW7bAP/8Yj3/7bTPz\n5zfD2FceoCCwgQYNHuTFF2W3OSHcWXbW0V8FVpq+hCe4fBk2brSMy+Y0wopbt4xvjTVrbkeSgde5\nU+SLALGEhFRi8mQp8kK4O1lH7wtWrTIu0VKLjDT61gthZuzY1EUejB8Ty4AKQDFgC1CJoUOhbFkX\nJCiEyBKbruiVUp2AX7XWVjqrCLdnbdi+ZUtj5pQQqezbB++8Y+2Zu4CNGHfpylGpEvTr59TUhBDZ\nZOtP+s+BNOO8SqnnlVJWxoOFW7l1y7iiNyfD9sKM1tCjB6Y2ttY2OIoEypE7N3z+OQQFOTU9IUQ2\n5aQrSimglp3yEI6ycaOxLiq1iAjZnEZY+OILo0MyfAl0Bb4BmlOvHlSoYBxTpIixRv6ee1yWphAi\ni6T9mbez1iSnWTNj3bwQJufOGX3pYRbwIsbq2bYUKbKapUvryMpLITyY3KT1ZklJsHSpZVyG7YWZ\nAQPg/PlpQBfutMioyIQJ90uRF8LDSaH3Zps2wZkzaWPBwdCokWvyEW5p82aYNetjoEeq6EPExGyk\nQ4fCLspKCGEvWSn01jrhCXcVH2/sPmKufn3Ilcv5+Qi3dOsWtGv3PpB6t8JHyJNnI1OmFHRVWkII\nO8rKPfrhSqnh5kGlVFI6x2uttcwBcJXx42HvXst4+/bOz0W4rSZN3uXcucGpIo8Dqxk7NoKiRV2V\nlRDCnrJSiLPa/0r6ZbnKgQPw9tuW8agoKfQCAK01vXuPYOPGEamiNYEVPPFEbrp2dVVmQgh7s6nQ\na63lXr6n0Bpefhlu3kwb9/c39pn393dNXsK
[... base64-encoded PNG data for the pivot_plot figure output elided ...]
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = plt.figure(figsize=(8, 8))\n", + "results = pd.concat(dfs)\n", + "pivot_plot(results, fig=fig);" + ] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "all,-slideshow", + "formats": "ipynb,Rmd" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/source/learning/Learning1.Rmd b/doc/source/learning/Learning1.Rmd deleted file mode 100644 index 359cbe982..000000000 --- a/doc/source/learning/Learning1.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -jupyter: - jupytext: - cell_metadata_filter: all,-slideshow - formats: ipynb,Rmd - text_representation: - extension: .Rmd - format_name: rmarkdown - format_version: '1.1' - jupytext_version: 1.1.1 - kernelspec: - display_name: Python 3 - language: python - name: python3 ---- - -# Learning 1 - -```{python} -import numpy as np -print('notebook 1') -``` - -```{python collapsed=TRUE} - -``` diff --git a/doc/source/learning/Learning1.ipynb b/doc/source/learning/Learning1.ipynb deleted file mode 100644 index 6ead7af9e..000000000 --- a/doc/source/learning/Learning1.ipynb +++ /dev/null @@ -1,63 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Learning 1" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "notebook 1\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "print('notebook 1')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "jupytext": { - "cell_metadata_filter": "all,-slideshow", - "formats": "ipynb,Rmd" - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/doc/source/learning/Learning2.Rmd b/doc/source/learning/Learning2.Rmd deleted file mode 100644 index aca1f8f4d..000000000 --- a/doc/source/learning/Learning2.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -jupyter: - jupytext: - cell_metadata_filter: all,-slideshow - formats: ipynb,Rmd - text_representation: - extension: .Rmd - format_name: rmarkdown - format_version: '1.1' - jupytext_version: 1.1.1 - kernelspec: - display_name: Python 3 - language: python - name: python3 ---- - -# Learning 2 - -```{python} -import numpy as np -print('notebook 2') -``` - -```{python collapsed=TRUE} - -``` diff --git a/doc/source/learning/Learning2.ipynb b/doc/source/learning/Learning2.ipynb deleted file mode 100644 index 66c0d95dc..000000000 --- a/doc/source/learning/Learning2.ipynb +++ /dev/null @@ -1,63 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Learning 2" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "notebook 2\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "print('notebook 2')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "jupytext": { - "cell_metadata_filter": "all,-slideshow", - "formats": "ipynb,Rmd" - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/doc/source/learning/index.rst b/doc/source/learning/index.rst index 8a74213aa..7ee107c8a 100644 --- a/doc/source/learning/index.rst +++ b/doc/source/learning/index.rst @@ -8,5 +8,5 @@ case considered above. .. toctree:: :maxdepth: 2 - Learning1.ipynb - Learning2.ipynb \ No newline at end of file + Basic_example.ipynb + Full_model_LASSO.ipynb \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 3ab08e8a6..c08d325af 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,8 @@ pandas mpmath pyinter sklearn -regreg +git+https://github.com/jonathan-taylor/regreg # keras # tensorflow traitlets +scikit-learn diff --git a/selectinf/algorithms/api.py b/selectinf/algorithms/api.py index cf5391f1c..f15caa897 100644 --- a/selectinf/algorithms/api.py +++ b/selectinf/algorithms/api.py @@ -1,4 +1,5 @@ from .lasso import (lasso, + ROSI, data_carving as data_carving_lasso, additive_noise as additive_noise_lasso) diff --git a/selectinf/algorithms/barrier_affine.py b/selectinf/algorithms/barrier_affine.py new file mode 100644 index 000000000..88812c278 --- /dev/null +++ b/selectinf/algorithms/barrier_affine.py @@ -0,0 +1,139 @@ +import numpy as np + +def solve_barrier_affine_py(conjugate_arg, + precision, + feasible_point, + con_linear, + con_offset, + step=1, + nstep=1000, + min_its=200, + tol=1.e-10): + + scaling = np.sqrt(np.diag(con_linear.dot(precision).dot(con_linear.T))) + + if feasible_point is None: + feasible_point = 1. / scaling + + objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u)/2. \ + + np.log(1.+ 1./((con_offset - con_linear.dot(u))/ scaling)).sum() + grad = lambda u: -conjugate_arg + precision.dot(u) - con_linear.T.dot(1./(scaling + con_offset - con_linear.dot(u)) - + 1./(con_offset - con_linear.dot(u))) + barrier_hessian = lambda u: con_linear.T.dot(np.diag(-1./((scaling + con_offset-con_linear.dot(u))**2.) 
+ + 1./((con_offset-con_linear.dot(u))**2.))).dot(con_linear) + + current = feasible_point + current_value = np.inf + + for itercount in range(nstep): + cur_grad = grad(current) + + # make sure proposal is feasible + + count = 0 + while True: + count += 1 + proposal = current - step * cur_grad + if np.all(con_offset-con_linear.dot(proposal) > 0): + break + step *= 0.5 + if count >= 40: + raise ValueError('not finding a feasible point') + + # make sure proposal is a descent + + count = 0 + while True: + count += 1 + proposal = current - step * cur_grad + proposed_value = objective(proposal) + if proposed_value <= current_value: + break + step *= 0.5 + if count >= 20: + if not (np.isnan(proposed_value) or np.isnan(current_value)): + break + else: + raise ValueError('value is NaN: %f, %f' % (proposed_value, current_value)) + + # stop if relative decrease is small + + if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value) and itercount >= min_its: + current = proposal + current_value = proposed_value + break + + current = proposal + current_value = proposed_value + + if itercount % 4 == 0: + step *= 2 + + hess = np.linalg.inv(precision + barrier_hessian(current)) + return current_value, current, hess + +def solve_barrier_nonneg(conjugate_arg, + precision, + feasible_point=None, + step=1, + nstep=1000, + tol=1.e-8): + + scaling = np.sqrt(np.diag(precision)) + + if feasible_point is None: + feasible_point = 1. / scaling + + objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u) / 2. + np.log( + 1. + 1. / (u / scaling)).sum() + grad = lambda u: -conjugate_arg + precision.dot(u) + (1. / (scaling + u) - 1. / u) + barrier_hessian = lambda u: (-1. / ((scaling + u) ** 2.) + 1. / (u ** 2.)) + + current = feasible_point + current_value = np.inf + + for itercount in range(nstep): + cur_grad = grad(current) + + # make sure proposal is feasible + + count = 0 + while True: + count += 1 + proposal = current - step * cur_grad + if np.all(proposal > 0): + break + step *= 0.5 + if count >= 40: + raise ValueError('not finding a feasible point') + + # make sure proposal is a descent + + count = 0 + while True: + proposal = current - step * cur_grad + proposed_value = objective(proposal) + if proposed_value <= current_value: + break + step *= 0.5 + if count >= 20: + if not (np.isnan(proposed_value) or np.isnan(current_value)): + break + else: + raise ValueError('value is NaN: %f, %f' % (proposed_value, current_value)) + + # stop if relative decrease is small + + if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value): + current = proposal + current_value = proposed_value + break + + current = proposal + current_value = proposed_value + + if itercount % 4 == 0: + step *= 2 + + hess = np.linalg.inv(precision + np.diag(barrier_hessian(current))) + return current_value, current, hess diff --git a/selectinf/algorithms/cox_utils.pyx b/selectinf/algorithms/cox_utils.pyx new file mode 100644 index 000000000..01244835b --- /dev/null +++ b/selectinf/algorithms/cox_utils.pyx @@ -0,0 +1,245 @@ +import warnings +import numpy as np, cython +cimport numpy as cnp + +DTYPE_float = np.float +ctypedef cnp.float_t DTYPE_float_t +DTYPE_int = np.int +ctypedef cnp.int_t DTYPE_int_t +ctypedef cnp.intp_t DTYPE_intp_t + +cdef extern from "cox_fns.h": + + void _update_cox_exp(double *linear_pred_ptr, # Linear term in objective + double *exp_ptr, # stores exp(eta) + double *exp_accum_ptr, # inner accumulation vector + double *case_weight_ptr, # case weights + long *censoring_ptr, # 
censoring indicator + long *ordering_ptr, # 0-based ordering of times + long *rankmin_ptr, # 0-based ranking with min tie breaking + long ncase # how many subjects / times + ); + + void _update_cox_expZ(double *linear_pred_ptr, # Linear term in objective + double *right_vector_ptr, # Linear term in objective + double *exp_ptr, # stores exp(eta) + double *expZ_accum_ptr, # inner accumulation vector + double *case_weight_ptr, # case weights + long *censoring_ptr, # censoring indicator + long *ordering_ptr, # 0-based ordering of times + long *rankmin_ptr, # 0-based ranking with min tie breaking + long ncase # how many subjects / times + ); + + void _update_outer_1st(double *linear_pred_ptr, # Linear term in objective + double *exp_accum_ptr, # inner accumulation vector + double *outer_accum_1st_ptr, # outer accumulation vector + double *case_weight_ptr, # case weights + long *censoring_ptr, # censoring indicator + long *ordering_ptr, # 0-based ordering of times + long *rankmin_ptr, # 0-based ranking with min tie breaking + long ncase # how many subjects / times + ); + + void _update_outer_2nd(double *linear_pred_ptr, # Linear term in objective + double *exp_accum_ptr, # inner accumulation vector Ze^{\eta} + double *expZ_accum_ptr, # inner accumulation vector e^{\eta} + double *outer_accum_2nd_ptr, # outer accumulation vector + double *case_weight_ptr, # case weights + long *censoring_ptr, # censoring indicator + long *ordering_ptr, # 0-based ordering of times + long *rankmin_ptr, # 0-based ranking with min tie breaking + long ncase # how many subjects / times + ); + + double _cox_objective(double *linear_pred_ptr, # Linear term in objective + double *inner_accum_ptr, # inner accumulation vector + double *outer_accum_1st_ptr, # outer accumulation vector + double *case_weight_ptr, # case weights + long *censoring_ptr, # censoring indicator + long *ordering_ptr, # 0-based ordering of times + long *rankmin_ptr, # 0-based ranking with min tie breaking + long *rankmax_ptr, # 0-based ranking with max tie breaking + long ncase # how many subjects / times + ); + + void _cox_gradient(double *gradient_ptr, # Where gradient is stored + double *exp_ptr, # stores exp(eta) + double *outer_accum_1st_ptr, # outer accumulation vector + double *case_weight_ptr, # case weights + long *censoring_ptr, # censoring indicator + long *ordering_ptr, # 0-based ordering of times + long *rankmin_ptr, # 0-based ranking with min tie breaking + long *rankmax_ptr, # 0-based ranking with max tie breaking + long ncase # how many subjects / times + ); + + void _cox_hessian(double *hessian_ptr, # Where hessian is stored + double *exp_ptr, # stores exp(eta) + double *right_vector_ptr, # Right vector in Hessian + double *outer_accum_1st_ptr, # outer accumulation vector used in outer prod "mean" + double *outer_accum_2nd_ptr, # outer accumulation vector used in "2nd" moment + double *case_weight_ptr, # case weights + long *censoring_ptr, # censoring indicator + long *ordering_ptr, # 0-based ordering of times + long *rankmax_ptr, # 0-based ranking with max tie breaking + long ncase # how many subjects / times + ); + +def cox_objective(cnp.ndarray[DTYPE_float_t, ndim=1] linear_pred, + cnp.ndarray[DTYPE_float_t, ndim=1] exp_buffer, + cnp.ndarray[DTYPE_float_t, ndim=1] exp_accum, + cnp.ndarray[DTYPE_float_t, ndim=1] outer_1st_accum, + cnp.ndarray[DTYPE_float_t, ndim=1] case_weight, + cnp.ndarray[DTYPE_int_t, ndim=1] censoring, + cnp.ndarray[DTYPE_int_t, ndim=1] ordering, + cnp.ndarray[DTYPE_int_t, ndim=1] rankmin, + 
cnp.ndarray[DTYPE_int_t, ndim=1] rankmax, + long ncase): + + _update_cox_exp(linear_pred.data, + exp_buffer.data, + exp_accum.data, + case_weight.data, + censoring.data, + ordering.data, + rankmin.data, + ncase) + + _update_outer_1st(linear_pred.data, + exp_accum.data, + outer_1st_accum.data, + case_weight.data, + censoring.data, + ordering.data, + rankmin.data, + ncase) + + return _cox_objective(linear_pred.data, + exp_accum.data, + outer_1st_accum.data, + case_weight.data, + censoring.data, + ordering.data, + rankmin.data, + rankmax.data, + ncase) + +def cox_gradient(cnp.ndarray[DTYPE_float_t, ndim=1] gradient, + cnp.ndarray[DTYPE_float_t, ndim=1] linear_pred, + cnp.ndarray[DTYPE_float_t, ndim=1] exp_buffer, + cnp.ndarray[DTYPE_float_t, ndim=1] exp_accum, + cnp.ndarray[DTYPE_float_t, ndim=1] outer_1st_accum, + cnp.ndarray[DTYPE_float_t, ndim=1] case_weight, + cnp.ndarray[DTYPE_int_t, ndim=1] censoring, + cnp.ndarray[DTYPE_int_t, ndim=1] ordering, + cnp.ndarray[DTYPE_int_t, ndim=1] rankmin, + cnp.ndarray[DTYPE_int_t, ndim=1] rankmax, + long ncase): + """ + Compute Cox partial likelihood gradient in place. + """ + + # this computes e^{\eta} and stores cumsum at rankmin + + _update_cox_exp(linear_pred.data, + exp_buffer.data, + exp_accum.data, + case_weight.data, + censoring.data, + ordering.data, + rankmin.data, + ncase) + + _update_outer_1st(linear_pred.data, + exp_accum.data, + outer_1st_accum.data, + case_weight.data, + censoring.data, + ordering.data, + rankmin.data, + ncase) + + _cox_gradient(gradient.data, + exp_buffer.data, + outer_1st_accum.data, + case_weight.data, + censoring.data, + ordering.data, + rankmin.data, + rankmax.data, + ncase) + + return gradient + +def cox_hessian(cnp.ndarray[DTYPE_float_t, ndim=1] hessian, + cnp.ndarray[DTYPE_float_t, ndim=1] linear_pred, + cnp.ndarray[DTYPE_float_t, ndim=1] right_vector, + cnp.ndarray[DTYPE_float_t, ndim=1] exp_buffer, + cnp.ndarray[DTYPE_float_t, ndim=1] exp_accum, + cnp.ndarray[DTYPE_float_t, ndim=1] expZ_accum, + cnp.ndarray[DTYPE_float_t, ndim=1] outer_1st_accum, + cnp.ndarray[DTYPE_float_t, ndim=1] outer_2nd_accum, + cnp.ndarray[DTYPE_float_t, ndim=1] case_weight, + cnp.ndarray[DTYPE_int_t, ndim=1] censoring, + cnp.ndarray[DTYPE_int_t, ndim=1] ordering, + cnp.ndarray[DTYPE_int_t, ndim=1] rankmin, + cnp.ndarray[DTYPE_int_t, ndim=1] rankmax, + long ncase): + """ + Compute Cox partial likelihood gradient in place. 
+ """ + + # this computes e^{\eta} and stores cumsum at rankmin, stored in outer_accum_1st + + _update_cox_exp(linear_pred.data, + exp_buffer.data, + exp_accum.data, + case_weight.data, + censoring.data, + ordering.data, + rankmin.data, + ncase) + + _update_outer_1st(linear_pred.data, + exp_accum.data, + outer_1st_accum.data, + case_weight.data, + censoring.data, + ordering.data, + rankmin.data, + ncase) + + _update_cox_expZ(linear_pred.data, + right_vector.data, + exp_buffer.data, + expZ_accum.data, + case_weight.data, + censoring.data, + ordering.data, + rankmin.data, + ncase) + + _update_outer_2nd(linear_pred.data, + exp_accum.data, + expZ_accum.data, + outer_2nd_accum.data, + case_weight.data, + censoring.data, + ordering.data, + rankmin.data, + ncase) + + _cox_hessian(hessian.data, + exp_buffer.data, + right_vector.data, + outer_1st_accum.data, + outer_2nd_accum.data, + case_weight.data, + censoring.data, + ordering.data, + rankmax.data, + ncase) + + return hessian + diff --git a/selectinf/algorithms/lasso.py b/selectinf/algorithms/lasso.py index fa35e5ed2..26c8cb41b 100644 --- a/selectinf/algorithms/lasso.py +++ b/selectinf/algorithms/lasso.py @@ -21,7 +21,6 @@ from regreg.api import (glm, weighted_l1norm, simple_problem, - coxph as coxph_obj, smooth_sum, squared_error, identity_quadratic, @@ -243,6 +242,7 @@ def summary(self, alternative='twosided', level=0.95, compute_intervals=False, + dispersion=None, truth=None): """ Summary table for inference adjusted for selection. @@ -259,6 +259,9 @@ def summary(self, compute_intervals : bool Should we compute confidence intervals? + dispersion : float + Scalar to multiply `self.constraints.covaraince` + truth : np.array True values of each beta for selected variables. If not None, a column 'pval' are p-values computed under these corresponding null hypotheses. @@ -276,9 +279,14 @@ def summary(self, if truth is None: truth = np.zeros_like(self.active_signs) + if dispersion is None: + dispersion = 1. + result = [] - C = self._constraints + C = self.constraints if C is not None: + _cov = C.covariance.copy() + C.covariance = _cov * dispersion one_step = self.onestep_estimator for i in range(one_step.shape[0]): eta = np.zeros_like(one_step) @@ -297,7 +305,8 @@ def summary(self, if compute_intervals: if C.linear_part.shape[0] > 0: # there were some constraints try: - _interval = C.interval(eta, one_step, + _interval = C.interval(eta, + one_step, alpha=alpha) except OverflowError: _interval = (-np.inf, np.inf) @@ -321,10 +330,11 @@ def summary(self, lower_trunc, upper_trunc, sd)) - + C.covariance = _cov + df = pd.DataFrame(index=self.active, data=dict([(n, d) for n, d in zip(['variable', - 'pval', + 'pvalue', 'lasso', 'onestep', 'lower_confidence', @@ -470,13 +480,13 @@ def logistic(klass, covariance_estimator=covariance_estimator) @classmethod - def coxph(klass, - X, - times, - status, - feature_weights, - covariance_estimator=None, - quadratic=None): + def cox(klass, + X, + times, + status, + feature_weights, + covariance_estimator=None, + quadratic=None): r""" Cox proportional hazards LASSO with feature weights. Objective function is @@ -521,7 +531,7 @@ def coxph(klass, coordinates of the gradient of the likelihood at the unpenalized estimator. 
""" - loglike = coxph_obj(X, times, status, quadratic=quadratic) + loglike = glm.cox(X, times, status, quadratic=quadratic) return klass(loglike, feature_weights, covariance_estimator=covariance_estimator) @@ -1003,14 +1013,14 @@ def poisson(klass, return klass(loglike1, loglike2, loglike, feature_weights) @classmethod - def coxph(klass, - X, - times, - status, - feature_weights, - split_frac=0.9, - sigma=1., - stage_one=None): + def cox(klass, + X, + times, + status, + feature_weights, + split_frac=0.9, + sigma=1., + stage_one=None): n, p = X.shape if stage_one is None: @@ -1025,9 +1035,9 @@ def coxph(klass, times1, X1, status1 = times[stage_one], X[stage_one], status[stage_one] times2, X2, status2 = times[stage_two], X[stage_two], status[stage_two] - loglike = coxph_obj(X, times, status) - loglike1 = coxph_obj(X1, times1, status1) - loglike2 = coxph_obj(X2, times2, status2) + loglike = glm.cox(X, times, status) + loglike1 = glm.cox(X1, times1, status1) + loglike2 = glm.cox(X2, times2, status2) return klass(loglike1, loglike2, loglike, feature_weights) @@ -1488,7 +1498,7 @@ def _data_carving_deprec(X, y, splitting_pvalues, splitting_intervals), L else: - pvalues = [p for _, p in L.summary("twosided")['pval']] + pvalues = [p for _, p in L.summary("twosided")['pvalue']] intervals = np.array([L.intervals['lower'], L.intervals['upper']]).T if splitting: splitting_pvalues = np.random.sample(len(pvalues)) @@ -1878,7 +1888,8 @@ def fit(self, # Needed for finding truncation intervals - self._Qbeta_bar = X.T.dot(W * X.dot(lasso_solution)) - self.loglike.smooth_objective(lasso_solution, 'grad') + self._Qbeta_bar = (X.T.dot(W * X.dot(lasso_solution)) - + self.loglike.smooth_objective(lasso_solution, 'grad')) self._W = W if n > p and self.approximate_inverse is None: @@ -1957,14 +1968,14 @@ def summary(self, Estimate of dispersion. Defaults to a Pearson's X^2 estimate in the relaxed model. truth : np.array - True values of each beta for selected variables. If not None, a column 'pval' are p-values + True values of each beta for selected variables. If not None, a column 'pvalue' are p-values computed under these corresponding null hypotheses. Returns ------- pval_summary : np.recarray Array with one entry per active variable. - Columns are 'variable', 'pval', 'lasso', 'onestep', 'lower_trunc', 'upper_trunc', 'sd'. + Columns are 'variable', 'pvalue', 'lasso', 'onestep', 'lower_trunc', 'upper_trunc', 'sd'. """ if len(self.active) > 0: @@ -2009,7 +2020,7 @@ def summary(self, df = pd.DataFrame(index=self.active, data=dict([(n, d) for n, d in zip(['variable', - 'pval', + 'pvalue', 'lasso', 'onestep', 'sd', @@ -2311,7 +2322,8 @@ def fit(self, self.inactive = np.arange(lasso_solution.shape[0]) return self.lasso_solution - def summary(self, level=0.05, + def summary(self, + level=0.95, compute_intervals=False, dispersion=None): """ @@ -2328,7 +2340,7 @@ def summary(self, level=0.05, ------- pval_summary : np.recarray Array with one entry per active variable. - Columns are 'variable', 'pval', 'lasso', 'onestep', 'lower_trunc', 'upper_trunc', 'sd'. + Columns are 'variable', 'pvalue', 'lasso', 'onestep', 'lower_trunc', 'upper_trunc', 'sd'. 
""" if len(self.active) > 0: @@ -2366,7 +2378,7 @@ def summary(self, level=0.05, df = pd.DataFrame(index=self.active, data=dict([(n, d) for n, d in zip(['variable', - 'pval', + 'pvalue', 'lasso', 'onestep', 'sd', diff --git a/selectinf/algorithms/sqrt_lasso.py b/selectinf/algorithms/sqrt_lasso.py index e29409892..8bb03c5ef 100644 --- a/selectinf/algorithms/sqrt_lasso.py +++ b/selectinf/algorithms/sqrt_lasso.py @@ -10,6 +10,7 @@ # regreg http://github.com/regreg import regreg.api as rr +from regreg.atoms.mixed_lasso import NONNEGATIVE import regreg.affine as ra from regreg.smooth.glm import gaussian_loglike from regreg.affine import astransform @@ -427,7 +428,7 @@ def solve_sqrt_lasso_skinny(X, Y, weights=None, initial=None, quadratic=None, so weights = lam * np.ones((p,)) weight_dict = dict(zip(np.arange(p), 2 * weights)) - penalty = rr.mixed_lasso(list(np.arange(p)) + [rr.NONNEGATIVE], lagrange=1., + penalty = rr.mixed_lasso(list(np.arange(p)) + [NONNEGATIVE], lagrange=1., weights=weight_dict) loss = sqlasso_objective_skinny(X, Y) diff --git a/selectinf/algorithms/tests/test_ROSI.py b/selectinf/algorithms/tests/test_ROSI.py index 886648221..9629de691 100644 --- a/selectinf/algorithms/tests/test_ROSI.py +++ b/selectinf/algorithms/tests/test_ROSI.py @@ -103,7 +103,7 @@ def test_modelQ(): LX.fit() SX = LX.summary(dispersion=1) - np.testing.assert_allclose(S['pval'], SX['pval'], rtol=1.e-5, atol=1.e-4) + np.testing.assert_allclose(S['pvalue'], SX['pvalue'], rtol=1.e-5, atol=1.e-4) diff --git a/selectinf/algorithms/tests/test_compareR.py b/selectinf/algorithms/tests/test_compareR.py index d7ef21b76..81b01d877 100644 --- a/selectinf/algorithms/tests/test_compareR.py +++ b/selectinf/algorithms/tests/test_compareR.py @@ -3,6 +3,7 @@ import numpy as np, pandas as pd import regreg.api as rr import nose.tools as nt +from numpy.testing import dec try: import rpy2.robjects as rpy @@ -22,7 +23,7 @@ from ...randomized.lasso import lasso as rlasso, selected_targets, full_targets, debiased_targets from ...tests.instance import gaussian_instance, logistic_instance -@np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test") +@dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test") def test_fixed_lambda(): """ Check that Gaussian LASSO results agree with R @@ -86,7 +87,7 @@ def test_fixed_lambda(): yield np.testing.assert_allclose, L.fit()[1:], beta_hat, 1.e-2, 1.e-2, False, 'fixed lambda, sigma=%f coef' % s yield np.testing.assert_equal, L.active, selected_vars - yield np.testing.assert_allclose, S['pval'], R_pvals, tol, tol, False, 'fixed lambda, sigma=%f pval' % s + yield np.testing.assert_allclose, S['pvalue'], R_pvals, tol, tol, False, 'fixed lambda, sigma=%f pval' % s yield np.testing.assert_allclose, S['sd'], sdvar, tol, tol, False, 'fixed lambda, sigma=%f sd ' % s yield np.testing.assert_allclose, S['onestep'], coef, tol, tol, False, 'fixed lambda, sigma=%f estimator' % s @@ -240,7 +241,7 @@ def test_coxph(): beta_hat = np.asarray(rpy.r('beta_hat')) x = np.asarray(rpy.r('x')) - L = lasso.coxph(x, tim, status, 1.5) + L = lasso.cox(x, tim, status, 1.5) beta2 = L.fit() G1 = L.loglike.gradient(beta_hat) @@ -251,7 +252,7 @@ def test_coxph(): yield np.testing.assert_equal, np.array(L.active) + 1, selected_vars yield np.testing.assert_allclose, beta2, beta_hat, tol, tol, False, 'cox coeff' - yield np.testing.assert_allclose, L.summary('onesided')['pval'], R_pvals, tol, tol, False, 'cox pvalues' + yield np.testing.assert_allclose, L.summary('onesided')['pvalue'], 
R_pvals, tol, tol, False, 'cox pvalues' @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test") def test_logistic(): @@ -310,7 +311,7 @@ def test_logistic(): yield np.testing.assert_equal, L.active[1:], selected_vars yield np.testing.assert_allclose, beta2, beta_hat, tol, tol, False, 'logistic coef' - yield np.testing.assert_allclose, L.summary('onesided')['pval'][1:], R_pvals, tol, tol, False, 'logistic pvalues' + yield np.testing.assert_allclose, L.summary('onesided')['pvalue'][1:], R_pvals, tol, tol, False, 'logistic pvalues' @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test") @@ -553,8 +554,8 @@ def test_liu_gaussian(): active_set = rpy.r('active_vars') print(pvalues) - print(S['pval']) - nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999) + print(S['pvalue']) + nt.assert_true(np.corrcoef(pvalues, S['pvalue'])[0,1] > 0.999) numpy2ri.deactivate() break @@ -609,8 +610,8 @@ def test_liu_logistic(): pvalues = pvalues[~np.isnan(pvalues)] active_set = rpy.r('active_vars') print(pvalues) - print(S['pval']) - nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999) + print(S['pvalue']) + nt.assert_true(np.corrcoef(pvalues, S['pvalue'])[0,1] > 0.999) numpy2ri.deactivate() break @@ -668,9 +669,9 @@ def test_ROSI_gaussian_JM(): active_set = rpy.r('active_vars') print(pvalues) - print(np.asarray(S['pval'])) + print(np.asarray(S['pvalue'])) - nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999) + nt.assert_true(np.corrcoef(pvalues, S['pvalue'])[0,1] > 0.999) numpy2ri.deactivate() break @@ -723,9 +724,9 @@ def test_ROSI_logistic_JM(): active_set = rpy.r('active_vars') print(pvalues) - print(np.asarray(S['pval'])) + print(np.asarray(S['pvalue'])) - nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999) + nt.assert_true(np.corrcoef(pvalues, S['pvalue'])[0,1] > 0.999) numpy2ri.deactivate() break @@ -789,9 +790,9 @@ def test_ROSI_gaussian_BN(): active_set = rpy.r('active_vars') print(pvalues) - print(np.asarray(S['pval'])) + print(np.asarray(S['pvalue'])) - nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999) + nt.assert_true(np.corrcoef(pvalues, S['pvalue'])[0,1] > 0.999) numpy2ri.deactivate() break @@ -845,9 +846,9 @@ def test_ROSI_logistic_BN(): active_set = rpy.r('active_vars') print(pvalues) - print(np.asarray(S['pval'])) + print(np.asarray(S['pvalue'])) - nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999) + nt.assert_true(np.corrcoef(pvalues, S['pvalue'])[0,1] > 0.999) numpy2ri.deactivate() break @@ -874,7 +875,7 @@ def test_rlasso_gaussian(): random_signs=True) sigma_ = np.std(y) - if target is not 'debiased': + if target != 'debiased': lam = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p)) * sigma_ else: lam = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ @@ -971,21 +972,45 @@ def test_rlasso_gaussian(): nonzero, penalty=L.penalty) - _, pval, intervals = L.summary(observed_target, - cov_target, - cov_target_score, - alternatives, - opt_sample=(np.asarray(R_opt_samples),), - target_sample=np.asarray(R_target_samples), - ndraw=8000,#ndraw, - burnin=burnin, - compute_intervals=True) + result = L.summary(observed_target, + cov_target, + cov_target_score, + alternatives, + opt_sample=(np.asarray(R_opt_samples),), + target_sample=np.asarray(R_target_samples), + ndraw=8000,#ndraw, + burnin=burnin, + compute_intervals=True) + pval = np.asarray(result['pvalue']) tol = 1.e-5 - yield np.testing.assert_allclose, initial_soln, R_soln, tol, tol, False, 'checking initial rlasso solution' - yield 
np.testing.assert_allclose, cond_mean, R_cond_mean, tol, tol, False, 'checking conditional mean' - yield np.testing.assert_allclose, cond_cov, R_cond_cov, tol, tol, False, 'checking conditional covariance' - yield np.testing.assert_allclose, pval, R_pvalues, tol, tol, False, 'checking pvalues' + yield (np.testing.assert_allclose, + initial_soln, + R_soln, + tol, + tol, + False, 'checking initial rlasso solution') + yield (np.testing.assert_allclose, + cond_mean, + R_cond_mean, + tol, + tol, + False, + 'checking conditional mean') + yield (np.testing.assert_allclose, + cond_cov, + R_cond_cov, + tol, + tol, + False, + 'checking conditional covariance') + yield (np.testing.assert_allclose, + pval, + R_pvalues, + tol, + tol, + False, + 'checking pvalues') break diff --git a/selectinf/algorithms/tests/test_covtest.py b/selectinf/algorithms/tests/test_covtest.py index 9f0a2c2be..f80981659 100644 --- a/selectinf/algorithms/tests/test_covtest.py +++ b/selectinf/algorithms/tests/test_covtest.py @@ -1,7 +1,6 @@ import itertools import numpy as np -import numpy.testing.decorators as dec from ...tests.instance import gaussian_instance from ...tests.flags import SET_SEED, SMALL_SAMPLES diff --git a/selectinf/algorithms/tests/test_debiased_lasso.py b/selectinf/algorithms/tests/test_debiased_lasso.py index 51eb94f94..161cb5196 100644 --- a/selectinf/algorithms/tests/test_debiased_lasso.py +++ b/selectinf/algorithms/tests/test_debiased_lasso.py @@ -1,6 +1,5 @@ import numpy as np import nose.tools as nt -import numpy.testing.decorators as dec from ...tests.instance import gaussian_instance as instance diff --git a/selectinf/algorithms/tests/test_lasso.py b/selectinf/algorithms/tests/test_lasso.py index d5ab6a38c..a64bd869d 100644 --- a/selectinf/algorithms/tests/test_lasso.py +++ b/selectinf/algorithms/tests/test_lasso.py @@ -1,6 +1,5 @@ import numpy as np, pandas as pd import nose.tools as nt -import numpy.testing.decorators as dec from itertools import product from ...tests.flags import SMALL_SAMPLES @@ -30,6 +29,7 @@ except ImportError: statsmodels_available = False +@set_seed_iftrue(True) def test_gaussian(n=100, p=20): y = np.random.standard_normal(n) @@ -64,6 +64,7 @@ def test_gaussian(n=100, p=20): np.dot(L.constraints.linear_part, L.onestep_estimator), L.constraints.offset) +@set_seed_iftrue(True) def test_sqrt_lasso(n=100, p=20): y = np.random.standard_normal(n) @@ -91,7 +92,7 @@ def test_sqrt_lasso(n=100, p=20): np.dot(L.constraints.linear_part, L.onestep_estimator), L.constraints.offset) - +@set_seed_iftrue(True) def test_logistic(): for Y, T in [(np.random.binomial(1,0.5,size=(10,)), @@ -114,10 +115,11 @@ def test_logistic(): np.dot(L.constraints.linear_part, L.onestep_estimator), L.constraints.offset) - P = L.summary()['pval'] + P = L.summary()['pvalue'] return L, C, P +@set_seed_iftrue(True) def test_poisson(): X = np.random.standard_normal((10,5)) @@ -135,11 +137,12 @@ def test_poisson(): np.dot(L.constraints.linear_part, L.onestep_estimator), L.constraints.offset) - P = L.summary()['pval'] + P = L.summary()['pvalue'] return L, C, P -@dec.skipif(not statsmodels_available, "needs statsmodels") +@set_seed_iftrue(True) +@np.testing.dec.skipif(not statsmodels_available, "needs statsmodels") def test_coxph(): Q = rr.identity_quadratic(0.01, 0, np.ones(5), 0) @@ -147,10 +150,10 @@ def test_coxph(): T = np.random.standard_exponential(100) S = np.random.binomial(1, 0.5, size=(100,)) - L = lasso.coxph(X, T, S, 0.1, quadratic=Q) + L = lasso.cox(X, T, S, 0.1, quadratic=Q) L.fit() - L = lasso.coxph(X, 
T, S, 0.1, quadratic=Q) + L = lasso.cox(X, T, S, 0.1, quadratic=Q) L.fit() C = L.constraints @@ -159,7 +162,7 @@ def test_coxph(): np.dot(L.constraints.linear_part, L.onestep_estimator), L.constraints.offset) - P = L.summary()['pval'] + P = L.summary()['pvalue'] return L, C, P @@ -446,7 +449,7 @@ def test_data_carving_poisson(n=500, @wait_for_return_value() @set_seed_iftrue(True) -@dec.skipif(not statsmodels_available, "needs statsmodels") +@np.testing.dec.skipif(not statsmodels_available, "needs statsmodels") @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10) def test_data_carving_coxph(n=400, p=20, @@ -474,14 +477,14 @@ def test_data_carving_coxph(n=400, lam_theor = 10. * np.ones(p) lam_theor[0] = 0. - DC = data_carving.coxph(X, T, S, feature_weights=lam_theor, - stage_one=stage_one) + DC = data_carving.cox(X, T, S, feature_weights=lam_theor, + stage_one=stage_one) DC.fit() if len(DC.active) < n - int(n*split_frac): - DS = data_splitting.coxph(X, T, S, feature_weights=lam_theor, - stage_one=stage_one) + DS = data_splitting.cox(X, T, S, feature_weights=lam_theor, + stage_one=stage_one) DS.fit(use_full_cov=True) data_split = True else: @@ -540,7 +543,7 @@ def test_gaussian_pvals(n=100, if set(true_active).issubset(L.active): S = L.summary('onesided') S = L.summary('twosided') - return S['pval'], [v in true_active for v in S['variable']] + return S['pvalue'], [v in true_active for v in S['variable']] @wait_for_return_value() def test_sqrt_lasso_pvals(n=100, @@ -569,7 +572,7 @@ def test_sqrt_lasso_pvals(n=100, if set(true_active).issubset(L.active): S = L.summary('onesided') S = L.summary('twosided') - return S['pval'], [v in true_active for v in S['variable']] + return S['pvalue'], [v in true_active for v in S['variable']] @wait_for_return_value() @@ -601,7 +604,7 @@ def test_sqrt_lasso_sandwich_pvals(n=200, if set(true_active).issubset(L_SQ.active): S = L_SQ.summary('twosided') - return S['pval'], [v in true_active for v in S['variable']] + return S['pvalue'], [v in true_active for v in S['variable']] @wait_for_return_value() def test_gaussian_sandwich_pvals(n=200, @@ -651,13 +654,13 @@ def test_gaussian_sandwich_pvals(n=200, if set(true_active).issubset(L_P.active): S = L_P.summary('twosided') - P_P = [p for p, v in zip(S['pval'], S['variable']) if v not in true_active] + P_P = [p for p, v in zip(S['pvalue'], S['variable']) if v not in true_active] L_S = lasso.gaussian(X, y, feature_weights, covariance_estimator=sandwich) L_S.fit() S = L_S.summary('twosided') - P_S = [p for p, v in zip(S['pval'], S['variable']) if v not in true_active] + P_S = [p for p, v in zip(S['pvalue'], S['variable']) if v not in true_active] return P_P, P_S, [v in true_active for v in S['variable']] @@ -690,7 +693,7 @@ def test_logistic_pvals(n=500, print(true_active, L.active) if set(true_active).issubset(L.active): - return S['pval'], [v in true_active for v in S['variable']] + return S['pvalue'], [v in true_active for v in S['variable']] @set_seed_iftrue(True) def test_adding_quadratic_lasso(): diff --git a/selectinf/algorithms/tests/test_softmax.py b/selectinf/algorithms/tests/test_softmax.py index 329f847d5..1f6e64664 100644 --- a/selectinf/algorithms/tests/test_softmax.py +++ b/selectinf/algorithms/tests/test_softmax.py @@ -1,9 +1,8 @@ import numpy as np -import numpy.testing.decorators as dec from itertools import product from ..softmax import softmax_objective -@dec.skipif(True, "need some tests for softmax objective") +@np.testing.dec.skipif(True, "need some tests for softmax objective") def 
test_softmax(): raise ValueError('need some tests for softmax objective') diff --git a/selectinf/algorithms/tests/test_sqrt_lasso.py b/selectinf/algorithms/tests/test_sqrt_lasso.py index 0d05495d1..86edb6078 100644 --- a/selectinf/algorithms/tests/test_sqrt_lasso.py +++ b/selectinf/algorithms/tests/test_sqrt_lasso.py @@ -1,7 +1,6 @@ from __future__ import division import numpy as np -import numpy.testing.decorators as dec import nose.tools as nt import regreg.api as rr @@ -23,7 +22,7 @@ @wait_for_return_value() @set_sampling_params_iftrue(SMALL_SAMPLES, nsim=10, burnin=10, ndraw=10) -@dec.slow +@np.testing.dec.slow def test_goodness_of_fit(n=20, p=25, s=10, sigma=20., nsim=10, burnin=2000, ndraw=8000): P = [] diff --git a/selectinf/base.py b/selectinf/base.py index dc6db4230..d2b9d9a1b 100644 --- a/selectinf/base.py +++ b/selectinf/base.py @@ -1,6 +1,13 @@ +from typing import NamedTuple + +import numpy as np + import regreg.api as rr import regreg.affine as ra +from .algorithms.debiased_lasso import (debiasing_matrix, + pseudoinverse_debiasing_matrix) + def restricted_estimator(loss, active, solve_args={'min_its':50, 'tol':1.e-10}): """ Fit a restricted model using only columns `active`. @@ -35,3 +42,231 @@ def restricted_estimator(loss, active, solve_args={'min_its':50, 'tol':1.e-10}): beta_E = loss_restricted.solve(**solve_args) return beta_E + + +# functions construct targets of inference +# and covariance with score representation + +class TargetSpec(NamedTuple): + + observed_target : np.ndarray + cov_target : np.ndarray + regress_target_score : np.ndarray + alternatives : list + +def selected_targets(loglike, + solution, + features=None, + sign_info={}, + dispersion=None, + solve_args={'tol': 1.e-12, 'min_its': 100}, + hessian=None): + + if features is None: + features = solution != 0 + + X, y = loglike.data + n, p = X.shape + + observed_target = restricted_estimator(loglike, features, solve_args=solve_args) + linpred = X[:, features].dot(observed_target) + + Hfeat = _compute_hessian(loglike, + solution, + features)[1] + Qfeat = Hfeat[features] + _score_linear = -Hfeat + + cov_target = np.linalg.inv(Qfeat) + crosscov_target_score = _score_linear.dot(cov_target) + alternatives = ['twosided'] * features.sum() + features_idx = np.arange(p)[features] + + for i in range(len(alternatives)): + if features_idx[i] in sign_info.keys(): + alternatives[i] = sign_info[features_idx[i]] + + if dispersion is None: # use Pearson's X^2 + dispersion = _pearsonX2(y, + linpred, + loglike, + observed_target.shape[0]) + + regress_target_score = np.zeros((cov_target.shape[0], p)) + regress_target_score[:,features] = cov_target + + return TargetSpec(observed_target, + cov_target * dispersion, + regress_target_score, + alternatives) + +def full_targets(loglike, + solution, + features=None, + dispersion=None, + solve_args={'tol': 1.e-12, 'min_its': 50}, + hessian=None): + + if features is None: + features = solution != 0 + + X, y = loglike.data + n, p = X.shape + features_bool = np.zeros(p, np.bool) + features_bool[features] = True + features = features_bool + + # target is one-step estimator + + full_estimator = loglike.solve(**solve_args) + linpred = X.dot(full_estimator) + Qfull = _compute_hessian(loglike, + full_estimator) + + Qfull_inv = np.linalg.inv(Qfull) + cov_target = Qfull_inv[features][:, features] + observed_target = full_estimator[features] + crosscov_target_score = np.zeros((p, cov_target.shape[0])) + crosscov_target_score[features] = -np.identity(cov_target.shape[0]) + + if dispersion is None: # 
use Pearson's X^2 + dispersion = _pearsonX2(y, + linpred, + loglike, + p) + + alternatives = ['twosided'] * features.sum() + regress_target_score = Qfull_inv[features] # weights missing? + + return TargetSpec(observed_target, + cov_target * dispersion, + regress_target_score, + alternatives) + +def debiased_targets(loglike, + solution, + features=None, + sign_info={}, + penalty=None, #required kwarg + dispersion=None, + approximate_inverse='JM', + debiasing_args={}): + + if features is None: + features = solution != 0 + + if penalty is None: + raise ValueError('require penalty for consistent estimator') + + X, y = loglike.data + n, p = X.shape + features_bool = np.zeros(p, np.bool) + features_bool[features] = True + features = features_bool + + # relevant rows of approximate inverse + + linpred = X.dot(solution) + W = loglike.saturated_loss.hessian(linpred) + if approximate_inverse == 'JM': + Qinv_hat = np.atleast_2d(debiasing_matrix(X * np.sqrt(W)[:, None], + np.nonzero(features)[0], + **debiasing_args)) / n + else: + Qinv_hat = np.atleast_2d(pseudoinverse_debiasing_matrix(X * np.sqrt(W)[:, None], + np.nonzero(features)[0], + **debiasing_args)) + + problem = rr.simple_problem(loglike, penalty) + nonrand_soln = problem.solve() + G_nonrand = loglike.smooth_objective(nonrand_soln, 'grad') + + observed_target = nonrand_soln[features] - Qinv_hat.dot(G_nonrand) + + Qfull, Qrelax = _compute_hessian(loglike, + solution, + features) + + if p > n: + M1 = Qinv_hat.dot(X.T) + cov_target = (M1 * W[None, :]).dot(M1.T) + crosscov_target_score = -(M1 * W[None, :]).dot(X).T + else: + Qfull = X.T.dot(W[:, None] * X) + cov_target = Qinv_hat.dot(Qfull.dot(Qinv_hat.T)) + crosscov_target_score = -Qinv_hat.dot(Qfull).T + + if dispersion is None: # use Pearson's X^2 + relaxed_soln = nonrand_soln[features] - np.linalg.inv(Qrelax[features]).dot(G_nonrand[features]) + Xfeat = X[:, features] + linpred = Xfeat.dot(relaxed_soln) + dispersion = _pearsonX2(y, + linpred, + loglike, + features.sum()) + + alternatives = ['twosided'] * features.sum() + + return TargetSpec(observed_target, + cov_target * dispersion, + Qinv_hat, + alternatives) + +def form_targets(target, + loglike, + solution, + features, + **kwargs): + _target = {'full':full_targets, + 'selected':selected_targets, + 'debiased':debiased_targets}[target] + return _target(loglike, + solution, + features, + **kwargs) + +def _compute_hessian(loglike, + beta_bar, + *bool_indices): + + X, y = loglike.data + linpred = X.dot(beta_bar) + n = linpred.shape[0] + + if hasattr(loglike.saturated_loss, "hessian"): # a GLM -- all we need is W + W = loglike.saturated_loss.hessian(linpred) + parts = [np.dot(X.T, X[:, bool_idx] * W[:, None]) for bool_idx in bool_indices] + _hessian = np.dot(X.T, X * W[:, None]) # CAREFUL -- this will be big + elif hasattr(loglike.saturated_loss, "hessian_mult"): + parts = [] + for bool_idx in bool_indices: + _right = np.zeros((n, bool_idx.sum())) + for i, j in enumerate(np.nonzero(bool_idx)[0]): + _right[:,i] = loglike.saturated_loss.hessian_mult(linpred, + X[:,j], + case_weights=loglike.saturated_loss.case_weights) + parts.append(X.T.dot(_right)) + _hessian = np.zeros_like(X) + for i in range(X.shape[1]): + _hessian[:,i] = loglike.saturated_loss.hessian_mult(linpred, + X[:,i], + case_weights=loglike.saturated_loss.case_weights) + _hessian = X.T.dot(_hessian) + else: + raise ValueError('saturated_loss has no hessian or hessian_mult method') + + if bool_indices: + return (_hessian,) + tuple(parts) + else: + return _hessian + +def _pearsonX2(y, 
+ linpred, + loglike, + df_fit): + + W = loglike.saturated_loss.hessian(linpred) + n = y.shape[0] + resid = y - loglike.saturated_loss.mean_function(linpred) + return (resid ** 2 / W).sum() / (n - df_fit) + diff --git a/selectinf/bayesian/utils.py b/selectinf/bayesian/utils.py new file mode 100644 index 000000000..90e51ae8a --- /dev/null +++ b/selectinf/bayesian/utils.py @@ -0,0 +1,73 @@ +import numpy as np +from scipy.linalg import fractional_matrix_power +from scipy.stats import norm as ndist + +class langevin(object): + + def __init__(self, + initial_condition, + gradient_map, + stepsize, + proposal_scale): + + (self.state, + self.gradient_map, + self.stepsize) = (np.copy(initial_condition), + gradient_map, + stepsize) + self._shape = self.state.shape[0] + self._sqrt_step = np.sqrt(self.stepsize) + self._noise = ndist(loc=0,scale=1) + self.sample = np.copy(initial_condition) + + self.proposal_scale = proposal_scale + self.proposal_sqrt = fractional_matrix_power(self.proposal_scale, 0.5) + + def __iter__(self): + return self + + def next(self): + return self.__next__() + + def __next__(self): + while True: + + gradient_posterior, draw, _ = self.gradient_map(self.state) + + candidate = (self.state + self.stepsize * self.proposal_scale.dot(gradient_posterior) + + np.sqrt(2.) * (self.proposal_sqrt.dot(self._noise.rvs(self._shape))) * self._sqrt_step) + + if not np.all(np.isfinite(self.gradient_map(candidate)[0])): + self.stepsize *= 0.5 + self._sqrt_step = np.sqrt(self.stepsize) + else: + self.state[:] = candidate + self.sample[:] = draw + #print(" next sample ", self.state[:], self.sample[:]) + break + + return self.sample + +def langevin_sampler(posterior, + nsample=2000, + nburnin=100, + step_frac=0.3, + start=None): + + if start is None: + start = posterior.initialize_sampler(posterior.initial_estimate) + + state = np.append(start, np.ones(posterior.target_size)) + stepsize = 1. 
/ (step_frac * (2 * posterior.target_size)) + proposal_scale = np.identity(int(2 * posterior.target_size)) + sampler = langevin(state, posterior.gradient_log_likelihood, stepsize, proposal_scale) + + samples = np.zeros((nsample, 2 * posterior.target_size)) + + for i, sample in enumerate(sampler): + samples[i, :] = sampler.sample.copy() + print(" next sample ", i, samples[i, :]) + if i == nsample - 1: + break + + return samples[nburnin:, :] \ No newline at end of file diff --git a/selectinf/constraints/tests/test_affine.py b/selectinf/constraints/tests/test_affine.py index 82cfad9db..dca0f70a5 100644 --- a/selectinf/constraints/tests/test_affine.py +++ b/selectinf/constraints/tests/test_affine.py @@ -2,6 +2,7 @@ import nose import numpy as np +from numpy.testing import dec from scipy.stats import chi import nose.tools as nt @@ -168,7 +169,7 @@ def test_sampling(): np.outer(V.mean(0), V.mean(0)) - S) < 0.01) @set_seed_iftrue(SET_SEED) -@np.testing.decorators.skipif(True, msg="optimal tilt undefined -- need to implement softmax version") +@dec.skipif(True, msg="optimal tilt undefined -- need to implement softmax version") def test_optimal_tilt(): A = np.vstack(-np.identity(4)) diff --git a/selectinf/constraints/tests/test_quadratic_tests.py b/selectinf/constraints/tests/test_quadratic_tests.py index 5ea4e2767..1a1698f1a 100644 --- a/selectinf/constraints/tests/test_quadratic_tests.py +++ b/selectinf/constraints/tests/test_quadratic_tests.py @@ -1,7 +1,6 @@ import numpy as np from scipy.stats import chi import nose.tools as nt -import numpy.testing.decorators as dec from ...distributions import chisq from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue, rpy_test_safe diff --git a/selectinf/distributions/discrete_family.py b/selectinf/distributions/discrete_family.py index 5c6e6fc23..7b96476db 100644 --- a/selectinf/distributions/discrete_family.py +++ b/selectinf/distributions/discrete_family.py @@ -25,7 +25,7 @@ def crit_func(test_statistic, left_cut, right_cut): Parameters ---------- - test_statistic : np.float + test_statistic : float Observed value of test statistic. left_cut : (float, float) @@ -37,7 +37,7 @@ def crit_func(test_statistic, left_cut, right_cut): Returns ------- - decision : np.float + decision : float """ CL, gammaL = left_cut @@ -80,10 +80,10 @@ def __init__(self, sufficient_stat, weights, theta=0.): The weights are normalized to sum to 1. """ - xw = np.array(sorted(zip(sufficient_stat, weights)), np.float) + xw = np.array(sorted(zip(sufficient_stat, weights)), float) self._x = xw[:,0] self._w = xw[:,1] - self._lw = np.array([np.log(v) for v in xw[:,1]]) + self._lw = np.log(xw[:,1]) self._w /= self._w.sum() # make sure they are a pmf self.n = len(xw) self._theta = np.nan @@ -479,7 +479,12 @@ def interval(self, observed, alpha=0.05, randomize=True, auxVar=None, tol=1e-6): lower = self._inter2Lower(observed, 0., alpha, tol) return lower, upper - def equal_tailed_interval(self, observed, alpha=0.05, randomize=True, auxVar=None, tol=1e-6): + def equal_tailed_interval(self, + observed, + alpha=0.05, + randomize=True, + auxVar=None, + tol=1e-6): """ Form interval by inverting equal-tailed test with $\alpha/2$ in each tail. 
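A minimal sketch of the interface that the new `langevin_sampler` (added above in selectinf/bayesian/utils.py) appears to assume of its `posterior` argument, based only on the attributes the sampler touches: `initial_estimate`, `target_size`, an `initialize_sampler` method, and a `gradient_log_likelihood` returning a `(gradient, draw, extra)` triple over a state of length `2 * target_size`. `ToyPosterior` and its values are hypothetical, not part of the patch.

import numpy as np

class ToyPosterior(object):
    # hypothetical object exposing only the attributes langevin_sampler touches

    def __init__(self, dim):
        self.target_size = dim
        self.initial_estimate = np.zeros(dim)

    def initialize_sampler(self, estimate):
        # first half of the sampler state; the sampler appends ones for the rest
        return np.asarray(estimate)

    def gradient_log_likelihood(self, state):
        # gradient of a standard Gaussian log-density over the 2*target_size state;
        # the langevin iterator expects a (gradient, draw, extra) triple
        return -state, state.copy(), None

# usage sketch (names above are illustrative):
# from selectinf.bayesian.utils import langevin_sampler
# samples = langevin_sampler(ToyPosterior(3), nsample=500, nburnin=50)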
diff --git a/selectinf/info.py b/selectinf/info.py index b228f8e56..1df639924 100644 --- a/selectinf/info.py +++ b/selectinf/info.py @@ -43,7 +43,7 @@ # versions NUMPY_MIN_VERSION='1.7.1' SCIPY_MIN_VERSION = '0.9' -CYTHON_MIN_VERSION = '0.21' +CYTHON_MIN_VERSION = '0.29.5' MPMATH_MIN_VERSION = "0.18" PYINTER_MIN_VERSION = "0.1.6" SKLEARN_MIN_VERSION = '0.19' diff --git a/selectinf/learning/core.py b/selectinf/learning/core.py index 7ad6b4b68..a293dc9fd 100644 --- a/selectinf/learning/core.py +++ b/selectinf/learning/core.py @@ -364,22 +364,8 @@ def _inference(observed_target, else: weight_val = np.squeeze(weight_fn(target_val)) - if DEBUG: - import matplotlib.pyplot as plt, uuid - plt.plot(target_val, weight_val) - id_ = 'inference_' + str(uuid.uuid1()) - plt.savefig(id_+'_prob.png') - plt.clf() - weight_val *= ndist.pdf((target_val - observed_target) / target_sd) - plt.plot(target_val, weight_val) - plt.plot(target_val, ndist.pdf((target_val - observed_target) / target_sd), label='gaussian') - plt.plot([hypothesis], [0], '+', color='orange') - plt.legend() - plt.savefig(id_+'_dens.png') - plt.clf() - exp_family = discrete_family(target_val, weight_val) pivot = exp_family.cdf((hypothesis - observed_target) @@ -474,7 +460,13 @@ def repeat_selection(base_algorithm, sampler, min_success, num_tries): return set(final_value) -def cross_inference(learning_data, nuisance, direction, fit_probability, nref=200, fit_args={}): +def cross_inference(learning_data, + nuisance, + direction, + fit_probability, + nref=200, + fit_args={}, + verbose=False): T, Y = learning_data @@ -514,7 +506,8 @@ def new_weight_fn(nuisance, direction, weight_fn, target_val): weight_val = new_weight_fn(d_T) exp_family = discrete_family(d_T, weight_val) - print(ref_Y) + if verbose: + print(ref_Y) pval = [exp_family.cdf(0, x=t) for t, y in zip(ref_T, ref_Y) if y == 1] pvalues.append(pval) diff --git a/selectinf/learning/fitters.py b/selectinf/learning/fitters.py index 525179102..30bcf8e86 100644 --- a/selectinf/learning/fitters.py +++ b/selectinf/learning/fitters.py @@ -8,15 +8,17 @@ def gbm_fit_sk(T, Y, **params): fitfns = [] for j in range(Y.shape[1]): - print('variable %d' % (j+1,)) y = Y[:,j].astype(np.int) - clf = ensemble.GradientBoostingClassifier(**params) - clf.fit(T, y) + if len(np.unique(y)) > 1: + clf = ensemble.GradientBoostingClassifier(**params) + clf.fit(T, y) - def fit_fn(clf, t): - return clf.predict_proba(t)[:,1] - - fitfns.append(functools.partial(fit_fn, clf)) + def fit_fn(clf, t): + return clf.predict_proba(t)[:,1] + fit_fn = functools.partial(fit_fn, clf) + else: + fit_fn = lambda t: np.atleast_1d(np.ones(t.shape[0])) + fitfns.append(fit_fn) return fitfns @@ -24,7 +26,6 @@ def random_forest_fit_sk(T, Y, **params): fitfns = [] for j in range(Y.shape[1]): - print('variable %d' % (j+1,)) y = Y[:,j].astype(np.int) clf = ensemble.RandomForestClassifier(**params) clf.fit(T, y) diff --git a/selectinf/learning/learners.py b/selectinf/learning/learners.py index c34a80d5a..717ab1e08 100644 --- a/selectinf/learning/learners.py +++ b/selectinf/learning/learners.py @@ -191,7 +191,8 @@ def learn(self, fit_probability, fit_args = {}, B=500, - check_selection=None): + check_selection=None, + verbose=False): """ fit_probability : callable @@ -206,11 +207,14 @@ def learn(self, check_selection : callable (optional) Callable that determines selection variable. + verbose : bool + Print out probability of selection? 
""" learning_selection, learning_T, random_algorithm = self.generate_data(B=B, check_selection=check_selection) - print('prob(select): ', np.mean(learning_selection, 0)) + if verbose: + print('prob(select): ', np.mean(learning_selection, 0)) conditional_laws = fit_probability(learning_T, learning_selection, **fit_args) return conditional_laws, (learning_T, learning_selection) diff --git a/selectinf/learning/utils.py b/selectinf/learning/utils.py index a590a418b..6a0cf897a 100644 --- a/selectinf/learning/utils.py +++ b/selectinf/learning/utils.py @@ -49,7 +49,7 @@ def full_model_inference(X, if how_many is None: how_many = len(observed_list) - observed_list = observed_list[:how_many] + observed_list = list(np.random.choice(observed_list, how_many, replace=False)) # find the target, based on the observed outcome @@ -402,8 +402,10 @@ def naive_partial_model_inference(X, return pd.DataFrame({'naive_pivot':naive_pivots, 'naive_coverage':naive_covered, 'naive_length':naive_lengths, + 'naive_pvalue':naive_pvalues, 'nfeature':X.shape[1], 'naive_lower':naive_lower, + 'naive_upper':naive_upper, 'target':final_target, 'variable':observed_list }) @@ -441,14 +443,16 @@ def lee_inference(X, try: import matplotlib.pyplot as plt - def pivot_plot(df, - outbase, - figsize=(8,8)): + def pivot_plot_old(df, + outbase=None, + figsize=(8,8), + verbose=False): - print("selective:", np.mean(df['pivot']), np.std(df['pivot']), np.mean(df['length']), np.std(df['length']), np.mean(df['coverage'])) - print("naive:", np.mean(df['naive_pivot']), np.std(df['naive_pivot']), np.mean(df['naive_length']), np.std(df['naive_length']), np.mean(df['naive_coverage'])) + if verbose: + print("selective:", np.mean(df['pivot']), np.std(df['pivot']), np.mean(df['length']), np.std(df['length']), np.mean(df['coverage'])) + print("naive:", np.mean(df['naive_pivot']), np.std(df['naive_pivot']), np.mean(df['naive_length']), np.std(df['naive_length']), np.mean(df['naive_coverage'])) - print("len ratio selective divided by naive:", np.mean(np.array(df['length']) / np.array(df['naive_length']))) + print("len ratio selective divided by naive:", np.mean(np.array(df['length']) / np.array(df['naive_length']))) f = plt.figure(num=1, figsize=figsize) plt.clf() @@ -457,7 +461,8 @@ def pivot_plot(df, plt.plot(U, sm.distributions.ECDF(df['naive_pivot'])(U), 'r', label='Naive', linewidth=3) plt.legend(fontsize=15) plt.plot([0,1], [0,1], 'k--', linewidth=2) - plt.savefig(outbase + '.pdf') + if outbase is not None: + plt.savefig(outbase + '.pdf') pivot_ax = plt.gca() pivot_ax.set_ylabel(r'P(pivot < t)') pivot_ax.set_xlabel(r't') @@ -514,25 +519,27 @@ def liu_inference(X, import statsmodels.api as sm def pvalue_plot(df, - outbase, + outbase=None, figsize=(8, 8), naive=True, split=False, - bonferroni=False): + bonferroni=False, + verbose=False): - print("selective:", np.mean(df['pvalue']), np.std(df['pvalue']), np.mean(df['length']), np.std(df['length']), np.mean(df['coverage'])) + if verbose: + print("selective:", np.mean(df['pvalue']), np.std(df['pvalue']), np.mean(df['length']), np.std(df['length']), np.mean(df['coverage'])) - if naive: - print("naive:", np.mean(df['naive_length']), np.std(df['naive_length']), np.mean(df['naive_coverage'])) - print("len ratio selective divided by naive:", np.mean(np.array(df['length']) / np.array(df['naive_length']))) + if naive: + print("naive:", np.mean(df['naive_length']), np.std(df['naive_length']), np.mean(df['naive_coverage'])) + print("len ratio selective divided by naive:", np.mean(np.array(df['length']) / 
np.array(df['naive_length']))) - if split: - print("split:", np.mean(df['split_length']), np.std(df['split_length']), np.mean(df['split_coverage'])) - print("len ratio selective divided by split:", np.mean(np.array(df['length']) / np.array(df['split_length']))) + if split: + print("split:", np.mean(df['split_length']), np.std(df['split_length']), np.mean(df['split_coverage'])) + print("len ratio selective divided by split:", np.mean(np.array(df['length']) / np.array(df['split_length']))) - if bonferroni: - print("bonferroni:", np.mean(df['bonferroni_length']), np.std(df['bonferroni_length']), np.mean(df['bonferroni_coverage'])) - print("len ratio selective divided by bonferroni:", np.mean(np.array(df['length']) / np.array(df['bonferroni_length']))) + if bonferroni: + print("bonferroni:", np.mean(df['bonferroni_length']), np.std(df['bonferroni_length']), np.mean(df['bonferroni_coverage'])) + print("len ratio selective divided by bonferroni:", np.mean(np.array(df['length']) / np.array(df['bonferroni_length']))) f = plt.figure(figsize=figsize) plt.clf() @@ -569,21 +576,29 @@ def pvalue_plot(df, pvalue_ax.set_ylabel(r'ECDF(pvalue)', fontsize=20) pvalue_ax.set_xlabel(r'pvalue', fontsize=20) - plt.savefig(outbase + '_pvalues.pdf') - plt.savefig(outbase + '_pvalues.png', dpi=300) + if outbase is not None: + plt.savefig(outbase + '_pvalues.pdf') + plt.savefig(outbase + '_pvalues.png', dpi=300) return pvalue_ax - def pivot_plot_new(df, - outbase, - palette = {'Learned': 'b', - 'Naive': 'r', - 'Bonferroni': 'gray', - 'Lee':'gray', - 'Strawman':'gray'}, - figsize=(8, 8), straw=False): - - f = plt.figure(figsize=figsize) + def pivot_plot(df, + outbase=None, + palette = {'Learned': 'b', + 'Naive': 'r', + 'Bonferroni': 'gray', + 'Lee':'gray', + 'Strawman':'gray'}, + fig=None, + figsize=(8, 8), + straw=False, + verbose=False): + + if fig is None: + f = plt.figure(figsize=figsize) + else: + f = fig + f.clf() new_df = pd.DataFrame({'Learned': df['pivot'], 'Naive': df['naive_pivot']}) if straw: @@ -598,8 +613,11 @@ def pivot_plot_new(df, ax.set_ylabel('ECDF(pivot)', fontsize=20) ax.legend(fontsize=15) - pngfile = outbase + '_pivot.png' - plt.savefig(pngfile, dpi=300) + if outbase is not None: + pngfile = outbase + '_pivot.png' + plt.savefig(pngfile, dpi=300) + else: + pngfile = None return ax, f, pngfile, df, new_df diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py new file mode 100644 index 000000000..81f907e13 --- /dev/null +++ b/selectinf/randomized/approx_reference.py @@ -0,0 +1,443 @@ +from __future__ import division, print_function +from typing import NamedTuple + +import numpy as np, pandas as pd +from scipy.interpolate import interp1d + +from ..distributions.discrete_family import discrete_family +from ..algorithms.barrier_affine import solve_barrier_affine_py +from .selective_MLE import mle_inference +from .base import target_query_Interactspec + +class ConditionalSpec(NamedTuple): + + # description of (preselection) conditional law of + # targets \hat{\theta} | u, N + # if they were unbiased, then: + # 1) precision will agree with marginal variance + # 2) scalings will all be 1 + # 3) shifts will be 0 + + precision : np.ndarray + scalings : np.ndarray + shifts : np.ndarray + T : np.ndarray # what is T? 
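+    # assumed reading: T holds, per target, QS.cond_cov.dot(U5.T) as formed in
+    # grid_inference._construct_density below, and is used as `linear_coef` on
+    # the target grid.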
+ +class grid_inference(object): + + def __init__(self, + query_spec, + target_spec, + solve_args={'tol': 1.e-12}, + ngrid=1000): + + """ + Produce p-values and confidence intervals for targets + of model including selected features + Parameters + ---------- + query : `gaussian_query` + A Gaussian query which has information + to describe implied Gaussian. + observed_target : ndarray + Observed estimate of target. + cov_target : ndarray + Estimated covaraince of target. + cov_target_score : ndarray + Estimated covariance of target and score of randomized query. + solve_args : dict, optional + Arguments passed to solver. + """ + + self.query_spec = query_spec + self.target_spec = target_spec + self.solve_args = solve_args + self.ngrid = ngrid + + G = mle_inference(query_spec, + target_spec, + solve_args=solve_args) + + _, inverse_info, log_ref = G.solve_estimating_eqn() + + TS = target_spec + self.ntarget = ntarget = TS.cov_target.shape[0] + _scale = 4 * np.sqrt(np.diag(inverse_info)) + self.inverse_info = inverse_info + + self.stat_grid = np.zeros((ntarget, ngrid)) + for j in range(ntarget): + self.stat_grid[j, :] = np.linspace(TS.observed_target[j] - 1.5 * _scale[j], + TS.observed_target[j] + 1.5 * _scale[j], + num=ngrid) + + def summary(self, + alternatives=None, + parameter=None, + level=0.9): + """ + Produce p-values and confidence intervals for targets + of model including selected features + Parameters + ---------- + alternatives : [str], optional + Sequence of strings describing the alternatives, + should be values of ['twosided', 'less', 'greater'] + parameter : np.array + Hypothesized value for parameter -- defaults to 0. + level : float + Confidence level. + """ + + TS = self.target_spec + + if parameter is not None: + pivots = self._pivots(parameter, + alternatives=alternatives) + else: + pivots = None + + pvalues = self._pivots(np.zeros_like(TS.observed_target), + alternatives=alternatives) + lower, upper = self._intervals(level=level) + + result = pd.DataFrame({'target': TS.observed_target, + 'pvalue': pvalues, + 'alternative': alternatives, + 'lower_confidence': lower, + 'upper_confidence': upper}) + + if not np.all(parameter == 0): + result.insert(4, 'pivot', pivots) + result.insert(5, 'parameter', parameter) + + return result + + def _approx_log_reference(self, + observed_target, + cov_target, + linear_coef, + grid): + + """ + Approximate the log of the reference density on a grid. 
+ """ + if np.asarray(observed_target).shape in [(), (0,)]: + raise ValueError('no target specified') + + ref_hat = [] + solver = solve_barrier_affine_py + + for k in range(grid.shape[0]): + # in the usual D = N + Gamma theta.hat, + # regress_opt_target is "something" times Gamma, + # where "something" comes from implied Gaussian + # cond_mean is "something" times D + # Gamma is cov_target_score.T.dot(prec_target) + + cond_mean_grid = (linear_coef.dot(np.atleast_1d(grid[k] - observed_target)) + self.cond_mean) + conjugate_arg = self.cond_precision.dot(cond_mean_grid) + + val, _, _ = solver(conjugate_arg, + self.cond_precision, + self.observed_soln, + self.linear_part, + self.offset, + **self.solve_args) + + ref_hat.append(-val - (conjugate_arg.T.dot(self.cond_cov).dot(conjugate_arg) / 2.)) + + return np.asarray(ref_hat) + + def _pivots(self, + mean_parameter, + alternatives=None): + + TS = self.target_spec + + if not hasattr(self, "_families"): + self._construct_density() # generic + self._construct_families() # specific to the method + precs, S, r, _ = self.conditional_spec + + if alternatives is None: + alternatives = ['twosided'] * self.ntarget + + pivot = [] + + for m in range(self.ntarget): + + family = self._families[m] + var_target = 1. / (precs[m][0, 0]) + + mean = S[m].dot(mean_parameter[m].reshape((1,))) + r[m] + # construction of pivot from families follows `selectinf.learning.core` + + _cdf = family.cdf((mean[0] - TS.observed_target[m]) / var_target, x=TS.observed_target[m]) + + if alternatives[m] == 'twosided': + pivot.append(2 * min(_cdf, 1 - _cdf)) + elif alternatives[m] == 'greater': + pivot.append(1 - _cdf) + elif alternatives[m] == 'less': + pivot.append(_cdf) + else: + raise ValueError('alternative should be in ["twosided", "less", "greater"]') + return pivot # , self._log_ref + + def _intervals(self, + level=0.9): + + TS = self.target_spec + + if not hasattr(self, "_families"): + self._construct_density() # generic + self._construct_families() # specific to the method + + precs, S, r, _ = self.conditional_spec + + lower, upper = [], [] + + for m in range(self.ntarget): + # construction of intervals from families follows `selectinf.learning.core` + family = self._families[m] + observed_target = TS.observed_target[m] + unbiased_est = (observed_target - r[m][0]) * (1./(S[m][0,0])) + + _l, _u = family.equal_tailed_interval(observed_target, + alpha=1 - level) + l = _l * (1./(S[m][0,0])) + u = _u * (1./(S[m][0,0])) + + var_target = 1. / (precs[m][0, 0]) + + lower.append(l * var_target + unbiased_est) + upper.append(u * var_target + unbiased_est) + + return np.asarray(lower), np.asarray(upper) + + ### Private method + + def _construct_density(self): + """ + What is this method doing? + """ + + TS = self.target_spec + QS = self.query_spec + + precs = [] + S = [] + r = [] + T = [] + + p = TS.regress_target_score.shape[1] + + for m in range(self.ntarget): + observed_target_uni = (TS.observed_target[m]).reshape((1,)) + cov_target_uni = (np.diag(TS.cov_target)[m]).reshape((1, 1)) + regress_target_score_uni = TS.regress_target_score[m, :].reshape((1, p)) + + U1, U2, U3, U4, U5 = self._form_interaction_pieces(QS, + regress_target_score_uni, + cov_target_uni) + + prec_target = 1. / cov_target_uni + + # JT: what is _T? 
+ _T = QS.cond_cov.dot(U5.T) + + prec_target_nosel = prec_target + U2 - U3 + + _P = -(U1.T.dot(QS.M5) + U2.dot(observed_target_uni)) + + bias_target = cov_target_uni.dot( + U1.T.dot(-U4.dot(observed_target_uni) + QS.M4.dot(QS.cond_mean)) - _P) + + _r = np.linalg.inv(prec_target_nosel).dot(prec_target.dot(bias_target)) + _S = np.linalg.inv(prec_target_nosel).dot(prec_target) + + S.append(_S) + r.append(_r) + precs.append(prec_target_nosel) + T.append(_T) + + self.conditional_spec = ConditionalSpec(np.array(precs), + np.array(S), + np.array(r), + np.array(T) # what is T here? + ) + + return self.conditional_spec + + # Private + + def _form_interaction_pieces(self, + QS, + regress_target_score, + cov_target): + + return target_query_Interactspec(QS, + regress_target_score, + cov_target) + + +class approximate_grid_inference(grid_inference): + + def __init__(self, + query_spec, + target_spec, + solve_args={'tol': 1.e-12}, + ngrid=1000, + ncoarse=40): + + """ + Produce p-values and confidence intervals for targets + of model including selected features + Parameters + ---------- + query : `gaussian_query` + A Gaussian query which has information + to describe implied Gaussian. + observed_target : ndarray + Observed estimate of target. + cov_target : ndarray + Estimated covaraince of target. + cov_target_score : ndarray + Estimated covariance of target and score of randomized query. + solve_args : dict, optional + Arguments passed to solver. + """ + + grid_inference.__init__(self, + query_spec, + target_spec, + solve_args=solve_args) + + self.ncoarse = ncoarse + + def _approx_log_reference(self, + observed_target, + cov_target, + linear_coef, + grid): + + """ + Approximate the log of the reference density on a grid. + """ + + TS = self.target_spec + QS = self.query_spec + cond_precision = np.linalg.inv(QS.cond_cov) + + if np.asarray(observed_target).shape in [(), (0,)]: + raise ValueError('no target specified') + + ref_hat = [] + solver = solve_barrier_affine_py + + for k in range(grid.shape[0]): + # in the usual D = N + Gamma theta.hat, + # regress_opt_target is "something" times Gamma, + # where "something" comes from implied Gaussian + # cond_mean is "something" times D + # Gamma is cov_target_score.T.dot(prec_target) + + cond_mean_grid = (linear_coef.dot(np.atleast_1d(grid[k] - observed_target)) + + QS.cond_mean) + conjugate_arg = cond_precision.dot(cond_mean_grid) + + val, _, _ = solver(conjugate_arg, + cond_precision, + QS.observed_soln, + QS.linear_part, + QS.offset, + **self.solve_args) + + ref_hat.append(-val - (conjugate_arg.T.dot(QS.cond_cov).dot(conjugate_arg) / 2.)) + + return np.asarray(ref_hat) + + def _construct_families(self): + + TS = self.target_spec + QS = self.query_spec + + precs, S, r, T = self.conditional_spec + + self._families = [] + + if self.ncoarse is not None: + coarse_grid = np.zeros((self.stat_grid.shape[0], self.ncoarse)) + for j in range(coarse_grid.shape[0]): + coarse_grid[j,:] = np.linspace(self.stat_grid[j].min(), + self.stat_grid[j].max(), + self.ncoarse) + eval_grid = coarse_grid + else: + eval_grid = self.stat_grid + + _log_ref = np.zeros((self.ntarget, self.stat_grid[0].shape[0])) + + for m in range(self.ntarget): + + observed_target_uni = (TS.observed_target[m]).reshape((1,)) + cov_target_uni = (np.diag(TS.cov_target)[m]).reshape((1, 1)) + + var_target = 1. 
/ (precs[m][0, 0]) + + approx_log_ref = self._approx_log_reference(observed_target_uni, + cov_target_uni, + T[m], + eval_grid[m]) + + if self.ncoarse is None: + + logW = (approx_log_ref - 0.5 * (self.stat_grid[m] - TS.observed_target[m]) ** 2 / var_target) + logW -= logW.max() + _log_ref[m,:] = logW + self._families.append(discrete_family(self.stat_grid[m], + np.exp(logW))) + else: + + approx_fn = interp1d(eval_grid[m], + approx_log_ref, + kind='quadratic', + bounds_error=False, + fill_value='extrapolate') + + grid = self.stat_grid[m] + logW = (approx_fn(grid) - + 0.5 * (grid - TS.observed_target[m]) ** 2 / var_target) + + logW -= logW.max() + + DEBUG = False # JT: this can be removed + if DEBUG: + approx_log_ref2 = self._approx_log_reference(observed_target_uni, + cov_target_uni, + T[m], + grid) + logW2 = (approx_log_ref2 - 0.5 * (grid - TS.observed_target[m]) ** 2 / var_target) + logW2 -= logW2.max() + import matplotlib.pyplot as plt + plt.plot(grid, logW, label='extrapolated') + + plt.plot(grid, logW2, label='fine grid') + plt.legend() + + plt.figure(num=2) + plt.plot(eval_grid[m], approx_fn(eval_grid[m]), label='extrapolated coarse') + plt.plot(grid, approx_fn(grid), label='extrapolated fine') + plt.plot(grid, approx_log_ref2, label='fine grid') + plt.legend() + + plt.show() + stop + + _log_ref[m, :] = logW + self._families.append(discrete_family(grid, + np.exp(logW))) + + self._log_ref = _log_ref + diff --git a/selectinf/randomized/approx_reference_grouplasso.py b/selectinf/randomized/approx_reference_grouplasso.py new file mode 100644 index 000000000..acd9bf811 --- /dev/null +++ b/selectinf/randomized/approx_reference_grouplasso.py @@ -0,0 +1,887 @@ +from __future__ import print_function +from scipy.linalg import block_diag +from scipy.stats import norm as ndist +from scipy.interpolate import interp1d + +import collections +import numpy as np +from numpy import log +from numpy.linalg import norm, qr, inv, eig +import pandas as pd + +import regreg.api as rr +from .randomization import randomization +from ..base import restricted_estimator +from ..algorithms.barrier_affine import solve_barrier_affine_py as solver +from ..distributions.discrete_family import discrete_family + +class group_lasso(object): + + def __init__(self, + loglike, + groups, + weights, + ridge_term, + randomizer, + use_lasso=True, # should lasso solver be used where applicable - defaults to True + perturb=None): + + _check_groups(groups) # make sure groups looks sensible + + # log likelihood : quadratic loss + self.loglike = loglike + self.nfeature = self.loglike.shape[0] + + # ridge parameter + self.ridge_term = ridge_term + + # group lasso penalty (from regreg) + # use regular lasso penalty if all groups are size 1 + if use_lasso and groups.size == np.unique(groups).size: + # need to provide weights an an np.array rather than a dictionary + weights_np = np.array([w[1] for w in sorted(weights.items())]) + self.penalty = rr.weighted_l1norm(weights=weights_np, + lagrange=1.) + else: + self.penalty = rr.group_lasso(groups, + weights=weights, + lagrange=1.) 
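+        # note (assumed usage): `weights` is a dict mapping each group label to a
+        # nonnegative penalty weight, e.g. {0: 3., 1: 3.}; the lasso branch above
+        # flattens it to an array ordered by group label.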
+ + # store groups as a class variable since the non-group lasso doesn't + self.groups = groups + + self._initial_omega = perturb + + # gaussian randomization + self.randomizer = randomizer + + def fit(self, + solve_args={'tol': 1.e-12, 'min_its': 50}, + perturb=None): + + # solve the randomized version of group lasso + (self.observed_soln, + self.observed_subgrad) = self._solve_randomized_problem(perturb=perturb, + solve_args=solve_args) + + # initialize variables + active_groups = [] # active group labels + active_dirs = {} # dictionary: keys are group labels, values are unit-norm coefficients + unpenalized = [] # selected groups with no penalty + overall = np.ones(self.nfeature, np.bool) # mask of active features + ordered_groups = [] # active group labels sorted by label + ordered_opt = [] # gamma's ordered by group labels + ordered_vars = [] # indices "ordered" by sorting group labels + + tol = 1.e-20 + + _, self.prec_randomizer = self.randomizer.cov_prec + + # now we are collecting the directions and norms of the active groups + for g in sorted(np.unique(self.groups)): # g is group label + + group_mask = self.groups == g + soln = self.observed_soln # do not need to keep setting this + + if norm(soln[group_mask]) > tol * norm(soln): # is group g appreciably nonzero + ordered_groups.append(g) + + # variables in active group + ordered_vars.extend(np.flatnonzero(group_mask)) + + if self.penalty.weights[g] == 0: + unpenalized.append(g) + + else: + active_groups.append(g) + active_dirs[g] = soln[group_mask] / norm(soln[group_mask]) + + ordered_opt.append(norm(soln[group_mask])) + else: + overall[group_mask] = False + + self.selection_variable = {'directions': active_dirs, + 'active_groups': active_groups} # kind of redundant with keys of active_dirs + + self._ordered_groups = ordered_groups + + # exception if no groups are selected + if len(self.selection_variable['active_groups']) == 0: + return np.sign(soln), soln + + # otherwise continue as before + self.observed_opt_state = np.hstack(ordered_opt) # gammas as array + + _beta_unpenalized = restricted_estimator(self.loglike, # refit OLS on E + overall, + solve_args=solve_args) + + beta_bar = np.zeros(self.nfeature) + beta_bar[overall] = _beta_unpenalized # refit OLS beta with zeros + self._beta_full = beta_bar + + X, y = self.loglike.data + W = self._W = self.loglike.saturated_loss.hessian(X.dot(beta_bar)) # all 1's for LS + opt_linearNoU = np.dot(X.T, X[:, ordered_vars] * W[:, np.newaxis]) + + for i, var in enumerate(ordered_vars): + opt_linearNoU[var, i] += self.ridge_term + + self.observed_score_state = -opt_linearNoU.dot(_beta_unpenalized) + self.observed_score_state[~overall] += self.loglike.smooth_objective(beta_bar, 'grad')[~overall] + + active_signs = np.sign(self.observed_soln) + active = np.flatnonzero(active_signs) + self.active = active + + def compute_Vg(ug): + pg = ug.size # figure out size of g'th group + if pg > 1: + Z = np.column_stack((ug, np.eye(pg, pg - 1))) + Q, _ = qr(Z) + Vg = Q[:, 1:] # drop the first column + else: + Vg = np.zeros((1, 0)) # if the group is size one, the orthogonal complement is empty + return Vg + + def compute_Lg(g): + pg = active_dirs[g].size + Lg = self.penalty.weights[g] * np.eye(pg) + return Lg + + sorted_active_dirs = collections.OrderedDict(sorted(active_dirs.items())) + + Vs = [compute_Vg(ug) for ug in sorted_active_dirs.values()] + V = block_diag(*Vs) # unpack the list + Ls = [compute_Lg(g) for g in sorted_active_dirs] + L = block_diag(*Ls) # unpack the list + XE = X[:, ordered_vars] # 
changed to ordered_vars + Q = XE.T.dot(self._W[:, None] * XE) + QI = inv(Q) + C = V.T.dot(QI).dot(L).dot(V) + + self.XE = XE + self.Q = Q + self.QI = QI + self.C = C + + U = block_diag(*[ug for ug in sorted_active_dirs.values()]).T + + self.opt_linear = opt_linearNoU.dot(U) + self.active_dirs = active_dirs + self.ordered_vars = ordered_vars + + self.linear_part = -np.eye(self.observed_opt_state.shape[0]) + self.offset = np.zeros(self.observed_opt_state.shape[0]) + + return active_signs, soln + + def _solve_randomized_problem(self, + perturb=None, + solve_args={'tol': 1.e-15, 'min_its': 100}): + + # take a new perturbation if supplied + if perturb is not None: + self._initial_omega = perturb + if self._initial_omega is None: + self._initial_omega = self.randomizer.sample() + + quad = rr.identity_quadratic(self.ridge_term, + 0, + -self._initial_omega, + 0) + + problem = rr.simple_problem(self.loglike, self.penalty) + + # if all groups are size 1, set up lasso penalty and run usual lasso solver... (see existing code)... + + observed_soln = problem.solve(quad, **solve_args) + observed_subgrad = -(self.loglike.smooth_objective(observed_soln, + 'grad') + + quad.objective(observed_soln, 'grad')) + + return observed_soln, observed_subgrad + + @staticmethod + def gaussian(X, + Y, + groups, + weights, + sigma=1., + quadratic=None, + ridge_term=0., + perturb=None, + use_lasso=True, # should lasso solver be used when applicable - defaults to True + randomizer_scale=None): + + loglike = rr.glm.gaussian(X, Y, coef=1. / sigma ** 2, quadratic=quadratic) + n, p = X.shape + + mean_diag = np.mean((X ** 2).sum(0)) + if ridge_term is None: + ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1) + + if randomizer_scale is None: + randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.)) + + randomizer = randomization.isotropic_gaussian((p,), randomizer_scale) + + return group_lasso(loglike, + groups, + weights, + ridge_term, + randomizer, + use_lasso, + perturb) + + def _setup_implied_gaussian(self): + + _, prec = self.randomizer.cov_prec + + if np.asarray(prec).shape in [(), (0,)]: + cond_precision = self.opt_linear.T.dot(self.opt_linear) * prec + cond_cov = inv(cond_precision) + regress_opt = -cond_cov.dot(self.opt_linear.T) * prec + else: + cond_precision = self.opt_linear.T.dot(prec.dot(self.opt_linear)) + cond_cov = inv(cond_precision) + regress_opt = -cond_cov.dot(self.opt_linear.T).dot(prec) + + cond_mean = regress_opt.dot(self.observed_score_state + self.observed_subgrad) + self.cond_mean = cond_mean + self.cond_cov = cond_cov + self.cond_precision = cond_precision + self.regress_opt = regress_opt + + return cond_mean, cond_cov, cond_precision, regress_opt + + def selective_MLE(self, + solve_args={'tol': 1.e-12}, + level=0.9, + useJacobian=True, + dispersion=None): + + """Do selective_MLE for group_lasso + Note: this masks the selective_MLE inherited from query + because that is not adapted for the group_lasso. Also, assumes + you have already run the fit method since this uses results + from that method. 
+ Parameters + ---------- + observed_target: from selected_targets + cov_target: from selected_targets + cov_target_score: from selected_targets + observed_soln: (opt_state) initial (observed) value of optimization variables + cond_mean: conditional mean of optimization variables (model on _setup_implied_gaussian) + cond_cov: conditional variance of optimization variables (model on _setup_implied_gaussian) + regress_opt: (model on _setup_implied_gaussian) + linear_part: like A_scaling (from lasso) + offset: like b_scaling (from lasso) + solve_args: passed on to solver + level: level of confidence intervals + useC: whether to use python or C solver + JacobianPieces: (use self.C defined in fitting) + """ + + self._setup_implied_gaussian() # Calculate useful quantities + (observed_target, cov_target, cov_target_score, alternatives) = self.selected_targets(dispersion) + + observed_soln = self.observed_opt_state # just the gammas + cond_mean = self.cond_mean + cond_cov = self.cond_cov + regress_opt = self.regress_opt + linear_part = self.linear_part + offset = self.offset + + if np.asarray(observed_target).shape in [(), (0,)]: + raise ValueError('no target specified') + + observed_target = np.atleast_1d(observed_target) + prec_target = inv(cov_target) + + prec_opt = self.cond_precision + + score_offset = self.observed_score_state + self.observed_subgrad + + # regress_opt_target determines how the conditional mean of optimization variables + # vary with target + # regress_opt determines how the argument of the optimization density + # depends on the score, not how the mean depends on score, hence the minus sign + + regress_score_target = cov_target_score.T.dot(prec_target) + resid_score_target = score_offset - regress_score_target.dot(observed_target) + + regress_opt_target = regress_opt.dot(regress_score_target) + resid_mean_opt_target = cond_mean - regress_opt_target.dot(observed_target) + + if np.asarray(self.prec_randomizer).shape in [(), (0,)]: + _P = regress_score_target.T.dot(resid_score_target) * self.prec_randomizer + prec_target_nosel = prec_target + (regress_score_target.T.dot(regress_score_target) * self.prec_randomizer) - regress_opt_target.T.dot( + prec_opt).dot( + regress_opt_target) + else: + _P = regress_score_target.T.dot(self.prec_randomizer).dot(resid_score_target) + prec_target_nosel = prec_target + (regress_score_target.T.dot(self.prec_randomizer).dot(regress_score_target)) - regress_opt_target.T.dot( + prec_opt).dot(regress_opt_target) + + C = cov_target.dot(_P - regress_opt_target.T.dot(prec_opt).dot(resid_mean_opt_target)) + + conjugate_arg = prec_opt.dot(cond_mean) + + val, soln, hess = solve_barrier_affine_jacobian_py(conjugate_arg, + prec_opt, + observed_soln, + linear_part, + offset, + self.C, + self.active_dirs, + useJacobian, + **solve_args) + + final_estimator = cov_target.dot(prec_target_nosel).dot(observed_target) \ + + cov_target.dot(regress_opt_target.T.dot(prec_opt.dot(cond_mean - soln))) + C + + unbiased_estimator = cov_target.dot(prec_target_nosel).dot(observed_target) + cov_target.dot( + _P - regress_opt_target.T.dot(prec_opt).dot(resid_mean_opt_target)) + + L = regress_opt_target.T.dot(prec_opt) + observed_info_natural = prec_target_nosel + L.dot(regress_opt_target) - L.dot(hess.dot(L.T)) + + observed_info_mean = cov_target.dot(observed_info_natural.dot(cov_target)) + + Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) + + pvalues = ndist.cdf(Z_scores) + + pvalues = 2 * np.minimum(pvalues, 1 - pvalues) + + alpha = 1 - level + quantile = 
ndist.ppf(1 - alpha / 2.) + + intervals = np.vstack([final_estimator - + quantile * np.sqrt(np.diag(observed_info_mean)), + final_estimator + + quantile * np.sqrt(np.diag(observed_info_mean))]).T + + log_ref = val + conjugate_arg.T.dot(cond_cov).dot(conjugate_arg) / 2. + + result = pd.DataFrame({'MLE': final_estimator, + 'SE': np.sqrt(np.diag(observed_info_mean)), + 'Zvalue': Z_scores, + 'pvalue': pvalues, + 'lower_confidence': intervals[:, 0], + 'upper_confidence': intervals[:, 1], + 'unbiased': unbiased_estimator}) + + return result, observed_info_mean, log_ref + + def selected_targets(self, + dispersion=None, + solve_args={'tol': 1.e-12, 'min_its': 50}): + + X, y = self.loglike.data + n, p = X.shape + + XE = self.XE + Q = self.Q + observed_target = restricted_estimator(self.loglike, self.ordered_vars, solve_args=solve_args) + _score_linear = -XE.T.dot(self._W[:, None] * X).T + alternatives = ['twosided'] * len(self.active) + + if dispersion is None: # use Pearson's X^2 + dispersion = ((y - self.loglike.saturated_loss.mean_function( + XE.dot(observed_target))) ** 2 / self._W).sum() / (n - XE.shape[1]) + + cov_target = self.QI * dispersion + crosscov_target_score = _score_linear.dot(self.QI).T * dispersion + + return (observed_target, + cov_target, + crosscov_target_score, + alternatives) + + +class approximate_grid_inference(object): + + def __init__(self, + query, + dispersion, + solve_args={'tol': 1.e-12}, + useIP=True): + + """ + Produce p-values and confidence intervals for targets + of model including selected features + Parameters + ---------- + query : `gaussian_query` + A Gaussian query which has information + to describe implied Gaussian. + observed_target : ndarray + Observed estimate of target. + cov_target : ndarray + Estimated covaraince of target. + cov_target_score : ndarray + Estimated covariance of target and score of randomized query. + solve_args : dict, optional + Arguments passed to solver. 
+ """ + + self.solve_args = solve_args + + result, inverse_info = query.selective_MLE(dispersion=dispersion)[:2] + + self.linear_part = query.linear_part + self.offset = query.offset + + self.regress_opt = query.regress_opt + self.cond_mean = query.cond_mean + self.prec_opt = np.linalg.inv(query.cond_cov) + self.cond_cov = query.cond_cov + self.C = query.C + self.active_dirs = query.active_dirs + + (observed_target, cov_target, cov_target_score, alternatives) = query.selected_targets(dispersion) + self.observed_target = observed_target + self.cov_target_score = cov_target_score + self.cov_target = cov_target + + self.observed_soln = query.observed_opt_state + + self.prec_randomizer = query.prec_randomizer + self.score_offset = query.observed_score_state + query.observed_subgrad + + self.ntarget = ntarget = cov_target.shape[0] + _scale = 4 * np.sqrt(np.diag(inverse_info)) + + if useIP == False: + ngrid = 1000 + self.stat_grid = np.zeros((ntarget, ngrid)) + for j in range(ntarget): + self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], + observed_target[j] + 1.5 * _scale[j], + num=ngrid) + else: + ngrid = 100 + self.stat_grid = np.zeros((ntarget, ngrid)) + for j in range(ntarget): + self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], + observed_target[j] + 1.5 * _scale[j], + num=ngrid) + + self.opt_linear = query.opt_linear + self.useIP = useIP + + def summary(self, + alternatives=None, + parameter=None, + level=0.9): + """ + Produce p-values and confidence intervals for targets + of model including selected features + Parameters + ---------- + alternatives : [str], optional + Sequence of strings describing the alternatives, + should be values of ['twosided', 'less', 'greater'] + parameter : np.array + Hypothesized value for parameter -- defaults to 0. + level : float + Confidence level. + """ + + if parameter is not None: + pivots = self._approx_pivots(parameter, + alternatives=alternatives) + else: + pivots = None + + pvalues = self._approx_pivots(np.zeros_like(self.observed_target), + alternatives=alternatives) + lower, upper = self._approx_intervals(level=level) + + result = pd.DataFrame({'target': self.observed_target, + 'pvalue': pvalues, + 'lower_confidence': lower, + 'upper_confidence': upper}) + + if not np.all(parameter == 0): + result.insert(4, 'pivot', pivots) + result.insert(5, 'parameter', parameter) + + return result + + def log_reference(self, + observed_target, + cov_target, + cov_target_score, + grid): + + """ + Approximate the log of the reference density on a grid. 
+ """ + + if np.asarray(observed_target).shape in [(), (0,)]: + raise ValueError('no target specified') + + prec_target = np.linalg.inv(cov_target) + regress_opt_target = self.regress_opt.dot(cov_target_score.T.dot(prec_target)) + + ref_hat = [] + + for k in range(grid.shape[0]): + # in the usual D = N + Gamma theta.hat, + # regress_opt_target is "something" times Gamma, + # where "something" comes from implied Gaussian + # cond_mean is "something" times D + # Gamma is cov_target_score.T.dot(prec_target) + + num_opt = self.prec_opt.shape[0] + num_con = self.linear_part.shape[0] + + cond_mean_grid = (regress_opt_target.dot(np.atleast_1d(grid[k] - observed_target)) + + self.cond_mean) + + #direction for decomposing o + + eta = self.prec_opt.dot(self.regress_opt.dot(cov_target_score.T)) + + implied_mean = (eta.T.dot(cond_mean_grid)).item() + implied_cov = (eta.T.dot(self.cond_cov).dot(eta)).item() + implied_prec = 1./implied_cov + + _A = self.cond_cov.dot(eta) * implied_prec + R = np.identity(num_opt) - _A.dot(eta.T) + + A = self.linear_part.dot(_A).reshape((-1,)) + b = self.offset-self.linear_part.dot(R).dot(self.observed_soln) + + conjugate_arg = implied_mean * implied_prec + + val, soln, _ = solver(np.asarray([conjugate_arg]), + np.reshape(implied_prec, (1,1)), + eta.T.dot(self.observed_soln), + A.reshape((A.shape[0],1)), + b, + **self.solve_args) + + gamma_ = _A.dot(soln) + R.dot(self.observed_soln) + log_jacob = jacobian_grad_hess(gamma_, self.C, self.active_dirs) + + ref_hat.append(-val - ((conjugate_arg ** 2) * implied_cov)/ 2. + log_jacob[0]) + + return np.asarray(ref_hat) + + def _construct_families(self): + + self._construct_density() + + self._families = [] + + for m in range(self.ntarget): + p = self.cov_target_score.shape[1] + observed_target_uni = (self.observed_target[m]).reshape((1,)) + + cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) + cov_target_score_uni = self.cov_target_score[m, :].reshape((1, p)) + + var_target = 1. / ((self.precs[m])[0, 0]) + + log_ref = self.log_reference(observed_target_uni, + cov_target_uni, + cov_target_score_uni, + self.stat_grid[m]) + if self.useIP == False: + logW = (log_ref - 0.5 * (self.stat_grid[m] - self.observed_target[m]) ** 2 / var_target) + logW -= logW.max() + self._families.append(discrete_family(self.stat_grid[m], + np.exp(logW))) + else: + approx_fn = interp1d(self.stat_grid[m], + log_ref, + kind='quadratic', + bounds_error=False, + fill_value='extrapolate') + + grid = np.linspace(self.stat_grid[m].min(), self.stat_grid[m].max(), 1000) + logW = (approx_fn(grid) - + 0.5 * (grid - self.observed_target[m]) ** 2 / var_target) + + logW -= logW.max() + self._families.append(discrete_family(grid, + np.exp(logW))) + + def _approx_pivots(self, + mean_parameter, + alternatives=None): + + if not hasattr(self, "_families"): + self._construct_families() + + if alternatives is None: + alternatives = ['twosided'] * self.ntarget + + pivot = [] + + for m in range(self.ntarget): + + family = self._families[m] + var_target = 1. 
/ ((self.precs[m])[0, 0]) + + mean = self.S[m].dot(mean_parameter[m].reshape((1,))) + self.r[m] + + _cdf = family.cdf((mean[0] - self.observed_target[m]) / var_target, x=self.observed_target[m]) + print("variable completed ", m) + + if alternatives[m] == 'twosided': + pivot.append(2 * min(_cdf, 1 - _cdf)) + elif alternatives[m] == 'greater': + pivot.append(1 - _cdf) + elif alternatives[m] == 'less': + pivot.append(_cdf) + else: + raise ValueError('alternative should be in ["twosided", "less", "greater"]') + return pivot + + def _approx_intervals(self, + level=0.9): + + if not hasattr(self, "_families"): + self._construct_families() + + lower, upper = [], [] + + for m in range(self.ntarget): + # construction of intervals from families follows `selectinf.learning.core` + family = self._families[m] + observed_target = self.observed_target[m] + + l, u = family.equal_tailed_interval(observed_target, + alpha=1 - level) + + var_target = 1. / ((self.precs[m])[0, 0]) + + lower.append(l * var_target + observed_target) + upper.append(u * var_target + observed_target) + + return np.asarray(lower), np.asarray(upper) + + ### Private method + def _construct_density(self): + + precs = {} + S = {} + r = {} + + p = self.cov_target_score.shape[1] + + for m in range(self.ntarget): + observed_target_uni = (self.observed_target[m]).reshape((1,)) + cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) + prec_target = 1. / cov_target_uni + cov_target_score_uni = self.cov_target_score[m, :].reshape((1, p)) + + regress_score_target = cov_target_score_uni.T.dot(prec_target) + resid_score_target = (self.score_offset - regress_score_target.dot(observed_target_uni)).reshape( + (regress_score_target.shape[0],)) + + regress_opt_target = self.regress_opt.dot(regress_score_target) + resid_mean_opt_target = (self.cond_mean - regress_opt_target.dot(observed_target_uni)).reshape((regress_opt_target.shape[0],)) + + prec_target_nosel = prec_target + (regress_score_target.T.dot(regress_score_target) * self.prec_randomizer) - regress_opt_target.T.dot( + self.prec_opt).dot(regress_opt_target) + + _P = regress_score_target.T.dot(resid_score_target) * self.prec_randomizer + _r = (1. / prec_target_nosel).dot(regress_opt_target.T.dot(self.prec_opt).dot(resid_mean_opt_target) - _P) + _S = np.linalg.inv(prec_target_nosel).dot(prec_target) + + S[m] = _S + r[m] = _r + precs[m] = prec_target_nosel + + self.precs = precs + self.S = S + self.r = r + + +def solve_barrier_affine_jacobian_py(conjugate_arg, + precision, + feasible_point, + con_linear, + con_offset, + C, + active_dirs, + useJacobian=True, + step=1, + nstep=2000, + min_its=500, + tol=1.e-12): + """ + This needs to be updated to actually use the Jacobian information (in self.C) + arguments + conjugate_arg: \\bar{\\Sigma}^{-1} \bar{\\mu} + precision: \\bar{\\Sigma}^{-1} + feasible_point: gamma's from fitting + con_linear: linear part of affine constraint used for barrier function + con_offset: offset part of affine constraint used for barrier function + C: V^T Q^{-1} \\Lambda V + active_dirs: + """ + scaling = np.sqrt(np.diag(con_linear.dot(precision).dot(con_linear.T))) + + if feasible_point is None: + feasible_point = 1. / scaling + + def objective(gs): + p1 = -gs.T.dot(conjugate_arg) + p2 = gs.T.dot(precision).dot(gs) / 2. + if useJacobian: + p3 = - jacobian_grad_hess(gs, C, active_dirs)[0] + else: + p3 = 0 + p4 = log(1. + 1. 
/ ((con_offset - con_linear.dot(gs)) / scaling)).sum() + return p1 + p2 + p3 + p4 + + def grad(gs): + p1 = -conjugate_arg + precision.dot(gs) + p2 = -con_linear.T.dot(1. / (scaling + con_offset - con_linear.dot(gs))) + if useJacobian: + p3 = - jacobian_grad_hess(gs, C, active_dirs)[1] + else: + p3 = 0 + p4 = 1. / (con_offset - con_linear.dot(gs)) + return p1 + p2 + p3 + p4 + + def barrier_hessian(gs): # contribution of barrier and jacobian to hessian + p1 = con_linear.T.dot(np.diag(-1. / ((scaling + con_offset - con_linear.dot(gs)) ** 2.) + + 1. / ((con_offset - con_linear.dot(gs)) ** 2.))).dot(con_linear) + if useJacobian: + p2 = - jacobian_grad_hess(gs, C, active_dirs)[2] + else: + p2 = 0 + return p1 + p2 + + current = feasible_point + current_value = np.inf + + for itercount in range(nstep): + cur_grad = grad(current) + + # make sure proposal is feasible + + count = 0 + while True: + count += 1 + proposal = current - step * cur_grad + if np.all(con_offset - con_linear.dot(proposal) > 0): + break + step *= 0.5 + if count >= 40: + raise ValueError('not finding a feasible point') + # make sure proposal is a descent + + count = 0 + while True: + count += 1 + proposal = current - step * cur_grad + proposed_value = objective(proposal) + if proposed_value <= current_value: + break + step *= 0.5 + if count >= 20: + if not (np.isnan(proposed_value) or np.isnan(current_value)): + break + else: + raise ValueError('value is NaN: %f, %f' % (proposed_value, current_value)) + + # stop if relative decrease is small + + if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value) and itercount >= min_its: + current = proposal + current_value = proposed_value + break + + current = proposal + current_value = proposed_value + + if itercount % 4 == 0: + step *= 2 + + hess = inv(precision + barrier_hessian(current)) + return current_value, current, hess + + +# Jacobian calculations +def calc_GammaMinus(gamma, active_dirs): + """Calculate Gamma^minus (as a function of gamma vector, active directions) + """ + to_diag = [[g] * (ug.size - 1) for (g, ug) in zip(gamma, active_dirs.values())] + return block_diag(*[i for gp in to_diag for i in gp]) + + +def jacobian_grad_hess(gamma, C, active_dirs): + """ Calculate the log-Jacobian (scalar), gradient (gamma.size vector) and hessian (gamma.size square matrix) + """ + if C.shape == (0, 0): # when all groups are size one, C will be an empty array + return 0, 0, 0 + else: + GammaMinus = calc_GammaMinus(gamma, active_dirs) + + # eigendecomposition + #evalues, evectors = eig(GammaMinus + C) + + # log Jacobian + #J = log(evalues).sum() + J = np.log(np.linalg.det(GammaMinus + C)) + + # inverse + #GpC_inv = evectors.dot(np.diag(1 / evalues).dot(evectors.T)) + GpC_inv = np.linalg.inv(GammaMinus + C) + + # summing matrix (gamma.size by C.shape[0]) + S = block_diag(*[np.ones((1, ug.size - 1)) for ug in active_dirs.values()]) + + # gradient + grad_J = S.dot(GpC_inv.diagonal()) + + # hessian + hess_J = -S.dot(np.multiply(GpC_inv, GpC_inv.T).dot(S.T)) + + return J, grad_J, hess_J + +def _check_groups(groups): + """Make sure that the user-specific groups are ok + There are a number of assumptions that group_lasso makes about + how groups are specified. Specifically, we assume that + `groups` is a 1-d array_like of integers that are sorted in + increasing order, start at 0, and have no gaps (e.g., if there + is a group 2 and a group 4, there must also be at least one + feature in group 3). 
+    This function checks the user-specified group scheme and
+    raises an exception if it finds any problems.
+    Sorting feature groups is potentially tedious for the user and
+    in the future we might do this for them.
+    """
+
+    # check array_like
+    agroups = np.array(groups)
+
+    # check dimension
+    if len(agroups.shape) != 1:
+        raise ValueError("Groups are not a 1D array_like")
+
+    # check sorted
+    if np.any(agroups[:-1] > agroups[1:]):
+        raise ValueError("Groups are not sorted")
+
+    # check integers
+    if not np.issubdtype(agroups.dtype, np.integer):
+        raise TypeError("Groups are not integers")
+
+    # check starts with 0
+    if not np.amin(agroups) == 0:
+        raise ValueError("First group is not 0")
+
+    # check for no skipped groups
+    if not np.all(np.diff(np.unique(agroups)) == 1):
+        raise ValueError("Some group is skipped")
diff --git a/selectinf/randomized/base.py b/selectinf/randomized/base.py
new file mode 100644
index 000000000..5a25ff11e
--- /dev/null
+++ b/selectinf/randomized/base.py
@@ -0,0 +1,19 @@
+import numpy as np
+
+def target_query_Interactspec(query_spec,
+                              regress_target_score,
+                              cov_target):
+
+    QS = query_spec
+    prec_target = np.linalg.inv(cov_target)
+
+    U1 = regress_target_score.T.dot(prec_target)
+    U2 = U1.T.dot(QS.M2.dot(U1))
+    U3 = U1.T.dot(QS.M3.dot(U1))
+    U5 = U1.T.dot(QS.M4)
+    U4 = QS.M4.dot(QS.cond_cov).dot(U5.T)
+
+    return U1, U2, U3, U4, U5
+
+
+
diff --git a/selectinf/randomized/drop_losers.py b/selectinf/randomized/drop_losers.py
new file mode 100644
index 000000000..6c5d45cb3
--- /dev/null
+++ b/selectinf/randomized/drop_losers.py
@@ -0,0 +1,146 @@
+from __future__ import print_function, division
+
+import numpy as np
+import pandas as pd
+
+from .query import gaussian_query
+
+from .randomization import randomization
+from ..base import TargetSpec
+
+class drop_losers(gaussian_query):
+
+    def __init__(self,
+                 df,    # should have columns 'arm', 'stage', 'data'
+                 K=1):  # how many should we move forward?
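+        # assumed layout of `df` (long format, one row per observation), e.g.
+        #
+        #      arm  stage   data
+        #       A      1   1.32
+        #       B      1  -0.21
+        #       A      2   0.57
+        #
+        # in a drop-the-losers design, typically only the K stage-1 winners
+        # appear again in the final stage.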
+
+        self.df = df
+        self.K = K
+
+        grouped_arm = df.groupby('arm')
+        self.std = grouped_arm.std()['data']
+        self.means = grouped_arm.mean()['data']
+        self.stages = dict([(k, v) for k, v in df.groupby('stage')])
+        stage1 = df['stage'].min()
+        stage2 = df['stage'].max()
+
+        df1 = self.stages[stage1]
+        df2 = self.stages[stage2]
+
+        stage1_means = df1.groupby('arm').mean().sort_values('data', ascending=False)
+        self._winners = sorted(list(stage1_means.index[:K]))
+        best_loser = stage1_means['data'].iloc[K]
+
+        n1 = df1.groupby('arm').count()
+        n2 = df2.groupby('arm').count()
+        self._n1_win = n1_win = np.array([n1.loc[lambda df: df.index == winner]['data'].iloc[0]
+                                          for winner in self._winners])
+        self._n2_win = n2_win = np.array([n2.loc[lambda df: df.index == winner]['data'].iloc[0]
+                                          for winner in self._winners])
+        std_win = self.std.loc[self._winners]
+
+        A = -np.identity(K)
+        b = -np.ones(K) * best_loser
+        linear = np.identity(K)
+        observed_subgrad = np.zeros(K)
+
+        # Work out the implied randomization variance
+        # Let X1=X[stage1].mean(), X2=X[stage2].mean() and Xf = X.mean()
+        # with n1=len(stage1), n2=len(stage2)
+
+        # X1 = Xf + n2/n1 * (Xf-X2)
+        #    = Xf + n2/(n1+n2) * (X1-X2)
+        # so randomization term is w=n2/(n1+n2) * (X1-X2)
+        # with variance
+        # n2**2 / (n1+n2)**2 * (1/n1 + 1/n2)
+        # = n2**2 / (n1+n2)**2 * (n1+n2) / (n1*n2)
+        # = n2 / (n1 * (n1 + n2))
+
+        mult = n2_win / (n1_win * (n1_win + n2_win))
+
+        # needed for gaussian_query api
+
+        self.randomizer = randomization.gaussian(np.diag(std_win**2) * mult)
+        self.observed_opt_state = np.asarray(stage1_means['data'].iloc[:K])
+        self.observed_score_state = -np.asarray(self.means[self._winners])  # problem is a minimization
+        self.selection_variable = {'winners': self._winners}
+
+        self._unscaled_cov_score = np.diag(std_win**2) * (1/n1_win + 1/n2_win)
+        self._setup_sampler(A, b, linear, observed_subgrad)
+
+    def MLE_inference(self,
+                      level=0.9,
+                      solve_args={'tol':1.e-12}):
+        """
+
+        Parameters
+        ----------
+
+        level : float, optional
+            Confidence level.
+
+        solve_args : dict, optional
+            Arguments passed to solver.
+
+        """
+
+        observed_target = np.asarray(self.means[self._winners])
+        std_win = np.asarray(self.std.loc[self._winners])
+        cov_target = np.diag(std_win**2 / (self._n1_win + self._n2_win))
+        regress_target_score = -np.identity(observed_target.shape[0])
+
+        target_spec = TargetSpec(observed_target,
+                                 cov_target,
+                                 regress_target_score,
+                                 dispersion=1,
+                                 alternatives=['greater']*observed_target.shape[0])
+
+        result = gaussian_query.selective_MLE(self,
+                                              target_spec,
+                                              level=level,
+                                              solve_args=solve_args)
+        result[0].insert(0, 'arm', self._winners)
+        return result
+
+    def summary(self,
+                level=0.9,
+                ndraw=10000,
+                burnin=2000):
+
+        """
+        Produce p-values and confidence intervals for targets
+        of model including selected features
+
+        Parameters
+        ----------
+
+        level : float
+            Confidence level.
+
+        ndraw : int (optional)
+            Defaults to 10000.
+
+        burnin : int (optional)
+            Defaults to 2000.
+ + """ + observed_target = np.asarray(self.means[self._winners]) + std_win = self.std.loc[self._winners] + cov_target = np.diag(std_win**2 / (self._n1_win + self._n2_win)) + regress_target_score = -np.identity(observed_target.shape[0]) + + target_spec = TargetSpec(observed_target, + cov_target, + regress_target_score, + dispersion=1, + alternatives=['greater']*observed_target.shape[0]) + + result = gaussian_query.summary(self, + target_spec, + ndraw=ndraw, + level=level, + burnin=burnin, + compute_intervals=True) + result.insert(0, 'arm', self._winners) + return result + diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py new file mode 100644 index 000000000..209c40c97 --- /dev/null +++ b/selectinf/randomized/exact_reference.py @@ -0,0 +1,114 @@ +from __future__ import division, print_function + +import numpy as np +from scipy.stats import norm as ndist + +from ..distributions.discrete_family import discrete_family +from .approx_reference import grid_inference + +class exact_grid_inference(grid_inference): + + def log_reference(self, + observed_target, + cov_target, + linear_coef, + grid): + + QS = self.query_spec + TS = self.target_spec ## we don't use this; it seems that we have already formed the target_specific elements which we input as arguments for this functions + + if np.asarray(observed_target).shape in [(), (0,)]: + raise ValueError('no target specified') + + ref_hat = [] + + cond_precision = np.linalg.inv(QS.cond_cov) + num_opt = cond_precision.shape[0] + num_con = QS.linear_part.shape[0] + + for k in range(grid.shape[0]): + # in the usual D = N + Gamma theta.hat, + # regress_opt_target is "something" times Gamma, + # where "something" comes from implied Gaussian + # cond_mean is "something" times D + # Gamma is cov_target_score.T.dot(prec_target) + + cond_mean_grid = (linear_coef.dot(np.atleast_1d(grid[k] - observed_target)) + + QS.cond_mean) + + #direction for decomposing o + + eta = cond_precision.dot(linear_coef).dot(cov_target) + + implied_mean = (eta.T.dot(cond_mean_grid)).item() + implied_cov = (eta.T.dot(QS.cond_cov).dot(eta)).item() + implied_prec = 1./implied_cov + + _A = QS.cond_cov.dot(eta) * implied_prec + R = np.identity(num_opt) - _A.dot(eta.T) + + A = QS.linear_part.dot(_A).reshape((-1,)) + b = -QS.linear_part.dot(R).dot(QS.observed_soln) + + trunc_ = np.true_divide((QS.offset + b), A) + + neg_indx = np.asarray([j for j in range(num_con) if A[j] < 0.]) + pos_indx = np.asarray([j for j in range(num_con) if A[j] > 0.]) + + if pos_indx.shape[0]>0 and neg_indx.shape[0]>0: + + trunc_lower = np.max(trunc_[neg_indx]) + trunc_upper = np.min(trunc_[pos_indx]) + + lower_limit = (trunc_lower - implied_mean) * np.sqrt(implied_prec) + upper_limit = (trunc_upper - implied_mean) * np.sqrt(implied_prec) + + ref_hat.append(np.log(ndist.cdf(upper_limit) - ndist.cdf(lower_limit))) + + elif pos_indx.shape[0] == num_con: + + trunc_upper = np.min(trunc_[pos_indx]) + + upper_limit = (trunc_upper - implied_mean) * np.sqrt(implied_prec) + + ref_hat.append(np.log(ndist.cdf(upper_limit))) + + else: + + trunc_lower = np.max(trunc_[neg_indx]) + + lower_limit = (trunc_lower - implied_mean) * np.sqrt(implied_prec) + + ref_hat.append(np.log(1. 
- ndist.cdf(lower_limit))) + + return np.asarray(ref_hat) + + def _construct_families(self): + + QS = self.query_spec + TS = self.target_spec + + precs, S, r, T = self.conditional_spec + + self._families = [] + + for m in range(self.ntarget): + + observed_target_uni = (TS.observed_target[m]).reshape((1,)) + cov_target_uni = (np.diag(TS.cov_target)[m]).reshape((1, 1)) + + var_target = 1. / (precs[m][0, 0]) + + log_ref = self.log_reference(observed_target_uni, + cov_target_uni, + T[m], + self.stat_grid[m]) + + logW = (log_ref - 0.5 * (self.stat_grid[m] - TS.observed_target[m]) ** 2 / var_target) + logW -= logW.max() + self._families.append(discrete_family(self.stat_grid[m], + np.exp(logW))) + + + + diff --git a/selectinf/randomized/group_lasso.py b/selectinf/randomized/group_lasso.py index 751afd84e..4f1860599 100644 --- a/selectinf/randomized/group_lasso.py +++ b/selectinf/randomized/group_lasso.py @@ -103,8 +103,8 @@ def fit(self, p = self.nfeature - (self.initial_soln, - self.initial_subgrad) = self._solve_randomized_problem( + (self.observed_soln, + self.observed_subgrad) = self._solve_randomized_problem( perturb=perturb, solve_args=solve_args) @@ -124,7 +124,7 @@ def fit(self, for g in sorted(np.unique(self.penalty.groups)): group = self.penalty.groups == g - soln = self.initial_soln + soln = self.observed_soln if np.linalg.norm(soln[group]) * tol * np.linalg.norm(soln): ordered_groups.append(g) ordered_vars.extend(np.nonzero(group)[0]) @@ -184,8 +184,6 @@ def fit(self, for i, var in enumerate(ordered_vars): opt_linear[var, i] += self.ridge_term - opt_offset = self.initial_subgrad - # for group LASSO, we will have # a different sampler for each group # based on conditioning on all scalings @@ -201,7 +199,7 @@ def fit(self, dispersion = 1. (prec_opt_linear, - logdens_linear) = self._get_precision_opt_linear(opt_linear, + regress_opt) = self._get_precision_opt_linear(opt_linear, ordered_vars, dispersion) @@ -209,9 +207,8 @@ def fit(self, ordered_groups, ordered_vars, opt_linear, - opt_offset, self.observed_score_state, - self.initial_subgrad, + self.observed_subgrad, self.penalty, prec_opt_linear).items(): @@ -231,8 +228,8 @@ def fit(self, self.observed_score_state, log_cond_density, log_det, - (np.atleast_2d(logdens_linear.T[:,idx_g].dot(dir_g).T), - opt_offset)) + (np.atleast_2d(regress_opt.T[:,idx_g].dot(dir_g).T), + self.observed_subgrad)) self._samplers[group] = sampler self._setup = True @@ -242,8 +239,8 @@ def fit(self, def summary(self, observed_target, group_assignments, - target_cov, - target_score_cov, + cov_target, + cov_target_score, alternatives, parameter=None, level=0.9, @@ -268,8 +265,8 @@ def summary(self, intervals_) = self._inference_for_target( observed_target[group_idx], group, - target_cov[group_idx][:, group_idx], - target_score_cov[group_idx], + cov_target[group_idx][:, group_idx], + cov_target_score[group_idx], [alternatives[i] for i in np.nonzero(group_idx)[0]], parameter=parameter[group_idx], level=level, @@ -284,8 +281,8 @@ def summary(self, def _inference_for_target(self, observed_target, group, - target_cov, - target_score_cov, + cov_target, + cov_target_score, alternatives, opt_sample=None, target_sample=None, @@ -332,8 +329,8 @@ def _inference_for_target(self, ndraw = opt_sample.shape[0] pivots = sampler.coefficient_pvalues(observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, parameter=parameter, sample=(opt_sample, logW), normal_sample=target_sample, @@ -341,8 +338,8 @@ def _inference_for_target(self, if not 
np.all(parameter == 0): pvalues = sampler.coefficient_pvalues(observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, parameter=np.zeros_like(parameter), sample=(opt_sample, logW), normal_sample=target_sample, @@ -354,8 +351,8 @@ def _inference_for_target(self, if compute_intervals: intervals = sampler.confidence_intervals(observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, sample=(opt_sample, logW), normal_sample=target_sample, level=level) @@ -375,9 +372,9 @@ def _get_precision_opt_linear(self, opt_linear, variables, dispersion=1): cond_precision = opt_linear.T.dot(value) cond_cov = np.linalg.inv(cond_precision) - logdens_linear = cond_cov.dot(value.T) * dispersion # is this last dispersion correct? + regress_opt = -cond_cov.dot(value.T) * dispersion # is this last dispersion correct? - return value, logdens_linear + return value, regress_opt def _solve_randomized_problem(self, perturb=None, @@ -396,12 +393,12 @@ def _solve_randomized_problem(self, problem = rr.simple_problem(self.loglike, self.penalty) - initial_soln = problem.solve(quad, **solve_args) - initial_subgrad = -(self.loglike.smooth_objective(initial_soln, + observed_soln = problem.solve(quad, **solve_args) + observed_subgrad = -(self.loglike.smooth_objective(observed_soln, 'grad') + - quad.objective(initial_soln, 'grad')) + quad.objective(observed_soln, 'grad')) - return initial_soln, initial_subgrad + return observed_soln, observed_subgrad @staticmethod def gaussian(X, @@ -858,7 +855,6 @@ def _reference_density_info(soln, ordered_groups, # ordering is used in assumptions about columns opt_linear ordered_variables, opt_linear, - opt_offset, observed_score_state, observed_subgrad, group_lasso_penalty, @@ -1064,28 +1060,17 @@ def sample(self, ndraw): def selective_MLE(self, observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, # initial (observed) value of optimization variables -- # used as a feasible point. 
# precise value used only for independent estimator - init_soln, + observed_soln, solve_args={'tol':1.e-12}, level=0.9): raise NotImplementedError - def reparam_map(self, - parameter_target, - observed_target, - target_cov, - target_score_cov, - init_soln, - solve_args={'tol':1.e-12}, - useC=True): - - raise NotImplementedError - def _log_density_ray(self, candidate, direction, diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index 9c73512ca..0c87a0524 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -12,9 +12,9 @@ from .query import gaussian_query from .randomization import randomization -from ..base import restricted_estimator -from ..algorithms.debiased_lasso import (debiasing_matrix, - pseudoinverse_debiasing_matrix) +from ..base import (restricted_estimator, + _compute_hessian, + _pearsonX2) #### High dimensional version #### - parametric covariance @@ -105,12 +105,12 @@ def fit(self, p = self.nfeature - (self.initial_soln, - self.initial_subgrad) = self._solve_randomized_problem( + (self.observed_soln, + self.observed_subgrad) = self._solve_randomized_problem( perturb=perturb, solve_args=solve_args) - active_signs = np.sign(self.initial_soln) + active_signs = np.sign(self.observed_soln) active = self._active = active_signs != 0 self._lagrange = self.penalty.weights @@ -133,8 +133,8 @@ def fit(self, # initial state for opt variables - initial_scalings = np.fabs(self.initial_soln[active]) - initial_unpenalized = self.initial_soln[self._unpenalized] + initial_scalings = np.fabs(self.observed_soln[active]) + initial_unpenalized = self.observed_soln[self._unpenalized] self.observed_opt_state = np.concatenate([initial_scalings, initial_unpenalized]) @@ -143,6 +143,8 @@ def fit(self, self._overall, solve_args=solve_args) + # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator + beta_bar = np.zeros(p) beta_bar[overall] = _beta_unpenalized self._beta_full = beta_bar @@ -156,26 +158,28 @@ def fit(self, # U for unpenalized # -E for inactive + # compute part of hessian + + _hessian, _hessian_active, _hessian_unpen = _compute_hessian(self.loglike, + beta_bar, + active, + unpenalized) + + # fill in pieces of query + opt_linear = np.zeros((p, num_opt_var)) _score_linear_term = np.zeros((p, num_opt_var)) - # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator - - X, y = self.loglike.data - W = self._W = self.loglike.saturated_loss.hessian(X.dot(beta_bar)) - _hessian_active = np.dot(X.T, X[:, active] * W[:, None]) - _hessian_unpen = np.dot(X.T, X[:, unpenalized] * W[:, None]) - _score_linear_term = -np.hstack([_hessian_active, _hessian_unpen]) # set the observed score (data dependent) state # observed_score_state is - # \nabla \ell(\bar{\beta}_E) + Q(\bar{\beta}_E) \bar{\beta}_E + # \nabla \ell(\bar{\beta}_E) - Q(\bar{\beta}_E) \bar{\beta}_E # in linear regression this is _ALWAYS_ -X^TY # # should be asymptotically equivalent to - # \nabla \ell(\beta^*) + Q(\beta^*)\beta^* + # \nabla \ell(\beta^*) - Q(\beta^*)\beta^* self.observed_score_state = _score_linear_term.dot(_beta_unpenalized) self.observed_score_state[inactive] += self.loglike.smooth_objective(beta_bar, 'grad')[inactive] @@ -209,23 +213,33 @@ def signed_basis_vector(p, j, s): + self.ridge_term * unpenalized_directions) - opt_offset = self.initial_subgrad - + self.opt_linear = opt_linear # now make the constraints and implied gaussian self._setup = True A_scaling = -np.identity(num_opt_var) b_scaling = np.zeros(num_opt_var) + #### to be fixed -- set the cov_score here 
without dispersion + + self._unscaled_cov_score = _hessian + + self.num_opt_var = num_opt_var + self._setup_sampler_data = (A_scaling[:active.sum()], b_scaling[:active.sum()], opt_linear, - opt_offset) - if num_opt_var > 0: - self._setup_sampler(*self._setup_sampler_data) + self.observed_subgrad) return active_signs + def setup_inference(self, + dispersion): + + if self.num_opt_var > 0: + self._setup_sampler(*self._setup_sampler_data, + dispersion=dispersion) + def _solve_randomized_problem(self, perturb=None, solve_args={'tol': 1.e-12, 'min_its': 50}): @@ -243,12 +257,95 @@ def _solve_randomized_problem(self, problem = rr.simple_problem(self.loglike, self.penalty) - initial_soln = problem.solve(quad, **solve_args) - initial_subgrad = -(self.loglike.smooth_objective(initial_soln, + observed_soln = problem.solve(quad, **solve_args) + observed_subgrad = -(self.loglike.smooth_objective(observed_soln, 'grad') + - quad.objective(initial_soln, 'grad')) + quad.objective(observed_soln, 'grad')) + + return observed_soln, observed_subgrad + + @staticmethod + def fromsample(samples, + feature_weights, + proportion_select=0.5, + estimator=None, + covariance=None): + r""" + Squared-error LASSO with feature weights. + Objective function is (before randomization) + + .. math:: + + \beta \mapsto \frac{1}{2} (\beta-\hat{\beta})'\hat{\Sigma}^{-1}(\beta-\hat{\beta}) + \sum_{i=1}^p \lambda_i |\beta_i| + + where $\lambda$ is `feature_weights`, $\hat{\beta}$` is the row mean + of `samples` and $\hat{\Sigma}$ is its sample covariance. + + Parameters + ---------- + + samples : ndarray + Shape (B,p) -- the sample data matrix (e.g. bootstrap samples) + + feature_weights: [float, sequence] + Penalty weights. An intercept, or other unpenalized + features are handled by setting those entries of + `feature_weights` to 0. If `feature_weights` is + a float, then all parameters are penalized equally. + + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. + + ridge_term : float + How big a ridge term to add? + + randomizer_scale : float + Scale for IID components of randomizer. + + Returns + ------- + + L : `selection.randomized.lasso.lasso` + + """ + + samples = np.asarray(samples) + B, p = samples.shape + + if estimator is None: + estimator = samples.mean(0) + if covariance is None: + covariance = np.cov(samples.T) + + U, D, V = np.linalg.svd(covariance) + + sqrt_prec = U / np.sqrt(D)[None,:] + sqrt_prec = sqrt_prec.dot(U.T) + prec = sqrt_prec.dot(sqrt_prec.T) + np.testing.assert_allclose(prec, np.linalg.inv(covariance)) + Y = prec.dot(estimator) + + loglike = rr.glm.gaussian(sqrt_prec, + Y, + coef=1., + quadratic=None) + + # proportion should be used somewhere here... + + multiplier = 1 / proportion_select - 1 + randomizer = randomization.gaussian(prec * multiplier) + + idx = np.random.choice(B, 1)[0] + perturb = (samples[idx] - estimator) * np.sqrt(multiplier) + return (lasso(loglike, + np.asarray(feature_weights), + 0, + randomizer, + perturb=perturb), + perturb) - return initial_soln, initial_subgrad @staticmethod def gaussian(X, @@ -267,7 +364,7 @@ def gaussian(X, \beta \mapsto \frac{1}{2} \|Y-X\beta\|^2_2 + \sum_{i=1}^p \lambda_i |\beta_i| where $\lambda$ is `feature_weights`. The ridge term - is determined by the Hessian and `np.std(Y)` by default, + is determined by the Hessian by default, as is the randomizer scale. 
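# A small check (not part of the package) of the SVD construction used in
# `fromsample` above: for a symmetric positive-definite covariance, the matrix
# U diag(1/sqrt(D)) U' is a symmetric square root of the precision, so its
# square recovers the inverse covariance.  The covariance below is a toy input.
import numpy as np

rng = np.random.default_rng(0)
p = 4
A = rng.standard_normal((3 * p, p))
covariance = A.T.dot(A) / (3 * p) + 0.1 * np.identity(p)   # generic SPD matrix

U, D, _ = np.linalg.svd(covariance)
sqrt_prec = (U / np.sqrt(D)[None, :]).dot(U.T)
prec = sqrt_prec.dot(sqrt_prec.T)

np.testing.assert_allclose(prec, np.linalg.inv(covariance), rtol=1e-8)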
Parameters @@ -300,9 +397,6 @@ def gaussian(X, randomizer_scale : float Scale for IID components of randomizer. - randomizer : str - One of ['laplace', 'logistic', 'gaussian'] - Returns ------- @@ -310,21 +404,25 @@ def gaussian(X, """ - loglike = rr.glm.gaussian(X, Y, coef=1. / sigma ** 2, quadratic=quadratic) + loglike = rr.glm.gaussian(X, + Y, + coef=1. / sigma ** 2, + quadratic=quadratic) n, p = X.shape mean_diag = np.mean((X ** 2).sum(0)) if ridge_term is None: - ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1) + ridge_term = np.sqrt(mean_diag) / (np.sqrt(n - 1) * sigma**2) if randomizer_scale is None: - randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.)) + randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y, ddof=1) randomizer = randomization.isotropic_gaussian((p,), randomizer_scale) return lasso(loglike, np.asarray(feature_weights) / sigma ** 2, - ridge_term, randomizer) + ridge_term, + randomizer) @staticmethod def logistic(X, @@ -377,9 +475,6 @@ def logistic(X, randomizer_scale : float Scale for IID components of randomizer. - randomizer : str - One of ['laplace', 'logistic', 'gaussian'] - Returns ------- @@ -393,7 +488,7 @@ def logistic(X, mean_diag = np.mean((X ** 2).sum(0)) if ridge_term is None: - ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1) + ridge_term = np.sqrt(mean_diag) / np.sqrt(n - 1) if randomizer_scale is None: randomizer_scale = np.sqrt(mean_diag) * 0.5 @@ -459,23 +554,21 @@ def coxph(X, randomizer_scale : float Scale for IID components of randomizer. - randomizer : str - One of ['laplace', 'logistic', 'gaussian'] - Returns ------- L : `selection.randomized.lasso.lasso` """ - loglike = coxph_obj(X, times, status, quadratic=quadratic) + n, p = X.shape + loglike = rr.glm.cox(X, times, status, quadratic=quadratic) # scale for randomization seems kind of meaningless here... mean_diag = np.mean((X ** 2).sum(0)) if ridge_term is None: - ridge_term = np.std(times) * np.sqrt(mean_diag) / np.sqrt(n - 1) + ridge_term = np.sqrt(mean_diag) / np.sqrt(n - 1) if randomizer_scale is None: randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.)) @@ -532,9 +625,6 @@ def poisson(X, randomizer_scale : float Scale for IID components of randomizer. - randomizer : str - One of ['laplace', 'logistic', 'gaussian'] - Returns ------- @@ -549,7 +639,7 @@ def poisson(X, mean_diag = np.mean((X ** 2).sum(0)) if ridge_term is None: - ridge_term = np.std(counts) * np.sqrt(mean_diag) / np.sqrt(n - 1) + ridge_term = np.sqrt(mean_diag) / np.sqrt(n - 1) if randomizer_scale is None: randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(counts) * np.sqrt(n / (n - 1.)) @@ -616,9 +706,6 @@ def sqrt_lasso(X, randomizer_scale : float Scale for IID components of randomizer. 
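# A hedged usage sketch of the two-step flow introduced above (fit, then
# setup_inference).  Names and signatures are taken from this diff and assume
# the selectinf package at this revision is importable; the data, penalty
# weights and dispersion value are purely illustrative.
import numpy as np
from selectinf.randomized.lasso import lasso

rng = np.random.default_rng(0)
n, p, sigma = 200, 20, 1.
X = rng.standard_normal((n, p))
beta = np.zeros(p)
beta[:3] = 2.
Y = X.dot(beta) + sigma * rng.standard_normal(n)

W = np.ones(p) * sigma * np.sqrt(2 * np.log(p))    # illustrative feature weights
L = lasso.gaussian(X, Y, W)
signs = L.fit()                                    # selects the active set
L.setup_inference(dispersion=sigma**2)             # prepares the implied Gaussian
print(np.nonzero(signs != 0)[0])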
- randomizer : str - One of ['laplace', 'logistic', 'gaussian'] - Returns ------- @@ -677,137 +764,6 @@ def sqrt_lasso(X, return obj -# private functions - -# functions construct targets of inference -# and covariance with score representation - -def selected_targets(loglike, - W, - features, - sign_info={}, - dispersion=None, - solve_args={'tol': 1.e-12, 'min_its': 50}): - - X, y = loglike.data - n, p = X.shape - - Xfeat = X[:, features] - Qfeat = Xfeat.T.dot(W[:, None] * Xfeat) - observed_target = restricted_estimator(loglike, features, solve_args=solve_args) - cov_target = np.linalg.inv(Qfeat) - _score_linear = -Xfeat.T.dot(W[:, None] * X).T - crosscov_target_score = _score_linear.dot(cov_target) - alternatives = ['twosided'] * features.sum() - features_idx = np.arange(p)[features] - - for i in range(len(alternatives)): - if features_idx[i] in sign_info.keys(): - alternatives[i] = sign_info[features_idx[i]] - - if dispersion is None: # use Pearson's X^2 - dispersion = ((y - loglike.saturated_loss.mean_function( - Xfeat.dot(observed_target))) ** 2 / W).sum() / (n - Xfeat.shape[1]) - - return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives - -def full_targets(loglike, - W, - features, - dispersion=None, - solve_args={'tol': 1.e-12, 'min_its': 50}): - - X, y = loglike.data - n, p = X.shape - features_bool = np.zeros(p, np.bool) - features_bool[features] = True - features = features_bool - - # target is one-step estimator - - Qfull = X.T.dot(W[:, None] * X) - Qfull_inv = np.linalg.inv(Qfull) - full_estimator = loglike.solve(**solve_args) - cov_target = Qfull_inv[features][:, features] - observed_target = full_estimator[features] - crosscov_target_score = np.zeros((p, cov_target.shape[0])) - crosscov_target_score[features] = -np.identity(cov_target.shape[0]) - - if dispersion is None: # use Pearson's X^2 - dispersion = (((y - loglike.saturated_loss.mean_function(X.dot(full_estimator))) ** 2 / W).sum() / - (n - p)) - - alternatives = ['twosided'] * features.sum() - return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives - -def debiased_targets(loglike, - W, - features, - sign_info={}, - penalty=None, #required kwarg - dispersion=None, - approximate_inverse='JM', - debiasing_args={}): - - if penalty is None: - raise ValueError('require penalty for consistent estimator') - - X, y = loglike.data - n, p = X.shape - features_bool = np.zeros(p, np.bool) - features_bool[features] = True - features = features_bool - - # relevant rows of approximate inverse - - - if approximate_inverse == 'JM': - Qinv_hat = np.atleast_2d(debiasing_matrix(X * np.sqrt(W)[:, None], - np.nonzero(features)[0], - **debiasing_args)) / n - else: - Qinv_hat = np.atleast_2d(pseudoinverse_debiasing_matrix(X * np.sqrt(W)[:, None], - np.nonzero(features)[0], - **debiasing_args)) - - problem = rr.simple_problem(loglike, penalty) - nonrand_soln = problem.solve() - G_nonrand = loglike.smooth_objective(nonrand_soln, 'grad') - - observed_target = nonrand_soln[features] - Qinv_hat.dot(G_nonrand) - - if p > n: - M1 = Qinv_hat.dot(X.T) - cov_target = (M1 * W[None, :]).dot(M1.T) - crosscov_target_score = -(M1 * W[None, :]).dot(X).T - else: - Qfull = X.T.dot(W[:, None] * X) - cov_target = Qinv_hat.dot(Qfull.dot(Qinv_hat.T)) - crosscov_target_score = -Qinv_hat.dot(Qfull).T - - if dispersion is None: # use Pearson's X^2 - Xfeat = X[:, features] - Qrelax = Xfeat.T.dot(W[:, None] * Xfeat) - relaxed_soln = nonrand_soln[features] - 
np.linalg.inv(Qrelax).dot(G_nonrand[features]) - dispersion = (((y - loglike.saturated_loss.mean_function(Xfeat.dot(relaxed_soln)))**2 / W).sum() / - (n - features.sum())) - - alternatives = ['twosided'] * features.sum() - return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives - -def form_targets(target, - loglike, - W, - features, - **kwargs): - _target = {'full':full_targets, - 'selected':selected_targets, - 'debiased':debiased_targets}[target] - return _target(loglike, - W, - features, - **kwargs) - class split_lasso(lasso): """ @@ -819,7 +775,8 @@ def __init__(self, feature_weights, proportion_select, ridge_term=0, - perturb=None): + perturb=None, + estimate_dispersion=True): (self.loglike, self.feature_weights, @@ -832,11 +789,11 @@ def __init__(self, self.nfeature = p = self.loglike.shape[0] self.penalty = rr.weighted_l1norm(self.feature_weights, lagrange=1.) self._initial_omega = perturb + self.estimate_dispersion = estimate_dispersion def fit(self, solve_args={'tol': 1.e-12, 'min_its': 50}, - perturb=None, - estimate_dispersion=True): + perturb=None): signs = lasso.fit(self, solve_args=solve_args, @@ -846,31 +803,43 @@ def fit(self, # we need to estimate a dispersion parameter # we then setup up the sampler again + df_fit = len(self.selection_variable['variables']) - if estimate_dispersion: + if self.estimate_dispersion: X, y = self.loglike.data n, p = X.shape - df_fit = len(self.selection_variable['variables']) - dispersion = 2 * (self.loglike.smooth_objective(self._beta_full, + dispersion = 2 * (self.loglike.smooth_objective(self._beta_full, 'func') / - (n - df_fit)) + (n - df_fit)) - # run setup again after - # estimating dispersion + self.dispersion_ = dispersion + # run setup again after + # estimating dispersion - print(dispersion, 'dispersion') - if df_fit > 0: - self._setup_sampler(*self._setup_sampler_data, - dispersion=dispersion) + self.df_fit = df_fit return signs + + def setup_inference(self, + dispersion): + + if self.df_fit > 0: + + if dispersion is None: + self._setup_sampler(*self._setup_sampler_data, + dispersion=self.dispersion_) + + else: + self._setup_sampler(*self._setup_sampler_data, + dispersion=dispersion) + def _setup_implied_gaussian(self, opt_linear, - opt_offset, - dispersion): + observed_subgrad, + dispersion=1): # key observation is that the covariance of the added noise is # roughly dispersion * (1 - pi) / pi * X^TX (in OLS regression, similar for other @@ -879,7 +848,7 @@ def _setup_implied_gaussian(self, # because opt_linear has shape p x E with the columns # being those non-zero columns of the solution. 
Above S_E = np.diag(signs) # the conditional precision is S_E Q[E][:,E] * pi / ((1 - pi) * dispersion) S_E - # and logdens_linear is Q[E][:,E]^{-1} S_E + # and regress_opt is -Q[E][:,E]^{-1} S_E # padded with zeros # to be E x p @@ -897,12 +866,39 @@ def _setup_implied_gaussian(self, assert(np.linalg.norm(cond_precision - cond_precision.T) / np.linalg.norm(cond_precision) < 1.e-6) cond_cov = np.linalg.inv(cond_precision) - logdens_linear = np.zeros((len(ordered_vars), + regress_opt = np.zeros((len(ordered_vars), self.nfeature)) - logdens_linear[:, ordered_vars] = cond_cov * signs[None, :] / (dispersion * ratio) - cond_mean = -logdens_linear.dot(self.observed_score_state + opt_offset) + regress_opt[:, ordered_vars] = -cond_cov * signs[None, :] / (dispersion * ratio) + cond_mean = regress_opt.dot(self.observed_score_state + observed_subgrad) + + ## probably missing a dispersion in the denominator + # this might be too big -- use a linear_transform instead + prod_score_prec_unnorm = np.identity(self.nfeature) / (dispersion * ratio) - return cond_mean, cond_cov, cond_precision, logdens_linear + ## probably missing a multiplicative factor of ratio + cov_rand = self._unscaled_cov_score * (dispersion * ratio) + + M1 = prod_score_prec_unnorm * dispersion + M4 = M1.dot(opt_linear) + M2 = M1.dot(cov_rand).dot(M1.T) + M3 = M4.dot(cond_cov).dot(M4.T) + + # would be nice to not store these? + + self.M1 = M1 + self.M2 = M2 + self.M3 = M3 + self.M4 = M4 + self.M5 = M1.dot(self.observed_score_state + observed_subgrad) + + return (cond_mean, + cond_cov, + cond_precision, + M1, + M2, + M3, + self.M4, + self.M5) def _solve_randomized_problem(self, # optional binary vector @@ -925,18 +921,18 @@ def _solve_randomized_problem(self, quad = rr.identity_quadratic(self.ridge_term, 0, 0, - 0,) + 0) randomized_loss = self.loglike.subsample(self._selection_idx) randomized_loss.coef *= inv_frac problem = rr.simple_problem(randomized_loss, self.penalty) - initial_soln = problem.solve(quad, **solve_args) - initial_subgrad = -(self.loglike.smooth_objective(initial_soln, - 'grad') + - quad.objective(initial_soln, 'grad')) + observed_soln = problem.solve(quad, **solve_args) + observed_subgrad = -(randomized_loss.smooth_objective(observed_soln, + 'grad') + + quad.objective(observed_soln, 'grad')) - return initial_soln, initial_subgrad + return observed_soln, observed_subgrad @staticmethod def gaussian(X, @@ -945,7 +941,7 @@ def gaussian(X, proportion, sigma=1., quadratic=None, - ridge_term=0): + estimate_dispersion=True): r""" Squared-error LASSO with feature weights. Objective function is (before randomization) @@ -973,6 +969,9 @@ def gaussian(X, `feature_weights` to 0. If `feature_weights` is a float, then all parameters are penalized equally. + proportion: float + What proportion of data to use for selection. + sigma : float (optional) Noise variance. Set to 1 if `covariance_estimator` is not None. This scales the loglikelihood by `sigma**(-2)`. @@ -982,12 +981,6 @@ def gaussian(X, Can also be a linear term by setting quadratic coefficient to 0. - randomizer_scale : float - Scale for IID components of randomizer. - - randomizer : str - One of ['laplace', 'logistic', 'gaussian'] - Returns ------- @@ -999,12 +992,188 @@ def gaussian(X, Y, coef=1. 
/ sigma ** 2, quadratic=quadratic) + + return split_lasso(loglike, + np.asarray(feature_weights)/sigma**2, + proportion, + estimate_dispersion=estimate_dispersion) + + + @staticmethod + def logistic(X, + successes, + feature_weights, + proportion, + trials=None, + quadratic=None): + r""" + Logistic LASSO with feature weights (before randomization) + + .. math:: + + \beta \mapsto \ell(X\beta) + \sum_{i=1}^p \lambda_i |\beta_i| + + where $\ell$ is the negative of the logistic + log-likelihood (half the logistic deviance) + and $\lambda$ is `feature_weights`. + + Parameters + ---------- + + X : ndarray + Shape (n,p) -- the design matrix. + + successes : ndarray + Shape (n,) -- response vector. An integer number of successes. + For data that is proportions, multiply the proportions + by the number of trials first. + + feature_weights: [float, sequence] + Penalty weights. An intercept, or other unpenalized + features are handled by setting those entries of + `feature_weights` to 0. If `feature_weights` is + a float, then all parameters are penalized equally. + + proportion: float + What proportion of data to use for selection. + + trials : ndarray (optional) + Number of trials per response, defaults to + ones the same shape as Y. + + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. + + Returns + ------- + + L : `selection.randomized.lasso.lasso` + + """ + + loglike = rr.glm.logistic(X, + successes, + trials=trials, + quadratic=quadratic) + + return split_lasso(loglike, + np.asarray(feature_weights), + proportion) + + @staticmethod + def coxph(X, + times, + status, + feature_weights, + proportion, + quadratic=None): + r""" + Cox proportional hazards LASSO with feature weights. + Objective function is (before randomization) + + .. math:: + + \beta \mapsto \ell^{\text{Cox}}(\beta) + + \sum_{i=1}^p \lambda_i |\beta_i| + + where $\ell^{\text{Cox}}$ is the + negative of the log of the Cox partial + likelihood and $\lambda$ is `feature_weights`. + Uses Efron's tie breaking method. + + Parameters + ---------- + + X : ndarray + Shape (n,p) -- the design matrix. + + times : ndarray + Shape (n,) -- the survival times. + + status : ndarray + Shape (n,) -- the censoring status. + + feature_weights: [float, sequence] + Penalty weights. An intercept, or other unpenalized + features are handled by setting those entries of + `feature_weights` to 0. If `feature_weights` is + a float, then all parameters are penalized equally. + + proportion: float + What proportion of data to use for selection. + + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. + + Returns + ------- + + L : `selection.randomized.lasso.lasso` + + """ n, p = X.shape + loglike = rr.glm.cox(X, times, status, quadratic=quadratic) - mean_diag = np.mean((X ** 2).sum(0)) + return split_lasso(loglike, + np.asarray(feature_weights), + proportion) + + @staticmethod + def poisson(X, + counts, + feature_weights, + proportion, + quadratic=None, + ridge_term=None): + r""" + Poisson log-linear LASSO with feature weights. + Objective function is (before randomization) + + .. 
math:: + + \beta \mapsto \ell^{\text{Poisson}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i| + + where $\ell^{\text{Poisson}}$ is the negative + of the log of the Poisson likelihood (half the deviance) + and $\lambda$ is `feature_weights`. + + Parameters + ---------- + + X : ndarray + Shape (n,p) -- the design matrix. + + counts : ndarray + Shape (n,) -- the response. + + feature_weights: [float, sequence] + Penalty weights. An intercept, or other unpenalized + features are handled by setting those entries of + `feature_weights` to 0. If `feature_weights` is + a float, then all parameters are penalized equally. + + proportion: float + What proportion of data to use for selection. + + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. + + Returns + ------- + + L : `selection.randomized.lasso.lasso` + + """ + loglike = rr.glm.poisson(X, counts, quadratic=quadratic) return split_lasso(loglike, - np.asarray(feature_weights) / sigma ** 2, + np.asarray(feature_weights), proportion) diff --git a/selectinf/randomized/modelQ.py b/selectinf/randomized/modelQ.py index d960af043..c239f8821 100644 --- a/selectinf/randomized/modelQ.py +++ b/selectinf/randomized/modelQ.py @@ -114,9 +114,9 @@ def fit(self, quad = rr.identity_quadratic(self.ridge_term, 0, -self._initial_omega, 0) quad_data = rr.identity_quadratic(0, 0, -self.X.T.dot(self.y), 0) problem = rr.simple_problem(self.loss, self.penalty) - self.initial_soln = problem.solve(quad + quad_data, **solve_args) + self.observed_soln = problem.solve(quad + quad_data, **solve_args) - active_signs = np.sign(self.initial_soln) + active_signs = np.sign(self.observed_soln) active = self._active = active_signs != 0 self._lagrange = self.penalty.weights @@ -135,13 +135,13 @@ def fit(self, # initial state for opt variables - initial_subgrad = -(self.loss.smooth_objective(self.initial_soln, 'grad') + - quad_data.objective(self.initial_soln, 'grad') + - quad.objective(self.initial_soln, 'grad')) - self.initial_subgrad = initial_subgrad + observed_subgrad = -(self.loss.smooth_objective(self.observed_soln, 'grad') + + quad_data.objective(self.observed_soln, 'grad') + + quad.objective(self.observed_soln, 'grad')) + self.observed_subgrad = observed_subgrad - initial_scalings = np.fabs(self.initial_soln[active]) - initial_unpenalized = self.initial_soln[self._unpenalized] + initial_scalings = np.fabs(self.observed_soln[active]) + initial_unpenalized = self.observed_soln[self._unpenalized] self.observed_opt_state = np.concatenate([initial_scalings, initial_unpenalized]) @@ -177,6 +177,8 @@ def fit(self, _hessian_active = self.Q[:, active] _hessian_unpen = self.Q[:, unpenalized] + self._unscaled_cov_score = self.Q + _score_linear_term = -np.hstack([_hessian_active, _hessian_unpen]) # set the observed score (data dependent) state @@ -210,7 +212,7 @@ def signed_basis_vector(p, j, s): # two transforms that encode score and optimization # variable roles - self.opt_transform = (_opt_linear_term, self.initial_subgrad) + self.opt_transform = (_opt_linear_term, self.observed_subgrad) self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0])) # now store everything needed for the projections @@ -224,7 +226,7 @@ def signed_basis_vector(p, j, s): # compute implied mean and covariance - opt_linear, opt_offset = self.opt_transform + opt_linear, observed_subgrad = self.opt_transform A_scaling = 
-np.identity(self.num_opt_var) b_scaling = np.zeros(self.num_opt_var) @@ -232,7 +234,7 @@ def signed_basis_vector(p, j, s): self._setup_sampler(A_scaling, b_scaling, opt_linear, - opt_offset) + observed_subgrad) return active_signs @@ -417,9 +419,9 @@ def selected_targets(self, features=None, dispersion=None): Xfeat = X[:,features] Qfeat = self.Q[features][:,features] - Gfeat = self.loss.smooth_objective(self.initial_soln, 'grad')[features] - Xfeat.T.dot(y) + Gfeat = self.loss.smooth_objective(self.observed_soln, 'grad')[features] - Xfeat.T.dot(y) Qfeat_inv = np.linalg.inv(Qfeat) - one_step = self.initial_soln[features] - Qfeat_inv.dot(Gfeat) + one_step = self.observed_soln[features] - Qfeat_inv.dot(Gfeat) cov_target = Qfeat_inv.dot(Xfeat.T.dot(Xfeat)).dot(Qfeat_inv) _score_linear = -self.Q[features] crosscov_target_score = _score_linear.dot(cov_target) @@ -447,9 +449,9 @@ def full_targets(self, features=None, dispersion=None): # target is one-step estimator Qfull = self.Q - G = self.loss.smooth_objective(self.initial_soln, 'grad') - X.T.dot(y) + G = self.loss.smooth_objective(self.observed_soln, 'grad') - X.T.dot(y) Qfull_inv = np.linalg.inv(Qfull) - one_step = self.initial_soln - Qfull_inv.dot(G) + one_step = self.observed_soln - Qfull_inv.dot(G) cov_target = Qfull_inv[features][:,features] observed_target = one_step[features] crosscov_target_score = np.zeros((p, cov_target.shape[0])) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py new file mode 100644 index 000000000..1dd16572f --- /dev/null +++ b/selectinf/randomized/posterior_inference.py @@ -0,0 +1,293 @@ +from __future__ import division, print_function + +import numpy as np +import typing + +from scipy.stats import norm as ndist, invgamma +from scipy.linalg import fractional_matrix_power + +from ..algorithms.barrier_affine import solve_barrier_affine_py +from .selective_MLE import mle_inference +from .base import target_query_Interactspec + +class PosteriorAtt(typing.NamedTuple): + + logPosterior: float + grad_logPosterior: np.ndarray + +class posterior(object): + """ + Parameters + ---------- + observed_target : ndarray + Observed estimate of target. + cov_target : ndarray + Estimated covariance of target. + cov_target_score : ndarray + Estimated covariance of target and score of randomized query. + prior : callable + A callable object that takes a single argument + `parameter` of the same shape as `observed_target` + and returns (value of log prior, gradient of log prior) + dispersion : float, optional + A dispersion parameter for likelihood. + solve_args : dict + Arguments passed to solver of affine barrier problem. + """ + + def __init__(self, + query_spec, + target_spec, + dispersion, + prior, + solve_args={'tol': 1.e-12}): + + self.query_spec = QS = query_spec + self.target_spec = TS = target_spec + self.solve_args = solve_args + + G = mle_inference(query_spec, + target_spec, + solve_args=solve_args) + + result, self.inverse_info, self.log_ref = G.solve_estimating_eqn() + + self.ntarget = TS.cov_target.shape[0] + self.nopt = QS.cond_cov.shape[0] + + self.initial_estimate = np.asarray(result['MLE']) + self.dispersion = dispersion + + ### Note for an informative prior we might want to change this... + self.prior = prior + + self._get_marginal_parameters() + + def log_posterior(self, + target_parameter, + sigma=1): + + """ + Parameters + ---------- + target_parameter : ndarray + Value of parameter at which to evaluate + posterior and its gradient. 
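# Sketch of the `prior` callable expected by the `posterior` class above: it
# maps a parameter vector to (log prior value, gradient of log prior).  Here an
# independent N(0, tau^2) prior; tau = 10 is only an illustrative choice.
import numpy as np

def gaussian_log_prior(parameter, tau=10.):
    logp = -np.sum(parameter ** 2) / (2 * tau ** 2)
    grad = -parameter / tau ** 2
    return logp, grad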
+ sigma : ndarray + Noise standard deviation. + """ + + QS = self.query_spec + TS = self.target_spec + + (prec_marginal, + linear_coef, + offset_coef, + r, + S, + prec_target_nosel) = self._get_marginal_parameters() + + sigmasq = sigma ** 2 + + target = S.dot(target_parameter) + r + + mean_marginal = linear_coef.dot(target) + offset_coef + conjugate_marginal = prec_marginal.dot(mean_marginal) + + solver = solve_barrier_affine_py + + val, soln, hess = solver(conjugate_marginal, + prec_marginal, + QS.observed_soln, + QS.linear_part, + QS.offset, + **self.solve_args) + + log_normalizer = -val - mean_marginal.T.dot(prec_marginal).dot(mean_marginal) / 2. + + log_lik = -((TS.observed_target - target).T.dot(prec_target_nosel).dot(TS.observed_target - target)) / 2. \ + - log_normalizer + + grad_lik = S.T.dot(prec_target_nosel.dot(TS.observed_target) - prec_target_nosel.dot(target) + - linear_coef.T.dot(prec_marginal.dot(soln) - conjugate_marginal)) + + log_prior, grad_prior = self.prior(target_parameter) + + log_posterior = self.dispersion * (log_lik - self.log_ref) / sigmasq + log_prior + grad_log_posterior = self.dispersion * grad_lik / sigmasq + grad_prior + + return PosteriorAtt(log_posterior, + grad_log_posterior) + + ### Private method + + def _get_marginal_parameters(self): + """ + This works out the implied covariance + of optimization varibles as a function + of randomization as well how to compute + implied mean as a function of the true parameters. + """ + + QS = self.query_spec + TS = self.target_spec + + U1, U2, U3, U4, U5 = self._form_interaction_pieces(QS, + TS.regress_target_score, + TS.cov_target) + + prec_target = np.linalg.inv(TS.cov_target) + cond_precision = np.linalg.inv(QS.cond_cov) + + prec_target_nosel = prec_target + U2 - U3 + + _P = -(U1.T.dot(QS.M5) + U2.dot(TS.observed_target)) + + bias_target = TS.cov_target.dot(U1.T.dot(-U4.dot(TS.observed_target) + + QS.M4.dot(QS.cond_mean)) - _P) + + ###set parameters for the marginal distribution of optimization variables + + _Q = np.linalg.inv(prec_target_nosel + U3) + prec_marginal = cond_precision - U5.T.dot(_Q).dot(U5) + linear_coef = QS.cond_cov.dot(U5.T) + offset_coef = QS.cond_mean - linear_coef.dot(TS.observed_target) + + ###set parameters for the marginal distribution of target + + r = np.linalg.inv(prec_target_nosel).dot(prec_target.dot(bias_target)) + S = np.linalg.inv(prec_target_nosel).dot(prec_target) + + return (prec_marginal, + linear_coef, + offset_coef, + r, + S, + prec_target_nosel) + + def _form_interaction_pieces(self, + QS, + regress_target_score, + cov_target): + + return target_query_Interactspec(QS, + regress_target_score, + cov_target) +### sampling methods + +def langevin_sampler(selective_posterior, + nsample=2000, + nburnin=100, + proposal_scale=None, + step=1.): + + state = selective_posterior.initial_estimate + stepsize = 1. 
/ (step * selective_posterior.ntarget) + + if proposal_scale is None: + proposal_scale = selective_posterior.inverse_info + + sampler = langevin(state, + selective_posterior.log_posterior, + proposal_scale, + stepsize, + np.sqrt(selective_posterior.dispersion)) + + samples = np.zeros((nsample, selective_posterior.ntarget)) + + for i, sample in enumerate(sampler): + sampler.scaling = np.sqrt(selective_posterior.dispersion) + samples[i, :] = sample.copy() + #print("sample ", i, samples[i,:]) + if i == nsample - 1: + break + + return samples[nburnin:, :] + + +def gibbs_sampler(selective_posterior, + nsample=2000, + nburnin=100, + proposal_scale=None, + step=1.): + state = selective_posterior.initial_estimate + stepsize = 1. / (step * selective_posterior.ntarget) + + if proposal_scale is None: + proposal_scale = selective_posterior.inverse_info + + sampler = langevin(state, + selective_posterior.log_posterior, + proposal_scale, + stepsize, + np.sqrt(selective_posterior.dispersion)) + samples = np.zeros((nsample, selective_posterior.ntarget)) + scale_samples = np.zeros(nsample) + scale_update = np.sqrt(selective_posterior.dispersion) + for i in range(nsample): + sample = sampler.__next__() + samples[i, :] = sample + + import sys + sys.stderr.write('a: ' + str(0.1 + + selective_posterior.ntarget + + selective_posterior.ntarget / 2)+'\n') + sys.stderr.write('scale: ' + str(0.1 - ((scale_update ** 2) * sampler.posterior_[0])) + '\n') + sys.stderr.write('scale_update: ' + str(scale_update) + '\n') + sys.stderr.write('initpoint: ' + str(sampler.posterior_[0]) + '\n') + scale_update_sq = invgamma.rvs(a=(0.1 + + selective_posterior.ntarget + + selective_posterior.ntarget / 2), + scale=0.1 - ((scale_update ** 2) * sampler.posterior_.logPosterior), + size=1) + scale_samples[i] = np.sqrt(scale_update_sq) + sampler.scaling = np.sqrt(scale_update_sq) + + return samples[nburnin:, :], scale_samples[nburnin:] + + +class langevin(object): + + def __init__(self, + initial_condition, + gradient_map, + proposal_scale, + stepsize, + scaling): + + (self.state, + self.gradient_map, + self.stepsize) = (np.copy(initial_condition), + gradient_map, + stepsize) + self.proposal_scale = proposal_scale + self._shape = self.state.shape[0] + self._sqrt_step = np.sqrt(self.stepsize) + self._noise = ndist(loc=0, scale=1) + self.sample = np.copy(initial_condition) + self.scaling = scaling + + self.proposal_sqrt = fractional_matrix_power(self.proposal_scale, 0.5) + + def __iter__(self): + return self + + def next(self): + return self.__next__() + + def __next__(self): + while True: + self.posterior_ = self.gradient_map(self.state, self.scaling) + _proposal = self.proposal_sqrt.dot(self._noise.rvs(self._shape)) + candidate = (self.state + self.stepsize * self.proposal_scale.dot(self.posterior_.grad_logPosterior) + + np.sqrt(2.) 
* _proposal * self._sqrt_step) + + if not np.all(np.isfinite(self.gradient_map(candidate, self.scaling)[1])): + self.stepsize *= 0.5 + self._sqrt_step = np.sqrt(self.stepsize) + else: + self.state[:] = candidate + break + return self.state + + diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index b697afd85..d9a2a83cb 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -1,56 +1,66 @@ -import functools -from itertools import product +from typing import NamedTuple +import numpy as np, pandas as pd -import numpy as np -from scipy.stats import norm as ndist -from scipy.optimize import bisect +from ..constraints.affine import constraints +from .posterior_inference import (posterior, langevin_sampler) +from .approx_reference import approximate_grid_inference +from .exact_reference import exact_grid_inference +from .selective_MLE import mle_inference -from regreg.affine import power_L -import regreg.api as rr +class QuerySpec(NamedTuple): -from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C + # law of o|S,u -from ..distributions.api import discrete_family -from ..constraints.affine import (sample_from_constraints, - constraints) -from ..algorithms.softmax import softmax_objective + cond_mean : np.ndarray + cond_cov : np.ndarray -class query(object): + # how S enters into E[o|S,u] + opt_linear : np.ndarray # not sure if needed -- absorbed into M4,M5? + + # constraints + + linear_part : np.ndarray + offset : np.ndarray + + # score / randomization relationship + + M2 : np.ndarray + M3 : np.ndarray + M4 : np.ndarray + M5 : np.ndarray + + # observed values + + observed_opt_state : np.ndarray + observed_score_state : np.ndarray + observed_subgrad : np.ndarray + observed_soln : np.ndarray + observed_score : np.ndarray + +class gaussian_query(object): r""" This class is the base of randomized selective inference based on convex programs. - The main mechanism is to take an initial penalized program - .. math:: - \text{minimize}_B \ell(B) + {\cal P}(B) - and add a randomization and small ridge term yielding - .. math:: - - \text{minimize}_B \ell(B) + {\cal P}(B) - + \text{minimize}_B \ell(B) + {\cal P}(B) - \langle \omega, B \rangle + \frac{\epsilon}{2} \|B\|^2_2 - """ def __init__(self, randomization, perturb=None): """ - Parameters ---------- - randomization : `selection.randomized.randomization.randomization` - Instance of a randomization scheme. + Instance of a randomization scheme. Describes the law of $\omega$. - perturb : ndarray, optional Value of randomization vector, an instance of $\omega$. - - """ self.randomization = randomization self.perturb = perturb @@ -58,24 +68,41 @@ def __init__(self, randomization, perturb=None): self._randomized = False self._setup = False + @property + def specification(self): + return QuerySpec(cond_mean=self.cond_mean, + cond_cov=self.cond_cov, + opt_linear=self.opt_linear, + linear_part=self.affine_con.linear_part, + offset=self.affine_con.offset, + M2=self.M2, + M3=self.M3, + M4=self.M4, + M5=self.M5, + observed_opt_state=self.observed_opt_state, + observed_score_state=self.observed_score_state, + observed_subgrad=self.observed_subgrad, + observed_soln=self.observed_opt_state, + observed_score=self.observed_score_state + self.observed_subgrad) + + # Methods reused by subclasses def randomize(self, perturb=None): """ - The actual randomization step. - Parameters ---------- - perturb : ndarray, optional Value of randomization vector, an instance of $\omega$. 
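# Stand-alone sketch of the preconditioned (unadjusted) Langevin update used by
# the `langevin` class above:
#     state <- state + h * Gamma * grad log pi(state) + sqrt(2h) * Gamma^{1/2} * noise.
# The target here is a toy N(0, I_2) density and Gamma, h are illustrative;
# the unadjusted chain carries an O(h) bias, so the sample covariance is only
# approximately the identity.
import numpy as np
from scipy.linalg import fractional_matrix_power

rng = np.random.default_rng(0)
h = 0.05
Gamma = np.array([[1.0, 0.3],
                  [0.3, 1.0]])                     # plays the role of proposal_scale
Gamma_sqrt = fractional_matrix_power(Gamma, 0.5)

def grad_log_pi(theta):                            # gradient of log N(0, I_2) density
    return -theta

theta = np.zeros(2)
draws = np.zeros((20000, 2))
for i in range(draws.shape[0]):
    noise = rng.standard_normal(2)
    theta = (theta
             + h * Gamma.dot(grad_log_pi(theta))
             + np.sqrt(2 * h) * Gamma_sqrt.dot(noise))
    draws[i] = theta

print(np.cov(draws[5000:].T))                      # roughly the 2x2 identity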
- """ if not self._randomized: - self.randomized_loss, self._initial_omega = self.randomization.randomize(self.loss, self.epsilon, perturb=perturb) + (self.randomized_loss, + self._initial_omega) = self.randomization.randomize(self.loss, + self.epsilon, + perturb=perturb) self._randomized = True def get_sampler(self): @@ -87,170 +114,8 @@ def set_sampler(self, sampler): sampler = property(get_sampler, set_sampler, doc='Sampler of optimization (augmented) variables.') - # implemented by subclasses - - def solve(self): - - raise NotImplementedError('abstract method') - - def setup_sampler(self): - """ - Setup query to prepare for sampling. - Should set a few key attributes: - - - observed_score_state - - observed_opt_state - - opt_transform - - """ - raise NotImplementedError('abstract method -- only keyword arguments') - - def summary(self, - observed_target, - target_cov, - target_score_cov, - alternatives, - opt_sample=None, - target_sample=None, - parameter=None, - level=0.9, - ndraw=10000, - burnin=2000, - compute_intervals=False): - """ - Produce p-values and confidence intervals for targets - of model including selected features - - Parameters - ---------- - - target : one of ['selected', 'full'] - - features : np.bool - Binary encoding of which features to use in final - model and targets. - - parameter : np.array - Hypothesized value for parameter -- defaults to 0. - - level : float - Confidence level. - - ndraw : int (optional) - Defaults to 1000. - - burnin : int (optional) - Defaults to 1000. - - compute_intervals : bool - Compute confidence intervals? - - dispersion : float (optional) - Use a known value for dispersion, or Pearson's X^2? - """ - - if parameter is None: - parameter = np.zeros_like(observed_target) - - if opt_sample is None: - opt_sample, logW = self.sampler.sample(ndraw, burnin) - else: - if len(opt_sample) == 1: # only a sample, so weights are 1s - opt_sample = opt_sample[0] - logW = np.zeros(ndraw) - else: - opt_sample, logW = opt_sample - ndraw = opt_sample.shape[0] - - pivots = self.sampler.coefficient_pvalues(observed_target, - target_cov, - target_score_cov, - parameter=parameter, - sample=(opt_sample, logW), - normal_sample=target_sample, - alternatives=alternatives) - - MLE_intervals = self.selective_MLE(observed_target, - target_cov, - target_score_cov)[5] - - if not np.all(parameter == 0): - pvalues = self.sampler.coefficient_pvalues(observed_target, - target_cov, - target_score_cov, - parameter=np.zeros_like(parameter), - sample=(opt_sample, logW), - normal_sample=target_sample, - alternatives=alternatives) - else: - pvalues = pivots - - intervals = None - if compute_intervals: - - MLE_intervals = self.selective_MLE(observed_target, - target_cov, - target_score_cov)[4] - - intervals = self.sampler.confidence_intervals(observed_target, - target_cov, - target_score_cov, - sample=(opt_sample, logW), - normal_sample=target_sample, - initial_guess=MLE_intervals, - level=level) - - return pivots, pvalues, intervals - - def selective_MLE(self, - observed_target, - target_cov, - target_score_cov, - level=0.9, - solve_args={'tol':1.e-12}): - """ - - Parameters - ---------- - - observed_target : ndarray - Observed estimate of target. - - target_cov : ndarray - Estimated covaraince of target. - - target_score_cov : ndarray - Estimated covariance of target and score of randomized query. - - level : float, optional - Confidence level. - - solve_args : dict, optional - Arguments passed to solver. 
- - """ - - return self.sampler.selective_MLE(observed_target, - target_cov, - target_score_cov, - self.observed_opt_state, - level=level, - solve_args=solve_args) - - -class gaussian_query(query): - - useC = True - - """ - A class with Gaussian perturbation to the objective -- - easy to apply CLT to such things - """ - def fit(self, perturb=None): - p = self.nfeature - # take a new perturbation if supplied if perturb is not None: self._initial_omega = perturb @@ -259,1461 +124,193 @@ def fit(self, perturb=None): # Private methods - def _setup_sampler(self, - A_scaling, - b_scaling, + def _setup_sampler(self, + linear_part, + offset, opt_linear, - opt_offset, - # optional dispersion parameter - # for covariance of randomization + observed_subgrad, dispersion=1): - if not np.all(A_scaling.dot(self.observed_opt_state) - b_scaling <= 0): + A, b = linear_part, offset + + if not np.all(A.dot(self.observed_opt_state) - b <= 0): raise ValueError('constraints not satisfied') - (cond_mean, - cond_cov, - cond_precision, - logdens_linear) = self._setup_implied_gaussian(opt_linear, - opt_offset, - dispersion) - - def log_density(logdens_linear, offset, cond_prec, opt, score): - if score.ndim == 1: - mean_term = logdens_linear.dot(score.T + offset).T - else: - mean_term = logdens_linear.dot(score.T + offset[:, None]).T - arg = opt + mean_term - return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1) - - log_density = functools.partial(log_density, - logdens_linear, - opt_offset, - cond_precision) + (cond_mean, + cond_cov, + cond_precision) = self._setup_implied_gaussian(opt_linear, + observed_subgrad, + dispersion=dispersion)[:3] self.cond_mean, self.cond_cov = cond_mean, cond_cov - affine_con = constraints(A_scaling, - b_scaling, + affine_con = constraints(A, + b, mean=cond_mean, covariance=cond_cov) - self.sampler = affine_gaussian_sampler(affine_con, - self.observed_opt_state, - self.observed_score_state, - log_density, - (logdens_linear, opt_offset), - selection_info=self.selection_variable, - useC=self.useC) - - def _setup_implied_gaussian(self, - opt_linear, - opt_offset, - # optional dispersion parameter - # for covariance of randomization + self.affine_con = affine_con + self.opt_linear = opt_linear + self.observed_subgrad = observed_subgrad + + def _setup_implied_gaussian(self, + opt_linear, + observed_subgrad, dispersion=1): - _, prec = self.randomizer.cov_prec - prec = prec / dispersion + cov_rand, prec = self.randomizer.cov_prec + + if np.asarray(prec).shape in [(), (0,)]: + prod_score_prec_unnorm = self._unscaled_cov_score * prec + else: + prod_score_prec_unnorm = self._unscaled_cov_score.dot(prec) if np.asarray(prec).shape in [(), (0,)]: cond_precision = opt_linear.T.dot(opt_linear) * prec cond_cov = np.linalg.inv(cond_precision) - logdens_linear = cond_cov.dot(opt_linear.T) * prec + regress_opt = -cond_cov.dot(opt_linear.T) * prec else: cond_precision = opt_linear.T.dot(prec.dot(opt_linear)) cond_cov = np.linalg.inv(cond_precision) - logdens_linear = cond_cov.dot(opt_linear.T).dot(prec) - - cond_mean = -logdens_linear.dot(self.observed_score_state + opt_offset) - - return cond_mean, cond_cov, cond_precision, logdens_linear + regress_opt = -cond_cov.dot(opt_linear.T).dot(prec) -class multiple_queries(object): + # regress_opt is regression coefficient of opt onto score + u... - ''' - Combine several queries of a given data - through randomized algorithms. 
- ''' + cond_mean = regress_opt.dot(self.observed_score_state + observed_subgrad) - def __init__(self, objectives): - ''' + M1 = prod_score_prec_unnorm * dispersion + M2 = M1.dot(cov_rand).dot(M1.T) + M4 = M1.dot(opt_linear) + M3 = M4.dot(cond_cov).dot(M4.T) - Parameters - ---------- - - objectives : sequence - A sequences of randomized objective functions. - - Notes - ----- - - Each element of `objectives` must - have a `setup_sampler` method that returns - a description of the distribution of the - data implicated in the objective function, - typically through the score or gradient - of the objective function. - These descriptions are passed to a function - `form_covariances` to linearly decompose - each score in terms of a target - and an asymptotically independent piece. - - Returns - ------- - - None - ''' - - self.objectives = objectives - - def fit(self): - for objective in self.objectives: - if not objective._setup: - objective.fit() - - def summary(self, - observed_target, - opt_sampling_info, # a sequence of (target_cov, score_cov) - # objects in theory all target_cov - # should be about the same... - alternatives=None, - parameter=None, - level=0.9, - ndraw=5000, - burnin=2000, - compute_intervals=False): - - """ - Produce p-values and confidence intervals for targets - of model including selected features - - Parameters - ---------- - - observed_target : ndarray - Observed estimate of target. - - alternatives : [str], optional - Sequence of strings describing the alternatives, - should be values of ['twosided', 'less', 'greater'] - - parameter : np.array - Hypothesized value for parameter -- defaults to 0. - - level : float - Confidence level. - - ndraw : int (optional) - Defaults to 1000. - - burnin : int (optional) - Defaults to 1000. - - compute_intervals : bool - Compute confidence intervals? - - """ - - if parameter is None: - parameter = np.zeros_like(observed_target) - - if alternatives is None: - alternatives = ['twosided'] * observed_target.shape[0] - - if len(self.objectives) != len(opt_sampling_info): - raise ValueError("number of objectives and sampling cov infos do not match") - - self.opt_sampling_info = [] - for i in range(len(self.objectives)): - if opt_sampling_info[i][0] is None or opt_sampling_info[i][1] is None: - raise ValueError("did not input target and score covariance info") - opt_sample, opt_logW = self.objectives[i].sampler.sample(ndraw, burnin) - self.opt_sampling_info.append((self.objectives[i].sampler, - opt_sample, - opt_logW, - opt_sampling_info[i][0], - opt_sampling_info[i][1])) - - pivots = self.coefficient_pvalues(observed_target, - parameter=parameter, - alternatives=alternatives) - - if not np.all(parameter == 0): - pvalues = self.coefficient_pvalues(observed_target, - parameter=parameter, - alternatives=alternatives) - else: - pvalues = pivots - - intervals = None - if compute_intervals: - intervals = self.confidence_intervals(observed_target, - level) - - return pivots, pvalues, intervals + self.M1 = M1 + self.M2 = M2 + self.M3 = M3 + self.M4 = M4 + self.M5 = M1.dot(self.observed_score_state + observed_subgrad) + return (cond_mean, + cond_cov, + cond_precision, + M1, + M2, + M3, + self.M4, + self.M5) + + def inference(self, + target_spec, + method, + level=0.90, + method_args={}): - def coefficient_pvalues(self, - observed_target, - parameter=None, - sample_args=(), - alternatives=None): - - ''' - Construct selective p-values - for each parameter of the target. 
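# Toy numerical illustration (stand-in arrays, not library objects) of the
# implied Gaussian set up in `_setup_implied_gaussian` above: with randomizer
# precision Theta and opt_linear L,
#     cond_precision = L' Theta L,
#     regress_opt    = -cond_cov L' Theta,
#     cond_mean      = regress_opt (observed_score_state + observed_subgrad).
import numpy as np

rng = np.random.default_rng(0)
p, E = 5, 2
L = rng.standard_normal((p, E))                       # plays the role of opt_linear
B = rng.standard_normal((p, p))
Theta = B.dot(B.T) + np.identity(p)                   # randomizer precision (SPD)

cond_precision = L.T.dot(Theta).dot(L)
cond_cov = np.linalg.inv(cond_precision)
regress_opt = -cond_cov.dot(L.T).dot(Theta)

score = rng.standard_normal(p)                        # observed_score_state
subgrad = rng.standard_normal(p)                      # observed_subgrad
cond_mean = regress_opt.dot(score + subgrad)
print(cond_mean)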
- - Parameters - ---------- - - observed_target : ndarray - Observed estimate of target. - - parameter : ndarray (optional) - A vector of parameters with shape `self.shape` - at which to evaluate p-values. Defaults - to `np.zeros(self.shape)`. - - sample_args : sequence - Arguments to `self.sample` if sample is not found - for a given objective. - - alternatives : [str], optional - Sequence of strings describing the alternatives, - should be values of ['twosided', 'less', 'greater'] - - Returns - ------- - pvalues : ndarray - - ''' - - for i in range(len(self.objectives)): - if self.opt_sampling_info[i][1] is None: - _sample, _logW = self.objectives[i].sampler.sample(*sample_args) - self.opt_sampling_info[i][1] = _sample - self.opt_sampling_info[i][2] = _logW - - ndraw = self.opt_sampling_info[0][1].shape[0] # nsample for normal samples taken from the 1st objective - - _intervals = optimization_intervals(self.opt_sampling_info, - observed_target, - ndraw) - - pvals = [] - - for i in range(observed_target.shape[0]): - keep = np.zeros_like(observed_target) - keep[i] = 1. - pvals.append(_intervals.pivot(keep, candidate=parameter[i], alternative=alternatives[i])) - - return np.array(pvals) - - - def confidence_intervals(self, - observed_target, - sample_args=(), - level=0.9): - - ''' - Construct selective confidence intervals - for each parameter of the target. - - Parameters - ---------- - - observed_target : ndarray - Observed estimate of target. - - sample_args : sequence - Arguments to `self.sample` if sample is not found - for a given objective. - - level : float - Confidence level. - - Returns - ------- - limits : ndarray - Confidence intervals for each target. - - ''' - - for i in range(len(self.objectives)): - if self.opt_sampling_info[i][1] is None: - _sample, _logW = self.objectives[i].sampler.sample(*sample_args) - self.opt_sampling_info[i][1] = _sample - self.opt_sampling_info[i][2] = _logW - - ndraw = self.opt_sampling_info[0][1].shape[0] # nsample for normal samples taken from the 1st objective - - _intervals = optimization_intervals(self.opt_sampling_info, - observed_target, - ndraw) - - limits = [] - - for i in range(observed_target.shape[0]): - keep = np.zeros_like(observed_target) - keep[i] = 1. - limits.append(_intervals.confidence_interval(keep, level=level)) - - return np.array(limits) - - -class optimization_sampler(object): - - def __init__(self): - raise NotImplementedError("abstract method") - - def sample(self): - raise NotImplementedError("abstract method") - - def log_cond_density(self, - opt_sample, - target_sample, - transform=None): - """ - Density of opt_sample | target_sample """ - raise NotImplementedError("abstract method") - - def hypothesis_test(self, - test_stat, - observed_value, - target_cov, - score_cov, - sample_args=(), - sample=None, - parameter=0, - alternative='twosided'): - - ''' - Sample `target` from selective density - using sampler with - gradient map `self.gradient` and - projection map `self.projection`. - - Parameters - ---------- - - test_stat : callable - Test statistic to evaluate on sample from - selective distribution. - - observed_value : float - Observed value of test statistic. - Used in p-value calculation. - - sample_args : sequence - Arguments to `self.sample` if sample is None. - - sample : np.array (optional) - If not None, assumed to be a sample of shape (-1,) + `self.shape` - representing a sample of the target from parameters. 
- Allows reuse of the same sample for construction of confidence - intervals, hypothesis tests, etc. If not None, - `ndraw, burnin, stepsize` are ignored. - - parameter : np.float (optional) - - alternative : ['greater', 'less', 'twosided'] - What alternative to use. - - Returns - ------- - - pvalue : float - ''' - - if alternative not in ['greater', 'less', 'twosided']: - raise ValueError("alternative should be one of ['greater', 'less', 'twosided']") - - if sample is None: - sample, logW = self.sample(*sample_args) - sample = np.atleast_2d(sample) - - if parameter is None: - parameter = self.reference - - sample_test_stat = np.squeeze(np.array([test_stat(x) for x in sample])) - - target_inv_cov = np.linalg.inv(target_cov) - delta = target_inv_cov.dot(parameter - self.reference) - W = np.exp(sample.dot(delta) + logW) - - family = discrete_family(sample_test_stat, W) - pval = family.cdf(0, observed_value) - - if alternative == 'greater': - return 1 - pval - elif alternative == 'less': - return pval - else: - return 2 * min(pval, 1 - pval) - - def confidence_intervals(self, - observed_target, - target_cov, - score_cov, - sample_args=(), - sample=None, - normal_sample=None, - level=0.9, - initial_guess=None): - ''' - Parameters ---------- - - observed : np.float - A vector of parameters with shape `self.shape`, - representing coordinates of the target. - - sample_args : sequence - Arguments to `self.sample` if sample is None. - - sample : np.array (optional) - If not None, assumed to be a sample of shape (-1,) + `self.shape` - representing a sample of the target from parameters `self.reference`. - Allows reuse of the same sample for construction of confidence - intervals, hypothesis tests, etc. - - level : float (optional) - Specify the - confidence level. - - initial_guess : np.float - Initial guesses at upper and lower limits, optional. - - Notes - ----- - - Construct selective confidence intervals - for each parameter of the target. - - Returns - ------- - - intervals : [(float, float)] - List of confidence intervals. - ''' - - if sample is None: - sample, logW = self.sample(*sample_args) - sample = np.vstack([sample]*5) # why times 5? - logW = np.hstack([logW]*5) - else: - sample, logW = sample - - ndraw = sample.shape[0] - - _intervals = optimization_intervals([(self, - sample, - logW, - target_cov, - score_cov)], - observed_target, - ndraw, - normal_sample=normal_sample) - - limits = [] - - for i in range(observed_target.shape[0]): - keep = np.zeros_like(observed_target) - keep[i] = 1. - if initial_guess is None: - l, u = _intervals.confidence_interval(keep, level=level) - else: - l, u = _intervals.confidence_interval(keep, level=level, - guess=initial_guess[i]) - limits.append((l, u)) - - return np.array(limits) - - def coefficient_pvalues(self, - observed_target, - target_cov, - score_cov, - parameter=None, - sample_args=(), - sample=None, - normal_sample=None, - alternatives=None): - ''' - Construct selective p-values - for each parameter of the target. - - Parameters - ---------- - - observed : np.float - A vector of parameters with shape `self.shape`, - representing coordinates of the target. - - parameter : np.float (optional) - A vector of parameters with shape `self.shape` - at which to evaluate p-values. Defaults - to `np.zeros(self.shape)`. - - sample_args : sequence - Arguments to `self.sample` if sample is None. 
- - sample : np.array (optional) - If not None, assumed to be a sample of shape (-1,) + `self.shape` - representing a sample of the target from parameters `self.reference`. - Allows reuse of the same sample for construction of confidence - intervals, hypothesis tests, etc. - - alternatives : list of ['greater', 'less', 'twosided'] - What alternative to use. + target_spec : TargetSpec + Information needed to specify the target. + method : str + One of ['selective_MLE', 'approx', 'exact', 'posterior'] + level : float + Confidence level or posterior quantiles. + method_args : dict + Dict of arguments to be optionally passed to the methods. Returns ------- - pvalues : np.float - - ''' - - if alternatives is None: - alternatives = ['twosided'] * observed_target.shape[0] - - if sample is None: - sample, logW = self.sample(*sample_args) - else: - sample, logW = sample - ndraw = sample.shape[0] - - if parameter is None: - parameter = np.zeros(observed_target.shape[0]) - - _intervals = optimization_intervals([(self, - sample, - logW, - target_cov, - score_cov)], - observed_target, - ndraw, - normal_sample=normal_sample) - pvals = [] - - for i in range(observed_target.shape[0]): - keep = np.zeros_like(observed_target) - keep[i] = 1. - pvals.append(_intervals.pivot(keep, - candidate=parameter[i], - alternative=alternatives[i])) - - return np.array(pvals) - - def _reconstruct_score_from_target(self, - target_sample, - transform=None): - if transform is not None: - direction, nuisance = transform - score_sample = (np.multiply.outer(target_sample, - direction) + - nuisance[None, :]) - else: - score_sample = target_sample - return score_sample - -class affine_gaussian_sampler(optimization_sampler): - - ''' - Sample from an affine truncated Gaussian - ''' - - def __init__(self, - affine_con, - initial_point, - observed_score_state, - log_cond_density, - logdens_transform, # described how score enters log_density. - selection_info=None, - useC=False): - - ''' - Parameters - ---------- - - affine_con : `selection.constraints.affine.constraints` - Affine constraints - - initial_point : ndarray - Feasible point for affine constraints. - - observed_score_state : ndarray - Observed score of convex loss (slightly modified). - Essentially (asymptotically) equivalent - to $\nabla \ell(\beta^*) + - Q(\beta^*)\beta^*$ where $\beta^*$ is population - minimizer. For linear regression, it is always - $-X^Ty$. - - log_cond_density : callable - Density of optimization variables given score - - logdens_transform : tuple - Description of how conditional mean - of optimization variables depends on score. - - selection_info : optional - Function of optimization variables that - will be conditioned on. - - useC : bool, optional - Use python or C solver. 
- - ''' - - self.affine_con = affine_con - - self.covariance = self.affine_con.covariance - self.mean = self.affine_con.mean - - self.initial_point = initial_point - self.observed_score_state = observed_score_state - self.selection_info = selection_info - self._log_cond_density = log_cond_density - self.logdens_transform = logdens_transform - self.useC = useC - - def log_cond_density(self, - opt_sample, - target_sample, - transform=None): - - if transform is not None: - direction, nuisance = transform - return self._log_density_ray(0, # candidate - # has been added to - # target - direction, - nuisance, - target_sample, - opt_sample) - else: - # target must be in score coordinates - score_sample = target_sample - - # probably should switch - # order of signature - return self._log_cond_density(opt_sample, - score_sample) - - def sample(self, ndraw, burnin): - ''' - Sample `target` from selective density - using projected Langevin sampler with - gradient map `self.gradient` and - projection map `self.projection`. - - Parameters - ---------- - - ndraw : int - How long a chain to return? - - burnin : int - How many samples to discard? - - ''' - - _sample = sample_from_constraints(self.affine_con, - self.initial_point, - ndraw=ndraw, - burnin=burnin) - return _sample, np.zeros(_sample.shape[0]) - - def selective_MLE(self, - observed_target, - target_cov, - target_score_cov, - # initial (observed) value of optimization variables -- - # used as a feasible point. - # precise value used only for independent estimator - init_soln, - solve_args={'tol':1.e-12}, - level=0.9): - """ - Selective MLE based on approximation of - CGF. - - Parameters - ---------- - - observed_target : ndarray - Observed estimate of target. - - target_cov : ndarray - Estimated covaraince of target. - - target_score_cov : ndarray - Estimated covariance of target and score of randomized query. - - init_soln : ndarray - Feasible point for optimization problem. - - level : float, optional - Confidence level. - - solve_args : dict, optional - Arguments passed to solver. + summary : pd.DataFrame + Statistical summary for specified targets. """ - return selective_MLE(observed_target, - target_cov, - target_score_cov, - init_soln, - self.mean, - self.covariance, - self.logdens_transform[0], - self.affine_con.linear_part, - self.affine_con.offset, - solve_args=solve_args, - level=level, - useC=self.useC) - - def reparam_map(self, - parameter_target, - observed_target, - target_cov, - target_score_cov, - init_soln, - solve_args={'tol':1.e-12}, - useC=True): - - prec_target = np.linalg.inv(target_cov) - ndim = prec_target.shape[0] - logdens_lin, _ = self.logdens_transform - target_lin = - logdens_lin.dot(target_score_cov.T.dot(prec_target)) - target_offset = self.mean - target_lin.dot(observed_target) - - cov_opt = self.covariance - prec_opt = np.linalg.inv(cov_opt) - - mean_param = target_lin.dot(parameter_target) + target_offset - conjugate_arg = prec_opt.dot(mean_param) - - if useC: - solver = solve_barrier_affine_C - else: - solver = _solve_barrier_affine_py - - val, soln, hess = solver(conjugate_arg, - prec_opt, # JT: I think this quadratic is wrong should involve target_cov and target_lin too? - init_soln, - self.affine_con.linear_part, - self.affine_con.offset, - **solve_args) - - inter_map = target_cov.dot(target_lin.T.dot(prec_opt)) - param_map = parameter_target + inter_map.dot(mean_param - soln) - log_normalizer_map = ((parameter_target.T.dot(prec_target + target_lin.T.dot(prec_opt).dot(target_lin)).dot(parameter_target))/2. 
- - parameter_target.T.dot(target_lin.T).dot(prec_opt.dot(soln)) - target_offset.T.dot(prec_opt).dot(target_offset)/2. - + val - (param_map.T.dot(prec_target).dot(param_map))/2.) - - jacobian_map = (np.identity(ndim) + inter_map.dot(target_lin)) - inter_map.dot(hess).dot(prec_opt.dot(target_lin)) - - return param_map, log_normalizer_map, jacobian_map - - def _log_density_ray(self, - candidate, - direction, - nuisance, - gaussian_sample, - opt_sample): - - # implicitly caching (opt_sample, gaussian_sample) ? - - if (not hasattr(self, "_direction") or not - np.all(self._direction == direction)): - - logdens_lin, logdens_offset = self.logdens_transform - - if opt_sample.shape[1] == 1: - - prec = 1. / self.covariance[0, 0] - quadratic_term = logdens_lin.dot(direction)**2 * prec - arg = (logdens_lin.dot(nuisance + logdens_offset) + - logdens_lin.dot(direction) * gaussian_sample + - opt_sample[:,0]) - linear_term = logdens_lin.dot(direction) * prec * arg - constant_term = arg**2 * prec - - self._cache = {'linear_term':linear_term, - 'quadratic_term':quadratic_term, - 'constant_term':constant_term} - else: - self._direction = direction.copy() - - # density is a Gaussian evaluated at - # O_i + A(N + (Z_i + theta) * gamma + b) - - # b is logdens_offset - # A is logdens_linear - # Z_i is gaussian_sample[i] (real-valued) - # gamma is direction - # O_i is opt_sample[i] - - # let arg1 = O_i - # let arg2 = A(N+b + Z_i \cdot gamma) - # then it is of the form (arg1 + arg2 + theta * A gamma) - - logdens_lin, logdens_offset = self.logdens_transform - cov = self.covariance - prec = np.linalg.inv(cov) - linear_part = logdens_lin.dot(direction) # A gamma - - if 1 in opt_sample.shape: - pass # stop3 what's this for? - cov = self.covariance - - quadratic_term = linear_part.T.dot(prec).dot(linear_part) - - arg1 = opt_sample.T - arg2 = logdens_lin.dot(np.multiply.outer(direction, gaussian_sample) + - (nuisance + logdens_offset)[:,None]) - arg = arg1 + arg2 - linear_term = linear_part.T.dot(prec).dot(arg) - constant_term = np.sum(prec.dot(arg) * arg, 0) - - self._cache = {'linear_term':linear_term, - 'quadratic_term':quadratic_term, - 'constant_term':constant_term} - (linear_term, - quadratic_term, - constant_term) = (self._cache['linear_term'], - self._cache['quadratic_term'], - self._cache['constant_term']) - return (-0.5 * candidate**2 * quadratic_term - - candidate * linear_term - 0.5 * constant_term) - -class optimization_intervals(object): - - def __init__(self, - opt_sampling_info, # a sequence of - # (opt_sampler, - # opt_sample, - # opt_logweights, - # target_cov, - # score_cov) objects - # in theory all target_cov - # should be about the same... 
- observed, - nsample, # how large a normal sample - target_cov=None, - normal_sample=None): - - # not all opt_samples will be of the same size as nsample - # let's repeat them as necessary - - tiled_sampling_info = [] - for (opt_sampler, - opt_sample, - opt_logW, - t_cov, - t_score_cov) in opt_sampling_info: - if opt_sample is not None: - if opt_sample.shape[0] < nsample: - if opt_sample.ndim == 1: - tiled_opt_sample = np.tile(opt_sample, - int(np.ceil(nsample / - opt_sample.shape[0])))[:nsample] - tiled_opt_logW = np.tile(opt_logW, - int(np.ceil(nsample / - opt_logW.shape[0])))[:nsample] - else: - tiled_opt_sample = np.tile(opt_sample, - (int(np.ceil(nsample / - opt_sample.shape[0])), 1))[:nsample] - tiled_opt_logW = np.tile(opt_logW, - (int(np.ceil(nsample / - opt_logW.shape[0])), 1))[:nsample] - else: - tiled_opt_sample = opt_sample[:nsample] - tiled_opt_logW = opt_logW[:nsample] - else: - tiled_sample = None - tiled_sampling_info.append((opt_sampler, - tiled_opt_sample, - tiled_opt_logW, - t_cov, - t_score_cov)) - - self.opt_sampling_info = tiled_sampling_info - self._logden = 0 - for opt_sampler, opt_sample, opt_logW, _, _ in opt_sampling_info: - - self._logden += opt_sampler.log_cond_density( - opt_sample, - opt_sampler.observed_score_state, - transform=None) - self._logden -= opt_logW - if opt_sample.shape[0] < nsample: - self._logden = np.tile(self._logden, - int(np.ceil(nsample / - opt_sample.shape[0])))[:nsample] - - # this is our observed unpenalized estimator - self.observed = observed.copy() - - # average covariances in case they might be different - - if target_cov is None: - self.target_cov = 0 - for _, _, _, target_cov, _ in opt_sampling_info: - self.target_cov += target_cov - self.target_cov /= len(opt_sampling_info) - - if normal_sample is None: - self._normal_sample = np.random.multivariate_normal( - mean=np.zeros(self.target_cov.shape[0]), - cov=self.target_cov, - size=(nsample,)) - else: - self._normal_sample = normal_sample - - def pivot(self, - linear_func, - candidate, - alternative='twosided'): - ''' - alternative : ['greater', 'less', 'twosided'] - What alternative to use. 
- Returns - ------- - pvalue : np.float - ''' - - if alternative not in ['greater', 'less', 'twosided']: - raise ValueError("alternative should be one of ['greater', 'less', 'twosided']") - - observed_stat = self.observed.dot(linear_func) - sample_stat = self._normal_sample.dot(linear_func) - - target_cov = linear_func.dot(self.target_cov.dot(linear_func)) - - nuisance = [] - translate_dirs = [] - - for (opt_sampler, - opt_sample, - _, - _, - target_score_cov) in self.opt_sampling_info: - - cur_score_cov = linear_func.dot(target_score_cov) - - # cur_nuisance is in the view's score coordinates - cur_nuisance = opt_sampler.observed_score_state - cur_score_cov * observed_stat / target_cov - nuisance.append(cur_nuisance) - translate_dirs.append(cur_score_cov / target_cov) - - weights = self._weights(sample_stat, # normal sample - candidate, # candidate value - nuisance, # nuisance sufficient stats for each view - translate_dirs) # points will be moved like sample * target_score_cov - - pivot = np.mean((sample_stat + candidate <= observed_stat) * weights) / np.mean(weights) - - if alternative == 'twosided': - return 2 * min(pivot, 1 - pivot) - elif alternative == 'less': - return pivot - else: - return 1 - pivot - - def confidence_interval(self, - linear_func, - level=0.90, - how_many_sd=20, - guess=None): - - sample_stat = self._normal_sample.dot(linear_func) - observed_stat = self.observed.dot(linear_func) - - def _rootU(gamma): - return self.pivot(linear_func, - observed_stat + gamma, - alternative='less') - (1 - level) / 2. - def _rootL(gamma): - return self.pivot(linear_func, - observed_stat + gamma, - alternative='less') - (1 + level) / 2. - - if guess is None: - grid_min, grid_max = -how_many_sd * np.std(sample_stat), how_many_sd * np.std(sample_stat) - upper = bisect(_rootU, grid_min, grid_max) - lower = bisect(_rootL, grid_min, grid_max) - - else: - delta = 0.5 * (guess[1] - guess[0]) - - # find interval bracketing upper solution - count = 0 - while True: - Lu, Uu = guess[1] - delta, guess[1] + delta - valU = _rootU(Uu) - valL = _rootU(Lu) - if valU * valL < 0: - break - delta *= 2 - count += 1 - upper = bisect(_rootU, Lu, Uu) - - # find interval bracketing lower solution - count = 0 - while True: - Ll, Ul = guess[0] - delta, guess[0] + delta - valU = _rootL(Ul) - valL = _rootL(Ll) - if valU * valL < 0: - break - delta *= 2 - count += 1 - lower = bisect(_rootL, Ll, Ul) - - return lower + observed_stat, upper + observed_stat - - # Private methods - - def _weights(self, - stat_sample, - candidate, - nuisance, - translate_dirs): - - # Here we should loop through the views - # and move the score of each view - # for each projected (through linear_func) normal sample - # using the linear decomposition - - # We need access to the map that takes observed_score for each view - # and constructs the full randomization -- this is the reconstruction map - # for each view - - # The data state for each view will be set to be N_i + A_i \hat{\theta}_i - # where N_i is the nuisance sufficient stat for the i-th view's - # data with respect to \hat{\theta} and N_i will not change because - # it depends on the observed \hat{\theta} and observed score of i-th view - - # In this function, \hat{\theta}_i will change with the Monte Carlo sample - - score_sample = [] - _lognum = 0 - for i, opt_info in enumerate(self.opt_sampling_info): - opt_sampler, opt_sample = opt_info[:2] - - _lognum += opt_sampler.log_cond_density(opt_sample, - stat_sample + candidate, - transform= - (translate_dirs[i], - nuisance[i])) - 
- _logratio = _lognum - self._logden - _logratio -= _logratio.max() - - return np.exp(_logratio) - -def naive_confidence_intervals(diag_cov, observed, level=0.9): + query_spec = self.specification + + if method == 'selective_MLE': + G = mle_inference(query_spec, + target_spec, + **method_args) + + return G.solve_estimating_eqn(alternatives=target_spec.alternatives, + level=level)[0] + + elif method == 'exact': + G = exact_grid_inference(query_spec, + target_spec) + + return G.summary(alternatives=target_spec.alternatives, + level=level) + + elif method == 'approx': + G = approximate_grid_inference(query_spec, + target_spec, + **method_args) + + return G.summary(alternatives=target_spec.alternatives, + level=level) + + elif method == 'posterior': + return _posterior(query_spec, + target_spec, + **method_args)[1] + + +def _posterior(query_spec, + target_spec, + level=0.90, + dispersion=1, + prior=None, + solve_args={'tol': 1.e-12}, + nsample=2000, + nburnin=500): """ - Compute naive Gaussian based confidence - intervals for target. - Parameters - ---------- - - diag_cov : diagonal of a covariance matrix - - observed : np.float - A vector of observed data of shape `target.shape` - - alpha : float (optional) - 1 - confidence level. - - Returns - ------- - intervals : np.float - Gaussian based confidence intervals. - """ - alpha = 1 - level - diag_cov = np.asarray(diag_cov) - p = diag_cov.shape[0] - quantile = - ndist.ppf(alpha/2) - LU = np.zeros((2, p)) - for j in range(p): - sigma = np.sqrt(diag_cov[j]) - LU[0,j] = observed[j] - sigma * quantile - LU[1,j] = observed[j] + sigma * quantile - return LU.T - -def naive_pvalues(diag_cov, observed, parameter): - diag_cov = np.asarray(diag_cov) - p = diag_cov.shape[0] - pvalues = np.zeros(p) - for j in range(p): - sigma = np.sqrt(diag_cov[j]) - pval = ndist.cdf((observed[j] - parameter[j])/sigma) - pvalues[j] = 2 * min(pval, 1-pval) - return pvalues - -# private function - -def _solve_barrier_affine_py(conjugate_arg, - precision, - feasible_point, - con_linear, - con_offset, - step=1, - nstep=1000, - min_its=200, - tol=1.e-10): - - scaling = np.sqrt(np.diag(con_linear.dot(precision).dot(con_linear.T))) - - if feasible_point is None: - feasible_point = 1. / scaling - - objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u)/2. \ - + np.log(1.+ 1./((con_offset - con_linear.dot(u))/ scaling)).sum() - grad = lambda u: -conjugate_arg + precision.dot(u) - con_linear.T.dot(1./(scaling + con_offset - con_linear.dot(u)) - - 1./(con_offset - con_linear.dot(u))) - barrier_hessian = lambda u: con_linear.T.dot(np.diag(-1./((scaling + con_offset-con_linear.dot(u))**2.) 
- + 1./((con_offset-con_linear.dot(u))**2.))).dot(con_linear) - - current = feasible_point - current_value = np.inf - - for itercount in range(nstep): - cur_grad = grad(current) - - # make sure proposal is feasible - - count = 0 - while True: - count += 1 - proposal = current - step * cur_grad - if np.all(con_offset-con_linear.dot(proposal) > 0): - break - step *= 0.5 - if count >= 40: - raise ValueError('not finding a feasible point') - - # make sure proposal is a descent - - count = 0 - while True: - count += 1 - proposal = current - step * cur_grad - proposed_value = objective(proposal) - if proposed_value <= current_value: - break - step *= 0.5 - if count >= 20: - if not (np.isnan(proposed_value) or np.isnan(current_value)): - break - else: - raise ValueError('value is NaN: %f, %f' % (proposed_value, current_value)) - - # stop if relative decrease is small - - if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value) and itercount >= min_its: - current = proposal - current_value = proposed_value - break - - current = proposal - current_value = proposed_value - - if itercount % 4 == 0: - step *= 2 - - hess = np.linalg.inv(precision + barrier_hessian(current)) - return current_value, current, hess - -def _solve_barrier_nonneg(conjugate_arg, - precision, - feasible_point=None, - step=1, - nstep=1000, - tol=1.e-8): - - scaling = np.sqrt(np.diag(precision)) - - if feasible_point is None: - feasible_point = 1. / scaling - - objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u)/2. + np.log(1.+ 1./(u / scaling)).sum() - grad = lambda u: -conjugate_arg + precision.dot(u) + (1./(scaling + u) - 1./u) - barrier_hessian = lambda u: (-1./((scaling + u)**2.) + 1./(u**2.)) - - current = feasible_point - current_value = np.inf - - for itercount in range(nstep): - cur_grad = grad(current) - - # make sure proposal is feasible - - count = 0 - while True: - count += 1 - proposal = current - step * cur_grad - if np.all(proposal > 0): - break - step *= 0.5 - if count >= 40: - raise ValueError('not finding a feasible point') - - # make sure proposal is a descent - - count = 0 - while True: - proposal = current - step * cur_grad - proposed_value = objective(proposal) - if proposed_value <= current_value: - break - step *= 0.5 - if count >= 20: - if not (np.isnan(proposed_value) or np.isnan(current_value)): - break - else: - raise ValueError('value is NaN: %f, %f' % (proposed_value, current_value)) - - # stop if relative decrease is small - - if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value): - current = proposal - current_value = proposed_value - break - - current = proposal - current_value = proposed_value - - if itercount % 4 == 0: - step *= 2 - - hess = np.linalg.inv(precision + np.diag(barrier_hessian(current))) - return current_value, current, hess - -def selective_MLE(observed_target, - target_cov, - target_score_cov, - init_soln, # initial (observed) value of optimization variables -- - # used as a feasible point. - # precise value used only for independent estimator - cond_mean, - cond_cov, - logdens_linear, - linear_part, - offset, - solve_args={'tol':1.e-12}, - level=0.9, - useC=False): - """ - Selective MLE based on approximation of - CGF. Parameters ---------- - - observed_target : ndarray - Observed estimate of target. - - target_cov : ndarray - Estimated covaraince of target. - - target_score_cov : ndarray - Estimated covariance of target and score of randomized query. - - init_soln : ndarray - Feasible point for optimization problem. 
- - cond_mean : ndarray - Conditional mean of optimization variables given target. - - cond_cov : ndarray - Conditional covariance of optimization variables given target. - - logdens_linear : ndarray - Describes how conditional mean of optimization - variables varies with target. - - linear_part : ndarray - Linear part of affine constraints: $\{o:Ao \leq b\}$ - - offset : ndarray - Offset part of affine constraints: $\{o:Ao \leq b\}$ - + target_spec : TargetSpec + Information needed to specify the target. + level : float + Level for credible interval. + dispersion : float, optional + Dispersion parameter for log-likelihood. + prior : callable + A callable object that takes a single argument + `parameter` of the same shape as `observed_target` + and returns (value of log prior, gradient of log prior) solve_args : dict, optional Arguments passed to solver. - level : float, optional - Confidence level. - - useC : bool, optional - Use python or C solver. - """ - if np.asarray(observed_target).shape in [(), (0,)]: - raise ValueError('no target specified') - - observed_target = np.atleast_1d(observed_target) - prec_target = np.linalg.inv(target_cov) - - # target_lin determines how the conditional mean of optimization variables - # vary with target - # logdens_linear determines how the argument of the optimization density - # depends on the score, not how the mean depends on score, hence the minus sign - - target_lin = - logdens_linear.dot(target_score_cov.T.dot(prec_target)) - target_offset = cond_mean - target_lin.dot(observed_target) - - prec_opt = np.linalg.inv(cond_cov) + if prior is None: + Di = 1. / (200 * np.diag(target_spec.cov_target)) - conjugate_arg = prec_opt.dot(cond_mean) + def prior(target_parameter): + grad_prior = -target_parameter * Di + log_prior = -0.5 * np.sum(target_parameter ** 2 * Di) + return log_prior, grad_prior - if useC: - solver = solve_barrier_affine_C - else: - solver = solve_barrier_affine_py + posterior_repr = posterior(query_spec, + target_spec, + dispersion, + prior, + solve_args=solve_args) - val, soln, hess = solver(conjugate_arg, - prec_opt, - init_soln, - linear_part, - offset, - **solve_args) + samples = langevin_sampler(posterior_repr, + nsample=nsample, + nburnin=nburnin) - final_estimator = observed_target + target_cov.dot(target_lin.T.dot(prec_opt.dot(cond_mean - soln))) - ind_unbiased_estimator = observed_target + target_cov.dot(target_lin.T.dot(prec_opt.dot(cond_mean - - init_soln))) - L = target_lin.T.dot(prec_opt) - observed_info_natural = prec_target + L.dot(target_lin) - L.dot(hess.dot(L.T)) - observed_info_mean = target_cov.dot(observed_info_natural.dot(target_cov)) + delta = 0.5 * (1 - level) * 100 + lower = np.percentile(samples, delta, axis=0) + upper = np.percentile(samples, 100 - delta, axis=0) + mean = np.mean(samples, axis=0) - Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) - pvalues = ndist.cdf(Z_scores) - pvalues = 2 * np.minimum(pvalues, 1 - pvalues) + return samples, pd.DataFrame({'estimate':mean, + 'lower_credible':lower, + 'upper_credible':upper}) - alpha = 1 - level - quantile = ndist.ppf(1 - alpha / 2.) 
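The new `inference` entry point dispatches on `method` and returns a `pd.DataFrame`. A minimal end-to-end sketch, modeled on the tests later in this patch; the import paths, the toy data, and the assumption that the LASSO query inherits `inference` from the query class are illustrative, not part of the patch:

    import numpy as np
    from selectinf.randomized.lasso import lasso
    from selectinf.base import selected_targets

    rng = np.random.default_rng(0)
    X = rng.standard_normal((200, 20))
    Y = X[:, 0] * 2. + rng.standard_normal(200)

    conv = lasso.gaussian(X, Y, 2. * np.ones(20))
    signs = conv.fit()

    conv.setup_inference(dispersion=1.)                   # builds the implied Gaussian
    target_spec = selected_targets(conv.loglike,
                                   conv.observed_soln,
                                   dispersion=1.)

    # method is one of 'selective_MLE', 'approx', 'exact', 'posterior'
    mle_summary = conv.inference(target_spec, 'selective_MLE', level=0.90)
    post_summary = conv.inference(target_spec, 'posterior',
                                  method_args={'nsample': 2000, 'nburnin': 500})

For `method='posterior'` the returned frame has `estimate`, `lower_credible` and `upper_credible` columns; when `prior=None`, `_posterior` falls back to the weak Gaussian prior defined just above.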
- intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), - final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T - return final_estimator, observed_info_mean, Z_scores, pvalues, intervals, ind_unbiased_estimator -def normalizing_constant(target_parameter, - observed_target, - target_cov, - target_score_cov, - feasible_point, - cond_mean, - cond_cov, - logdens_linear, - linear_part, - offset, - useC=False): - """ - - Approximation of normalizing constant - in affine constrained Gaussian. - - Parameters - ---------- - - observed_target : ndarray - Observed estimate of target. - - target_cov : ndarray - Estimated covaraince of target. - - target_score_cov : ndarray - Estimated covariance of target and score of randomized query. - - init_soln : ndarray - Feasible point for optimization problem. - - cond_mean : ndarray - Conditional mean of optimization variables given target. - - cond_cov : ndarray - Conditional covariance of optimization variables given target. - - logdens_linear : ndarray - Describes how conditional mean of optimization - variables varies with target. - - linear_part : ndarray - Linear part of affine constraints: $\{o:Ao \leq b\}$ - - offset : ndarray - Offset part of affine constraints: $\{o:Ao \leq b\}$ - - solve_args : dict, optional - Arguments passed to solver. - - level : float, optional - Confidence level. - - useC : bool, optional - Use python or C solver. - """ - target_parameter = np.atleast_1d(target_parameter) - - cond_precision = np.linalg.inv(cond_cov) - prec_target = np.linalg.inv(target_cov) - target_linear = -logdens_linear.dot(target_score_cov.dot(prec_target)) - nuisance_correction = target_linear.dot(observed_target) - corrected_mean = cond_mean - nuisance_correction - - # rest of the objective is the target mahalanobis distance - # plus the mahalanobis distance for optimization variables - # this includes a term linear in the target, i.e. 
- # the source of `target_linear` - - ntarget = target_cov.shape[0] - nopt = cond_cov.shape[0] - full_Q = np.zeros((ntarget + nopt, - ntarget + nopt)) - full_Q[:ntarget][:,:ntarget] = (prec_target + target_linear.T.dot(cond_precision.dot(target_linear))) - full_Q[:ntarget][:,ntarget:] = -target_linear.dot(cond_precision) - full_Q[ntarget:][:,:ntarget] = (-target_linear.dot(cond_precision)).T - full_Q[ntarget:][:,ntarget:] = cond_precision - - linear_term = np.hstack([-prec_target.dot(target_parameter) + - corrected_mean.dot(cond_precision).dot(target_linear), - -cond_precision.dot(corrected_mean)]) - - constant_term = 0.5 * (np.sum(target_parameter * prec_target.dot(target_parameter)) + - np.sum(corrected_mean * cond_precision.dot(corrected_mean))) - - full_con_linear = np.zeros((linear_part.shape[0], - ntarget + nopt)) - full_con_linear[:,ntarget:] = linear_part - full_feasible = np.zeros(ntarget + nopt) - full_feasible[ntarget:] = feasible_point - - solve_args={'tol':1.e-12} - - if useC: - solver = solve_barrier_affine_C - else: - solver = solve_barrier_affine_py - - value, soln, hess = solver(-linear_term, - full_Q, - full_feasible, - full_con_linear, - offset, - **solve_args) - return (-value + 0.5 * np.sum(target_parameter * prec_target.dot(target_parameter)), - soln[:ntarget], - hess[:ntarget][:,:ntarget]) diff --git a/selectinf/randomized/randomization.py b/selectinf/randomized/randomization.py index f7dd4e10b..54437990a 100644 --- a/selectinf/randomized/randomization.py +++ b/selectinf/randomized/randomization.py @@ -168,6 +168,44 @@ def gaussian(covariance): log_density = lambda x: -np.sum(sqrt_precision.dot(np.atleast_2d(x).T)**2, 0) * 0.5 - np.log(_const), cov_prec=(covariance, precision)) + @staticmethod + def degenerate_gaussian(covariance, tol=1.e-6): + """ + Gaussian noise with a given covariance. + Parameters + ---------- + covariance : np.float((*,*)) + Positive definite covariance matrix. Non-negative definite + will raise an error. + """ + p = covariance.shape[0] + U, D, _ = np.linalg.svd(covariance) + keep = D > D.max() * tol + rank = keep.sum() + sqrt_cov = U[:,keep].dot(np.diag(np.sqrt(D[keep]))) + sqrt_precision = U[:,keep].dot(np.diag(1./np.sqrt(D[keep]))) + precision = sqrt_precision.dot(sqrt_precision.T) + _const = 1. 
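The sampler added a few lines below maps standard normal draws of dimension `rank` through `covariance.dot(sqrt_precision)`. A quick numerical check, illustrative only, that this product is a square root of a rank-deficient covariance:

    import numpy as np

    rng = np.random.default_rng(0)
    A = rng.standard_normal((5, 3))
    cov = A @ A.T                              # rank 3, shape (5, 5)

    U, D, _ = np.linalg.svd(cov)
    keep = D > D.max() * 1.e-6
    sqrt_precision = U[:, keep] @ np.diag(1. / np.sqrt(D[keep]))

    root = cov @ sqrt_precision                # equals U[:, keep] @ diag(sqrt(D[keep]))
    assert np.allclose(root @ root.T, cov)     # so root @ z has covariance cov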
+ density = lambda x: np.exp(-(x * precision.dot(x)).sum() / 2) / _const + cdf = lambda x: None + pdf = lambda x: None + derivative_log_density = lambda x: None + grad_negative_log_density = lambda x: precision.dot(x) + sampler = lambda size: covariance.dot(sqrt_precision.dot(np.random.standard_normal((rank,) + size))) + + return randomization((p,), + density, + cdf, + pdf, + derivative_log_density, + grad_negative_log_density, + sampler, + lipschitz=(1/D[keep]).max(), + log_density = lambda x: -np.sum(sqrt_precision.T.dot(np.atleast_2d(x).T)**2, 0) * 0.5 - np.log(_const), + cov_prec=(covariance, precision)) + + + @staticmethod def laplace(shape, scale): """ diff --git a/selectinf/randomized/screening.py b/selectinf/randomized/screening.py index 0aab6d341..1e24c73bf 100644 --- a/selectinf/randomized/screening.py +++ b/selectinf/randomized/screening.py @@ -7,12 +7,13 @@ from .query import gaussian_query from .randomization import randomization +from ..base import TargetSpec class screening(gaussian_query): def __init__(self, observed_data, - covariance, + covariance, # unscaled randomizer, perturb=None): @@ -21,6 +22,7 @@ def __init__(self, self.covariance = covariance self.randomizer = randomizer self._initial_omega = perturb + self._unscaled_cov_score = covariance def fit(self, perturb=None): @@ -28,44 +30,62 @@ def fit(self, perturb=None): self._randomized_score = self.observed_score_state - self._initial_omega return self._randomized_score, self._randomized_score.shape[0] - def multivariate_targets(self, features, dispersion=1.): + def multivariate_targets(self, + features, + dispersion=1): """ Entries of the mean of \Sigma[E,E]^{-1}Z_E """ - score_linear = self.covariance[:, features].copy() / dispersion - Q = score_linear[features] - cov_target = np.linalg.inv(Q) + Q = self.covariance[features][:,features] + Qinv = np.linalg.inv(Q) + cov_target = np.linalg.inv(Q) * dispersion observed_target = -np.linalg.inv(Q).dot(self.observed_score_state[features]) - crosscov_target_score = -score_linear.dot(cov_target) + regress_target_score = -Qinv.dot(np.identity(self.covariance.shape[0])[features]) alternatives = ['twosided'] * features.sum() - return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives + return TargetSpec(observed_target, + cov_target, + regress_target_score, + alternatives, + dispersion) - def full_targets(self, features, dispersion=1.): + def full_targets(self, + features, + dispersion=1): """ - Entries of the mean of \Sigma[E,E]^{-1}Z_E + Entries of the mean of (\Sigma^{-1}Z)[E] """ - score_linear = self.covariance[:, features].copy() / dispersion - Q = self.covariance / dispersion - cov_target = (np.linalg.inv(Q)[features])[:, features] + + Q = self.covariance + Qinv = np.linalg.inv(Q) + cov_target = Qinv[features][:, features] * dispersion observed_target = -np.linalg.inv(Q).dot(self.observed_score_state)[features] - crosscov_target_score = -np.identity(Q.shape[0])[:, features] + regress_target_score = -Qinv[:, features] alternatives = ['twosided'] * features.sum() - return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives + return TargetSpec(observed_target, + cov_target, + regress_target_score.T, + alternatives, + dispersion) - def marginal_targets(self, features): + def marginal_targets(self, + features, + dispersion=1): """ Entries of the mean of Z_E """ - score_linear = self.covariance[:, features] - Q = score_linear[features] - cov_target = Q + Q = 
self.covariance[features][:,features] + cov_target = Q * dispersion observed_target = -self.observed_score_state[features] - crosscov_target_score = -score_linear + regress_target_score = -np.identity(self.covariance.shape[0])[:,features] alternatives = ['twosided'] * features.sum() - return observed_target, cov_target, crosscov_target_score.T, alternatives + return TargetSpec(observed_target, + cov_target, + regress_target_score.T, + alternatives, + dispersion) class marginal_screening(screening): @@ -104,20 +124,20 @@ def fit(self, perturb=None): self.num_opt_var = self.observed_opt_state.shape[0] opt_linear = np.zeros((p, self.num_opt_var)) - opt_linear[self._selected,:] = np.diag(active_signs) - opt_offset = np.zeros(p) - opt_offset[self._selected] = active_signs * self.threshold[self._selected] - opt_offset[self._not_selected] = _randomized_score[self._not_selected] + opt_linear[self._selected] = np.diag(active_signs) + observed_subgrad = np.zeros(p) + observed_subgrad[self._selected] = active_signs * self.threshold[self._selected] + observed_subgrad[self._not_selected] = _randomized_score[self._not_selected] self._setup = True A_scaling = -np.identity(len(active_signs)) b_scaling = np.zeros(self.num_opt_var) - self._setup_sampler(A_scaling, - b_scaling, - opt_linear, - opt_offset) + self._setup_sampler_data = (A_scaling, + b_scaling, + opt_linear, + observed_subgrad) return self._selected @@ -208,19 +228,19 @@ def fit(self, perturb=None): for j in range(self.num_opt_var): opt_linear[selected_idx[j], j] = active_signs[j] - opt_offset = np.zeros(p) - opt_offset[self._selected] = active_signs * last_cutoff - opt_offset[self._not_selected] = _randomized_score[self._not_selected] + observed_subgrad = np.zeros(p) + observed_subgrad[self._selected] = active_signs * last_cutoff + observed_subgrad[self._not_selected] = _randomized_score[self._not_selected] self._setup = True A_scaling = -np.identity(self.num_opt_var) b_scaling = np.zeros(self.num_opt_var) - self._setup_sampler(A_scaling, - b_scaling, - opt_linear, - opt_offset) + self._setup_sampler_data = (A_scaling, + b_scaling, + opt_linear, + observed_subgrad) else: self._selected = np.zeros(p, np.bool) return self._selected @@ -324,8 +344,8 @@ def fit(self, perturb=None): self.num_opt_var = self.observed_opt_state.shape[0] opt_linear = np.zeros((p, self.num_opt_var)) - opt_linear[self._selected,:] = np.diag(topK_signs) - opt_offset = np.zeros(p) + opt_linear[self._selected] = np.diag(topK_signs) + observed_subgrad = np.zeros(p) else: @@ -342,8 +362,8 @@ def fit(self, perturb=None): self.num_opt_var = self.observed_opt_state.shape[0] opt_linear = np.zeros((p, self.num_opt_var)) - opt_linear[self._selected,:] = np.identity(self.num_opt_var) - opt_offset = np.zeros(p) + opt_linear[self._selected] = np.identity(self.num_opt_var) + observed_subgrad = np.zeros(p) # in both cases, this conditioning means we just need to compute # the observed lower bound @@ -354,10 +374,10 @@ def fit(self, perturb=None): A_scaling = -np.identity(self.num_opt_var) b_scaling = -np.ones(self.num_opt_var) * lower_bound - self._setup_sampler(A_scaling, - b_scaling, - opt_linear, - opt_offset) + self._setup_sampler_data = (A_scaling, + b_scaling, + opt_linear, + observed_subgrad) return self._selected diff --git a/selectinf/randomized/selective_MLE.py b/selectinf/randomized/selective_MLE.py new file mode 100644 index 000000000..cc7aed4a2 --- /dev/null +++ b/selectinf/randomized/selective_MLE.py @@ -0,0 +1,115 @@ +from __future__ import division, print_function + 
+import numpy as np, pandas as pd +from scipy.stats import norm as ndist +from ..algorithms.barrier_affine import solve_barrier_affine_py + +from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C +from .base import target_query_Interactspec + +class mle_inference(object): + + def __init__(self, + query_spec, + target_spec, + solve_args={'tol': 1.e-12}): + + self.query_spec = query_spec + self.target_spec = target_spec + self.solve_args = solve_args + + def solve_estimating_eqn(self, + alternatives=None, + useC=False, + level=0.90): + + QS = self.query_spec + TS = self.target_spec + + U1, U2, U3, U4, U5 = self._form_interaction_pieces(QS, + TS.regress_target_score, + TS.cov_target) + + prec_target = np.linalg.inv(TS.cov_target) + + prec_target_nosel = prec_target + U2 - U3 + + _P = -(U1.T.dot(QS.M5) + U2.dot(TS.observed_target)) + + bias_target = TS.cov_target.dot(U1.T.dot(-U4.dot(TS.observed_target) + + QS.M4.dot(QS.cond_mean)) - _P) + + cond_precision = np.linalg.inv(QS.cond_cov) + conjugate_arg = cond_precision.dot(QS.cond_mean) + + if useC: + solver = solve_barrier_affine_C + else: + solver = solve_barrier_affine_py + + val, soln, hess = solver(conjugate_arg, + cond_precision, + QS.observed_soln, + QS.linear_part, + QS.offset, + **self.solve_args) + + final_estimator = TS.cov_target.dot(prec_target_nosel).dot(TS.observed_target) \ + + TS.regress_target_score.dot(QS.M4).dot(QS.cond_mean - soln) \ + - bias_target + + observed_info_natural = prec_target_nosel + U3 - U5.dot(hess.dot(U5.T)) + + unbiased_estimator = TS.cov_target.dot(prec_target_nosel).dot(TS.observed_target) - bias_target + + observed_info_mean = TS.cov_target.dot(observed_info_natural.dot(TS.cov_target)) + + Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) + + cdf_vals = ndist.cdf(Z_scores) + pvalues = [] + + if alternatives is None: + alternatives = ['twosided'] * len(cdf_vals) + + for m, _cdf in enumerate(cdf_vals): + if alternatives[m] == 'twosided': + pvalues.append(2 * min(_cdf, 1 - _cdf)) + elif alternatives[m] == 'greater': + pvalues.append(1 - _cdf) + elif alternatives[m] == 'less': + pvalues.append(_cdf) + else: + raise ValueError('alternative should be in ["twosided", "less", "greater"]') + + alpha = 1. - level + + quantile = ndist.ppf(1 - alpha / 2.) + + intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), + final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T + + log_ref = val + conjugate_arg.T.dot(QS.cond_cov).dot(conjugate_arg) / 2. 
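    # A concrete check of the constructions above (illustrative numbers only):
    # with final_estimator = 1.3 and SE = sqrt(diag(observed_info_mean)) = 0.5,
    # Z = 1.3 / 0.5 = 2.6, the two-sided p-value is
    # 2 * min(Phi(2.6), 1 - Phi(2.6)) ~= 0.0093, and the level-0.90 interval is
    # 1.3 -/+ 1.645 * 0.5 = (0.48, 2.12).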
+ + result = pd.DataFrame({'MLE': final_estimator, + 'SE': np.sqrt(np.diag(observed_info_mean)), + 'Zvalue': Z_scores, + 'pvalue': pvalues, + 'alternative': alternatives, + 'lower_confidence': intervals[:, 0], + 'upper_confidence': intervals[:, 1], + 'unbiased': unbiased_estimator}) + + return result, observed_info_mean, log_ref + + # Private + + def _form_interaction_pieces(self, + QS, + regress_target_score, + cov_target): + + return target_query_Interactspec(QS, + regress_target_score, + cov_target) + diff --git a/selectinf/randomized/selective_MLE_utils.pyx b/selectinf/randomized/selective_MLE_utils.pyx index 2aabbc365..363399a25 100644 --- a/selectinf/randomized/selective_MLE_utils.pyx +++ b/selectinf/randomized/selective_MLE_utils.pyx @@ -114,8 +114,8 @@ def solve_barrier_nonneg(conjugate_arg, gradient = np.zeros_like(conjugate_arg) opt_variable = np.asarray(feasible_point) opt_proposed = opt_variable.copy() - scaling = np.sqrt(np.diag(precision)) - + scaling = 1 / np.sqrt(np.diag(precision)) + return barrier_solve_(gradient, opt_variable, opt_proposed, @@ -143,7 +143,8 @@ def solve_barrier_affine(conjugate_arg, affine_term = np.zeros_like(offset) A = linear_term - scaling = np.sqrt(np.diag(A.dot(precision).dot(A.T))) + scaling = 1 / np.sqrt(np.diag(A.dot(precision).dot(A.T))) + linear_term_fortran = np.asfortranarray(linear_term) value, opt_variable, hess = barrier_solve_affine_(gradient, @@ -158,6 +159,7 @@ def solve_barrier_affine(conjugate_arg, step, max_iter=max_iter, min_iter=min_iter, - value_tol=tol) + value_tol=tol + ) return value, opt_variable, hess \ No newline at end of file diff --git a/selectinf/randomized/slope.py b/selectinf/randomized/slope.py index 854148b54..c8c53b9bf 100644 --- a/selectinf/randomized/slope.py +++ b/selectinf/randomized/slope.py @@ -20,7 +20,7 @@ from ..constraints.affine import constraints from .randomization import randomization -from ..base import restricted_estimator +from ..base import restricted_estimator, _compute_hessian from .query import gaussian_query from .lasso import lasso @@ -81,22 +81,22 @@ def _solve_randomized_problem(self, quad = rr.identity_quadratic(self.ridge_term, 0, -self._initial_omega, 0) problem = rr.simple_problem(self.loglike, self.penalty) - initial_soln = problem.solve(quad, **solve_args) - initial_subgrad = -(self.loglike.smooth_objective(initial_soln, 'grad') + - quad.objective(initial_soln, 'grad')) + observed_soln = problem.solve(quad, **solve_args) + observed_subgrad = -(self.loglike.smooth_objective(observed_soln, 'grad') + + quad.objective(observed_soln, 'grad')) - return initial_soln, initial_subgrad + return observed_soln, observed_subgrad def fit(self, solve_args={'tol': 1.e-12, 'min_its': 50}, perturb=None): - self.initial_soln, self.initial_subgrad = self._solve_randomized_problem(perturb=perturb, solve_args=solve_args) - p = self.initial_soln.shape[0] + self.observed_soln, self.observed_subgrad = self._solve_randomized_problem(perturb=perturb, solve_args=solve_args) + p = self.observed_soln.shape[0] # now we have to work out SLOPE details, clusters, etc. 
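`mle_inference` can also be driven directly, outside the `inference` dispatch. A short sketch, assuming a fitted query whose `specification` attribute supplies `query_spec` and a `TargetSpec` built as in the earlier sketch:

    from selectinf.randomized.selective_MLE import mle_inference

    # query_spec = conv.specification, target_spec = selected_targets(...) as above
    G = mle_inference(query_spec, target_spec)
    result, observed_info_mean, log_ref = G.solve_estimating_eqn(level=0.90)
    print(result[['MLE', 'SE', 'pvalue', 'lower_confidence', 'upper_confidence']])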
- active_signs = np.sign(self.initial_soln) + active_signs = np.sign(self.observed_soln) active = self._active = active_signs != 0 self._overall = overall = active> 0 @@ -104,12 +104,12 @@ def fit(self, _active_signs = active_signs.copy() self.selection_variable = {'sign': _active_signs, - 'variables': self._overall} + 'variables': np.nonzero(self._overall)[0]} - indices = np.argsort(-np.fabs(self.initial_soln)) - sorted_soln = self.initial_soln[indices] - initial_scalings = np.sort(np.unique(np.fabs(self.initial_soln[active])))[::-1] + indices = self.selection_variable['indices'] = np.argsort(-np.fabs(self.observed_soln)) + sorted_soln = self.observed_soln[indices] + initial_scalings = np.sort(np.unique(np.fabs(self.observed_soln[active])))[::-1] self.observed_opt_state = initial_scalings self._unpenalized = np.zeros(p, np.bool) @@ -121,9 +121,11 @@ def fit(self, self.num_opt_var = self.observed_opt_state.shape[0] - X, y = self.loglike.data - W = self._W = self.loglike.saturated_loss.hessian(X.dot(beta_bar)) - _hessian_active = np.dot(X.T, X[:, active] * W[:, None]) + self._unscaled_cov_score, _hessian_active = _compute_hessian(self.loglike, + beta_bar, + active) + + _score_linear_term = -_hessian_active self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0])) @@ -141,23 +143,22 @@ def fit(self, cur_indx = j + 1 sign_vec = np.zeros(p) sign_vec[np.arange(j + 1 - cur_indx_array[pointer]) + cur_indx_array[pointer]] = \ - np.sign(self.initial_soln[indices[np.arange(j + 1 - cur_indx_array[pointer]) + cur_indx_array[pointer]]]) + np.sign(self.observed_soln[indices[np.arange(j + 1 - cur_indx_array[pointer]) + cur_indx_array[pointer]]]) signs_cluster.append(sign_vec) pointer = pointer + 1 if sorted_soln[j + 1] == 0: break signs_cluster = np.asarray(signs_cluster).T + self.selection_variable['signs_cluster'] = signs_cluster if signs_cluster.size == 0: return active_signs else: + X, y = self.loglike.data X_clustered = X[:, indices].dot(signs_cluster) _opt_linear_term = X.T.dot(X_clustered) - _, prec = self.randomizer.cov_prec - opt_linear, opt_offset = (_opt_linear_term, self.initial_subgrad) - # now make the constraints self._setup = True @@ -168,13 +169,21 @@ def fit(self, A_scaling = np.vstack([A_scaling_0, A_scaling_1]) b_scaling = np.zeros(2 * self.num_opt_var - 1) - self._setup_sampler(A_scaling, - b_scaling, - opt_linear, - opt_offset) - + self._setup_sampler_data = (A_scaling, + b_scaling, + _opt_linear_term, + self.observed_subgrad) + self.opt_linear = _opt_linear_term return active_signs + def setup_inference(self, + dispersion): + + if self.num_opt_var > 0: + self._setup_sampler(*self._setup_sampler_data, + dispersion=dispersion) + + # Targets of inference # and covariance with score representation # are same as LASSO @@ -185,7 +194,7 @@ def gaussian(X, slope_weights, sigma=1., quadratic=None, - ridge_term=0., + ridge_term=None, randomizer_scale=None): loglike = rr.glm.gaussian(X, Y, coef=1. / sigma ** 2, quadratic=quadratic) @@ -205,6 +214,406 @@ def gaussian(X, ridge_term, randomizer) +# split SLOPE + +class split_slope(lasso): + + """ + Data split, then LASSO (i.e. 
data carving) + """ + + def __init__(self, + loglike, + slope_weights, + proportion_select, + ridge_term=0, + perturb=None, + estimate_dispersion=True): + + (self.loglike, + self.slope_weights, + self.proportion_select, + self.ridge_term) = (loglike, + slope_weights, + proportion_select, + ridge_term) + + self.nfeature = p = self.loglike.shape[0] + self.penalty = rr.slope(slope_weights, lagrange=1.) + self._initial_omega = perturb # random perturbation + self.estimate_dispersion = estimate_dispersion + + def fit(self, + solve_args={'tol': 1.e-12, 'min_its': 50}, + perturb=None): + + signs = slope.fit(self, + solve_args=solve_args, + perturb=perturb) + + # for data splitting randomization, + # we need to estimate a dispersion parameter + + # we then setup up the sampler again + df_fit = len(self.selection_variable['variables']) + + if self.estimate_dispersion: + + X, y = self.loglike.data + n, p = X.shape + + dispersion = 2 * (self.loglike.smooth_objective(self._beta_full, + 'func') / + (n - df_fit)) + + self.dispersion_ = dispersion + # run setup again after + # estimating dispersion + + self.df_fit = df_fit + + return signs + + + def setup_inference(self, + dispersion): + + if self.df_fit > 0: + + if dispersion is None: + self._setup_sampler(*self._setup_sampler_data, + dispersion=self.dispersion_) + + else: + self._setup_sampler(*self._setup_sampler_data, + dispersion=dispersion) + + def _setup_implied_gaussian(self, + opt_linear, + observed_subgrad, + dispersion=1): + + # key observation is that the covariance of the added noise is + # roughly dispersion * (1 - pi) / pi * X^TX (in OLS regression, similar for other + # models), so the precision is (X^TX)^{-1} * (pi / ((1 - pi) * dispersion)) + # and prec.dot(opt_linear) = S_E / (dispersion * (1 - pi) / pi) + # because opt_linear has shape p x E with the columns + # being those non-zero columns of the solution. Above S_E = np.diag(signs) + # the conditional precision is S_E Q[E][:,E] * pi / ((1 - pi) * dispersion) S_E + # and regress_opt is -Q[E][:,E]^{-1} S_E + # padded with zeros + # to be E x p + + pi_s = self.proportion_select + ratio = (1 - pi_s) / pi_s + + ordered_vars = self.selection_variable['variables'] + indices = self.selection_variable['indices'] + signs_cluster = self.selection_variable['signs_cluster'] + + # JT: this may be expensive to form -- not pxp but large + cond_precision = signs_cluster.T.dot(self.opt_linear[indices] / (dispersion * ratio)) + + assert(np.linalg.norm(cond_precision - cond_precision.T) / + np.linalg.norm(cond_precision) < 1.e-6) + cond_cov = np.linalg.inv(cond_precision) + regress_opt = np.zeros((len(ordered_vars), + self.nfeature)) + # JT: not sure this is right -- had to remove signs + regress_opt[:, ordered_vars] = -cond_cov / (dispersion * ratio) + cond_mean = regress_opt.dot(self.observed_score_state + observed_subgrad) + + ## probably missing a dispersion in the denominator + prod_score_prec_unnorm = np.identity(self.nfeature) / (dispersion * ratio) + + ## probably missing a multiplicative factor of ratio + cov_rand = self._unscaled_cov_score * (dispersion * ratio) + + M1 = prod_score_prec_unnorm * dispersion + M2 = M1.dot(cov_rand).dot(M1.T) + M4 = M1.dot(opt_linear) + M3 = M4.dot(cond_cov).dot(M4.T) + + # would be nice to not store these? 
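    # For a concrete sense of the scaling (illustrative numbers only): with
    # proportion_select = 0.8, ratio = (1 - 0.8) / 0.8 = 0.25, so the holdout
    # randomization behaves roughly like N(0, 0.25 * dispersion * X^T X), and
    # prod_score_prec_unnorm above is I / (0.25 * dispersion).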
+ + self.M1 = M1 + self.M2 = M2 + self.M3 = M3 + self.M4 = M4 + self.M5 = M1.dot(self.observed_score_state + observed_subgrad) + + return (cond_mean, + cond_cov, + cond_precision, + M1, + M2, + M3, + self.M4, + self.M5) + + def _solve_randomized_problem(self, + # optional binary vector + # indicating selection data + perturb=None, + solve_args={'tol': 1.e-12, 'min_its': 50}): + + # take a new perturbation if none supplied + if perturb is not None: + self._selection_idx = perturb + if not hasattr(self, "_selection_idx"): + X, y = self.loglike.data + total_size = n = X.shape[0] + pi_s = self.proportion_select + self._selection_idx = np.zeros(n, np.bool) + self._selection_idx[:int(pi_s*n)] = True + np.random.shuffle(self._selection_idx) + + inv_frac = 1 / self.proportion_select + quad = rr.identity_quadratic(self.ridge_term, + 0, + 0, + 0) + + randomized_loss = self.loglike.subsample(self._selection_idx) + randomized_loss.coef *= inv_frac + + problem = rr.simple_problem(randomized_loss, self.penalty) + observed_soln = problem.solve(quad, **solve_args) + observed_subgrad = -(randomized_loss.smooth_objective(observed_soln, + 'grad') + + quad.objective(observed_soln, 'grad')) + + return observed_soln, observed_subgrad + + @staticmethod + def gaussian(X, + Y, + slope_weights, + proportion, + sigma=1., + quadratic=None, + estimate_dispersion=True): + r""" + Squared-error LASSO with feature weights. + Objective function is (before randomization) + + .. math:: + + \beta \mapsto \frac{1}{2} \|Y-X\beta\|^2_2 + + \sum_{i=1}^p \lambda_i |\beta_i| + + where $\lambda$ is `slope_weights`. The ridge term + is determined by the Hessian and `np.std(Y)` by default. + + Parameters + ---------- + + X : ndarray + Shape (n,p) -- the design matrix. + + Y : ndarray + Shape (n,) -- the response. + + slope_weights: [float, sequence] + + proportion: float + What proportion of data to use for selection. + + sigma : float (optional) + Noise variance. Set to 1 if `covariance_estimator` is not None. + This scales the loglikelihood by `sigma**(-2)`. + + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. + + Returns + ------- + + L : `selection.randomized.slope.slope` + + """ + + loglike = rr.glm.gaussian(X, + Y, + coef=1. / sigma ** 2, + quadratic=quadratic) + + return split_slope(loglike, + np.asarray(slope_weights)/sigma**2, + proportion, + estimate_dispersion=estimate_dispersion) + + + @staticmethod + def logistic(X, + successes, + slope_weights, + proportion, + trials=None, + quadratic=None): + r""" + Logistic LASSO with feature weights (before randomization) + + .. math:: + + \beta \mapsto \ell(X\beta) + \sum_{i=1}^p \lambda_i |\beta_i| + + where $\ell$ is the negative of the logistic + log-likelihood (half the logistic deviance) + and $\lambda$ is `slope_weights`. + + Parameters + ---------- + + X : ndarray + Shape (n,p) -- the design matrix. + + successes : ndarray + Shape (n,) -- response vector. An integer number of successes. + For data that is proportions, multiply the proportions + by the number of trials first. + + slope_weights: [float, sequence] + + proportion: float + What proportion of data to use for selection. + + trials : ndarray (optional) + Number of trials per response, defaults to + ones the same shape as Y. + + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. 
+ Can also be a linear term by setting quadratic + coefficient to 0. + + Returns + ------- + + L : `selection.randomized.slope.slope` + + """ + + loglike = rr.glm.logistic(X, + successes, + trials=trials, + quadratic=quadratic) + + return split_slope(loglike, + np.asarray(slope_weights), + proportion) + + @staticmethod + def coxph(X, + times, + status, + slope_weights, + proportion, + quadratic=None): + r""" + Cox proportional hazards LASSO with feature weights. + Objective function is (before randomization) + + .. math:: + + \beta \mapsto \ell^{\text{Cox}}(\beta) + + \sum_{i=1}^p \lambda_i |\beta_i| + + where $\ell^{\text{Cox}}$ is the + negative of the log of the Cox partial + likelihood and $\lambda$ is `slope_weights`. + Uses Efron's tie breaking method. + + Parameters + ---------- + + X : ndarray + Shape (n,p) -- the design matrix. + + times : ndarray + Shape (n,) -- the survival times. + + status : ndarray + Shape (n,) -- the censoring status. + + slope_weights: [float, sequence] + + + proportion: float + What proportion of data to use for selection. + + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. + + Returns + ------- + + L : `selection.randomized.slope.slope` + + """ + n, p = X.shape + loglike = rr.glm.cox(X, times, status, quadratic=quadratic) + + return split_slope(loglike, + np.asarray(slope_weights), + proportion) + + @staticmethod + def poisson(X, + counts, + slope_weights, + proportion, + quadratic=None, + ridge_term=0): + r""" + Poisson log-linear LASSO with feature weights. + Objective function is (before randomization) + + .. math:: + + \beta \mapsto \ell^{\text{Poisson}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i| + + where $\ell^{\text{Poisson}}$ is the negative + of the log of the Poisson likelihood (half the deviance) + and $\lambda$ is `slope_weights`. + + Parameters + ---------- + + X : ndarray + Shape (n,p) -- the design matrix. + + counts : ndarray + Shape (n,) -- the response. + + slope_weights: [float, sequence] + + proportion: float + What proportion of data to use for selection. + + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. 
+ + Returns + ------- + + L : `selection.randomized.slope.slope` + + """ + loglike = rr.glm.poisson(X, counts, quadratic=quadratic) + + return split_slope(loglike, + np.asarray(slope_weights), + proportion) + + + # Projection onto selected subgradients of SLOPE def _projection_onto_selected_subgradients(prox_arg, diff --git a/selectinf/randomized/tests/test_BH.py b/selectinf/randomized/tests/test_BH.py index a6fe5851f..59927c56a 100644 --- a/selectinf/randomized/tests/test_BH.py +++ b/selectinf/randomized/tests/test_BH.py @@ -1,5 +1,4 @@ import numpy as np -import numpy.testing.decorators as dec from scipy.stats import norm as ndist @@ -42,7 +41,7 @@ def BH_cutoff(): np.testing.assert_allclose(sorted(BHfilter(2 * ndist.sf(np.fabs(Z)), q=0.2)), sorted(stepup_selection(Z, BH_cutoffs)[1])) -@dec.skipif(True, "independent estimator test not working") +@np.testing.dec.skipif(True, "independent estimator test not working") def test_independent_estimator(n=100, n1=50, q=0.2, signal=3, p=100): Z = np.random.standard_normal((n, p)) @@ -54,31 +53,33 @@ def test_independent_estimator(n=100, n1=50, q=0.2, signal=3, p=100): perturb = Zbar1 - Zbar frac = n1 * 1. / n - BH_select = stepup.BH(Zbar, np.identity(p) / n, np.sqrt((1 - frac) / (n * frac)), q=q) + BH_select = stepup.BH(Zbar, np.identity(p) / n, + np.sqrt((1 - frac) / (n * frac)), q=q) selected = BH_select.fit(perturb=perturb) observed_target = Zbar[selected] cov_target = np.identity(selected.sum()) / n cross_cov = -np.identity(p)[selected] / n - observed_target1, cov_target1, cross_cov1, _ = BH_select.marginal_targets(selected) + target_spec = BH_select.marginal_targets(selected) - assert(np.linalg.norm(observed_target - observed_target1) / np.linalg.norm(observed_target) < 1.e-7) - assert(np.linalg.norm(cov_target - cov_target1) / np.linalg.norm(cov_target) < 1.e-7) - assert(np.linalg.norm(cross_cov - cross_cov1) / np.linalg.norm(cross_cov) < 1.e-7) - - (final_estimator, - _, - Z_scores, - pvalues, - intervals, - ind_unbiased_estimator) = BH_select.selective_MLE(observed_target, cov_target, cross_cov) + assert(np.linalg.norm(observed_target - target_spec.observed_target) / + np.linalg.norm(observed_target) < 1.e-7) + assert(np.linalg.norm(cov_target - target_spec.cov_target) / + np.linalg.norm(cov_target) < 1.e-7) + assert(np.linalg.norm(regress_target_score - target_spec.regress_target_score) / np.linalg.norm(regress_target_score) + < 1.e-7) + result = BH_select.selective_MLE(observed_target, cov_target, cross_cov)[0] + Z = result['Zvalue'] + ind_unbiased_estimator = result['unbiased'] Zbar2 = Z[n1:].mean(0)[selected] - assert(np.linalg.norm(ind_unbiased_estimator - Zbar2) / np.linalg.norm(Zbar2) < 1.e-6) + assert(np.linalg.norm(ind_unbiased_estimator - Zbar2) + / np.linalg.norm(Zbar2) < 1.e-6) np.testing.assert_allclose(sorted(np.nonzero(selected)[0]), - sorted(BHfilter(2 * ndist.sf(np.fabs(np.sqrt(n1) * Zbar1))))) + sorted(BHfilter(2 * ndist.sf(np.fabs( + np.sqrt(n1) * Zbar1))))) def test_BH(n=500, @@ -117,15 +118,9 @@ def test_BH(n=500, if nonzero is not None: if marginal: - (observed_target, - cov_target, - crosscov_target_score, - alternatives) = BH_select.marginal_targets(nonzero) + target_spec = BH_select.marginal_targets(nonzero) else: - (observed_target, - cov_target, - crosscov_target_score, - alternatives) = BH_select.full_targets(nonzero, dispersion=sigma**2) + target_spec = BH_select.full_targets(nonzero, dispersion=sigma**2) if marginal: beta_target = true_mean[nonzero] @@ -133,29 +128,30 @@ def test_BH(n=500, beta_target = 
beta[nonzero] if use_MLE: - print('huh') - estimate, info, _, pval, intervals, _ = BH_select.selective_MLE(observed_target, - cov_target, - crosscov_target_score, - level=level) - pivots = ndist.cdf((estimate - beta_target) / np.sqrt(np.diag(info))) + result = BH_select.selective_MLE(target_spec, + level=level)[0] + estimate = result['MLE'] + pivots = ndist.cdf((estimate - beta_target) / result['SE']) pivots = 2 * np.minimum(pivots, 1 - pivots) # run summary else: - pivots, pval, intervals = BH_select.summary(observed_target, - cov_target, - crosscov_target_score, - alternatives, - compute_intervals=True, - level=level, - ndraw=20000, - burnin=2000, - parameter=beta_target) + result = BH_select.summary(target_spec, + compute_intervals=True, + level=level, + ndraw=20000, + burnin=2000, + parameter=beta_target) + pivots = np.asarray(result['pivot']) + pval = np.asarray(result['pvalue']) + lower = np.asarray(result['lower_confidence']) + upper = np.asarray(result['upper_confidence']) print(pval) - print("beta_target and intervals", beta_target, intervals) - coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) + print("beta_target and intervals", beta_target, result[['lower_confidence', + 'upper_confidence']]) + coverage = (beta_target > lower) * (beta_target < upper) print("coverage for selected target", coverage.sum()/float(nonzero.sum())) - return pivots[beta_target == 0], pivots[beta_target != 0], coverage, intervals, pivots + return (pivots[beta_target == 0], pivots[beta_target != 0], coverage, + result[['lower_confidence', 'upper_confidence']], pivots) else: return [], [], [], [], [] @@ -163,32 +159,5 @@ def test_both(): test_BH(marginal=True) test_BH(marginal=False) -def main(nsim=500, use_MLE=True, marginal=False): - - import matplotlib.pyplot as plt - import statsmodels.api as sm - U = np.linspace(0, 1, 101) - P0, PA, cover, length_int = [], [], [], [] - Ps = [] - for i in range(nsim): - p0, pA, cover_, intervals, pivots = test_BH(use_MLE=use_MLE, marginal=marginal) - Ps.extend(pivots) - cover.extend(cover_) - P0.extend(p0) - PA.extend(pA) - print(np.mean(cover),'coverage so far') - - period = 10 - if use_MLE: - period = 50 - if i % period == 0 and i > 0: - plt.clf() - if len(P0) > 0: - plt.plot(U, sm.distributions.ECDF(P0)(U), 'b', label='null') - plt.plot(U, sm.distributions.ECDF(PA)(U), 'r', label='alt') - plt.plot(U, sm.distributions.ECDF(Ps)(U), 'tab:orange', label='pivot') - plt.plot([0, 1], [0, 1], 'k--') - plt.legend() - plt.savefig('BH_pvals.pdf') diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py new file mode 100644 index 000000000..a4b6ec87b --- /dev/null +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -0,0 +1,135 @@ +import numpy as np + +from ...tests.instance import gaussian_instance +from ..lasso import lasso +from ...base import selected_targets +from ..approx_reference import approximate_grid_inference + +def test_inf(n=500, + p=100, + signal_fac=1., + s=5, + sigma=2., + rho=0.4, + randomizer_scale=1., + equicorrelated=False, + useIP=True, + CI=False): + + inst, const = gaussian_instance, lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=equicorrelated, + rho=rho, + sigma=sigma, + random_signs=False)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + if n > (2 * p): + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + else: + 
dispersion = sigma_ ** 2 + + eps = np.random.standard_normal((n, 2000)) * Y.std() + W = 0.7 * np.median(np.abs(X.T.dot(eps)).max(1)) + + conv = const(X, + Y, + W, + ridge_term=0., + randomizer_scale=randomizer_scale * sigma_) + + signs = conv.fit() + nonzero = signs != 0 + print("no of variables selected ", nonzero.sum()) + + if nonzero.sum() > 0: + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + + conv.setup_inference(dispersion=dispersion) + + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=dispersion) + + print(target_spec) + + approximate_grid_inf = approximate_grid_inference(conv, + target_spec, + useIP=useIP) + + if CI is False: + pivot, log_ref = approximate_grid_inf._approx_pivots(beta_target) + + return pivot, log_ref + else: + lci, uci = approximate_grid_inf._approx_intervals(level=0.90) + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + coverage = (lci < beta_target) * (uci > beta_target) + length = uci - lci + + return np.mean(coverage), np.mean(length) + + +def main(nsim=300, CI = False): + + import matplotlib as mpl + mpl.use('tkagg') + import matplotlib.pyplot as plt + from statsmodels.distributions.empirical_distribution import ECDF + + if CI is False: + _pivot = [] + for i in range(nsim): + _pivot.extend(test_inf(n=100, + p=400, + signal_fac=0.5, + s=0, + sigma=2., + rho=0.30, + randomizer_scale=1., + equicorrelated=True, + useIP=True, + CI=False)) + + print("iteration completed ", i) + + plt.clf() + ecdf_MLE = ECDF(np.asarray(_pivot)) + grid = np.linspace(0, 1, 101) + plt.plot(grid, ecdf_MLE(grid), c='blue', marker='^') + plt.plot(grid, grid, 'k--') + plt.show() + + if CI is True: + coverage_ = 0. + length_ = 0. + for n in range(nsim): + cov, len = test_inf(n=100, + p=400, + signal_fac=0.5, + s=5, + sigma=2., + rho=0.30, + randomizer_scale=1., + equicorrelated=True, + useIP=True, + CI=True) + + coverage_ += cov + length_ += len + print("coverage so far ", coverage_ / (n + 1.)) + print("lengths so far ", length_ / (n + 1.)) + print("iteration completed ", n + 1) + +if __name__ == "__main__": + main(nsim=1, CI = False) + diff --git a/selectinf/randomized/tests/test_approx_reference_grouplasso.py b/selectinf/randomized/tests/test_approx_reference_grouplasso.py new file mode 100644 index 000000000..5228a410a --- /dev/null +++ b/selectinf/randomized/tests/test_approx_reference_grouplasso.py @@ -0,0 +1,95 @@ +import numpy as np + +from ...tests.instance import gaussian_group_instance +from ..approx_reference_grouplasso import group_lasso, approximate_grid_inference + +def test_approx_pivot(n=500, + p=200, + signal_fac=0.1, + sgroup=3, + groups=np.arange(50).repeat(4), + sigma=3., + rho=0.3, + randomizer_scale=1, + weight_frac=1.5): + + while True: + + inst, const = gaussian_group_instance, group_lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + X, Y, beta = inst(n=n, + p=p, + signal=signal, + sgroup=sgroup, + groups=groups, + equicorrelated=False, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + + if n > (2 * p): + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + else: + dispersion = sigma_ ** 2 + + penalty_weights = dict([(i, weight_frac * sigma_ * np.sqrt(2 * np.log(p))) for i in np.unique(groups)]) + + conv = const(X, + Y, + groups, + penalty_weights, + randomizer_scale=randomizer_scale * np.sqrt(dispersion)) + + signs, _ = conv.fit() + nonzero = signs != 0 + print("number of selected variables ", nonzero.sum()) + + if 
nonzero.sum() > 0: + conv._setup_implied_gaussian() + + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + + approximate_grid_inf = approximate_grid_inference(conv, + dispersion) + + pivot = approximate_grid_inf._approx_pivots(beta_target) + + return pivot + + +def main(nsim=300, CI = False): + + import matplotlib as mpl + mpl.use('tkagg') + import matplotlib.pyplot as plt + from statsmodels.distributions.empirical_distribution import ECDF + if CI is False: + _pivot = [] + for i in range(nsim): + _pivot.extend(test_approx_pivot(n=500, + p=100, + signal_fac=1., + sgroup=0, + groups=np.arange(25).repeat(4), + sigma=2., + rho=0.20, + randomizer_scale=0.5, + weight_frac=1.2)) + + print("iteration completed ", i) + + plt.clf() + ecdf_MLE = ECDF(np.asarray(_pivot)) + grid = np.linspace(0, 1, 101) + plt.plot(grid, ecdf_MLE(grid), c='blue', marker='^') + plt.plot(grid, grid, 'k--') + plt.show() + +if __name__ == "__main__": + + main(nsim=50, CI = False) diff --git a/selectinf/randomized/tests/test_drop_losers.py b/selectinf/randomized/tests/test_drop_losers.py new file mode 100644 index 000000000..5f212b740 --- /dev/null +++ b/selectinf/randomized/tests/test_drop_losers.py @@ -0,0 +1,165 @@ +import numpy as np, pandas as pd + +from ..drop_losers import drop_losers +from ..screening import topK +from ..randomization import randomization + +def test_drop_losers(p=50, + K=5, + n=300, + use_MLE=True): + + arm = [] + data = [] + stage = [] + for a in range(p): + N = int(np.random.poisson(n, size=(1,))) + arm.extend([a]*N) + stage.extend([1]*N) + data.extend(list(np.random.standard_normal(N))) + + df = pd.DataFrame({'arm':arm, + 'stage':stage, + 'data':data}) + + grouped = df.groupby('arm') + stage1_means = df.groupby('arm').mean().sort_values('data', ascending=False) + winners = list(stage1_means.index[:K]) + + for winner in winners: + N = int(np.random.poisson(30, size=(1,))) + arm.extend([winner]*N) + stage.extend([2]*N) + data.extend(list(np.random.standard_normal(N))) + + df = pd.DataFrame({'arm':arm, + 'stage':stage, + 'data':data}) + + dtl = drop_losers(df, + K=K) + + dtl.MLE_inference() + if not use_MLE: + result = dtl.summary(ndraw=20000, burnin=5000) + else: + result = dtl.MLE_inference()[0] + pvalue = np.asarray(result['pvalue']) + lower = np.asarray(result['lower_confidence']) + upper = np.asarray(result['upper_confidence']) + cover = (lower < 0) * (upper > 0) + + return pvalue, cover + +def test_compare_topK(p=20, + K=5, + n=100): + + arm = [] + data = [] + stage = [] + for a in range(p): + N = int(np.random.poisson(n, size=(1,))) + arm.extend([a]*N) + stage.extend([1]*N) + data.extend(list(np.random.standard_normal(N))) + + df1 = pd.DataFrame({'arm':arm, + 'stage':stage, + 'data':data}) + + grouped = df1.groupby('arm') + stage1_means = df1.groupby('arm').mean().sort_values('data', ascending=False) + winners = list(stage1_means.index[:K]) + + for winner in winners: + N = int(np.random.poisson(30, size=(1,))) + 5 + arm.extend([winner]*N) + stage.extend([2]*N) + data.extend(list(np.random.standard_normal(N))) + + df2 = pd.DataFrame({'arm':arm, + 'stage':stage, + 'data':data}) + + dtl = drop_losers(df2, + K=K) + + # need additional data for randomized api with non-degenerate covariance + + for a in range(p): + if a not in winners: + N = int(np.random.poisson(30, size=(1,))) + 5 + arm.extend([a]*N) + stage.extend([2]*N) + data.extend(list(np.random.standard_normal(N))) + + df_full = pd.DataFrame({'arm':arm, + 'stage':stage, + 'data':data}) + full_means = 
df_full.groupby('arm').mean()['data'].iloc[range(p)] + full_std = df_full.groupby('arm').std()['data'].iloc[range(p)] + n_1 = df1.groupby('arm').count()['data'].iloc[range(p)] + n_full = df_full.groupby('arm').count()['data'].iloc[range(p)] + print(n_1, n_full) + stage1_means = df1.groupby('arm').mean()['data'].iloc[range(p)] + perturb = np.array(stage1_means) - np.array(full_means) + + covariance = np.diag(np.array(full_std)**2 / np.array(n_full)) + randomizer = randomization.gaussian(np.diag(np.array(full_std)**2 / np.array(n_1)) - + covariance) + + randomized_topK = topK(np.asarray(full_means), + covariance, + randomizer, + K, + perturb=perturb) + + randomized_topK.fit(perturb=perturb) + + target_spec = randomized_topK.marginal_targets(randomized_topK.selection_variable['variables']) + print('var', randomized_topK.selection_variable['variables']) + + # try with a degenerate covariance now + + means2 = df2.groupby('arm').mean()['data'].iloc[range(p)] + std2 = df2.groupby('arm').std()['data'].iloc[range(p)] + n_2 = df2.groupby('arm').count()['data'].iloc[range(p)] + stage1_means = df1.groupby('arm').mean()['data'].iloc[range(p)] + perturb2 = np.array(stage1_means) - np.array(means2) + covariance2 = np.diag(np.array(std2)**2 / np.array(n_2)) + degenerate_randomizer = randomization.degenerate_gaussian( + np.diag(np.array(std2)**2 / + np.array(n_1)) - + covariance2) + + degenerate_topK = topK(np.asarray(means2), + covariance2, + degenerate_randomizer, + K, + perturb=perturb2) + + np.random.seed(0) + summary1 = randomized_topK.selective_MLE(target_spec)[0] + np.random.seed(0) + summary2 = dtl.MLE_inference()[0] + + + np.testing.assert_allclose(summary1['MLE'], summary2['MLE'], rtol=1.e-3) + np.testing.assert_allclose(summary1['pvalue'], summary2['pvalue'], rtol=1.e-3) + #np.testing.assert_allclose(summary1['lower_confidence'], summary2['lower_confidence'], rtol=1.e-3) + #np.testing.assert_allclose(summary1['upper_confidence'], summary2['upper_confidence'], rtol=1.e-3) + + np.random.seed(0) + degenerate_topK.fit(perturb=perturb2) + summary3 = degenerate_topK.summary(target_spec, + ndraw=10000, + burnin=2000, + compute_intervals=True) + + np.testing.assert_allclose(summary1['pvalue'], summary3['pvalue'], rtol=1.e-3) + np.testing.assert_allclose(summary1['target'], summary3['target'], rtol=1.e-3) + #np.testing.assert_allclose(summary1['lower_confidence'], summary3['lower_confidence'], rtol=1.e-3) + #np.testing.assert_allclose(summary1['upper_confidence'], summary3['upper_confidence'], rtol=1.e-3) + + diff --git a/selectinf/randomized/tests/test_exact_reference.py b/selectinf/randomized/tests/test_exact_reference.py new file mode 100644 index 000000000..ad1dee613 --- /dev/null +++ b/selectinf/randomized/tests/test_exact_reference.py @@ -0,0 +1,77 @@ +import numpy as np + +from ...tests.instance import gaussian_instance +from ..lasso import lasso +from ...base import selected_targets +from ..exact_reference import exact_grid_inference + +def test_inf(n=500, + p=100, + signal_fac=1., + s=5, + sigma=2., + rho=0.4, + randomizer_scale=1., + equicorrelated=False, + CI=True): + + while True: + + inst, const = gaussian_instance, lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=equicorrelated, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + + if n > (2 * p): + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + else: + dispersion = sigma_ ** 2 
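+
+        # Penalty calibration (descriptive comment for the next two lines):
+        # simulate Gaussian noise at Y's scale and set W to 0.7 times the
+        # median, over features, of each feature's largest |X_j^T eps|
+        # across the simulated draws.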
+ + eps = np.random.standard_normal((n, 2000)) * Y.std() + W = 0.7 * np.median(np.abs(X.T.dot(eps)).max(1)) + + conv = const(X, + Y, + W, + ridge_term=0., + randomizer_scale=randomizer_scale * np.sqrt(dispersion)) + + signs = conv.fit() + nonzero = signs != 0 + print("size of selected set ", nonzero.sum()) + + if nonzero.sum() > 0: + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + + conv.setup_inference(dispersion=dispersion) + + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=dispersion) + + exact_grid_inf = exact_grid_inference(conv, + target_spec) + + if CI is False: + pivot = exact_grid_inf._pivots(beta_target) + return pivot + + else: + lci, uci = exact_grid_inf._intervals(level=0.90) + coverage = (lci < beta_target) * (uci > beta_target) + length = uci - lci + mle_length = 1.65*2 * np.sqrt(np.diag(exact_grid_inf.inverse_info)) + return np.mean(coverage), np.mean(length), np.mean(mle_length) + + diff --git a/selectinf/randomized/tests/test_group_lasso.py b/selectinf/randomized/tests/test_group_lasso.py index 9cc866cf6..0f1380ffb 100644 --- a/selectinf/randomized/tests/test_group_lasso.py +++ b/selectinf/randomized/tests/test_group_lasso.py @@ -268,7 +268,7 @@ def test_mixed(n=400, which += which_group return pval[beta[which] == 0], pval[beta[which] != 0] -@set_seed_iftrue(SET_SEED) +@set_seed_iftrue(True) def test_all_targets(n=100, p=20, signal_fac=1.5, s=5, sigma=3, rho=0.4): for target in ['full', 'selected', 'debiased']: test_group_lasso(n=n, diff --git a/selectinf/randomized/tests/test_hiv_data.py b/selectinf/randomized/tests/test_hiv_data.py new file mode 100644 index 000000000..4c3a741e8 --- /dev/null +++ b/selectinf/randomized/tests/test_hiv_data.py @@ -0,0 +1,117 @@ +import numpy as np +import pandas as pd +from scipy.stats import norm as ndist + +from ..lasso import split_lasso +from ..posterior_inference import (langevin_sampler, + gibbs_sampler) + +from ...base import selected_targets +from ...tests.instance import HIV_NRTI +from ...tests.flags import SET_SEED, SMALL_SAMPLES +from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue + +@set_seed_iftrue(SET_SEED) +@set_sampling_params_iftrue(SMALL_SAMPLES, nsample=50, nburnin=10) +def test_hiv_data(nsample=10000, + nburnin=500, + level=0.90, + split_proportion=0.50, + seedn=1): + np.random.seed(seedn) + + alpha = (1 - level) / 2 + Z_quantile = ndist.ppf(1 - alpha) + + X, Y, _ = HIV_NRTI(standardize=True) + Y *= 15 + n, p = X.shape + X /= np.sqrt(n) + + ols_fit = np.linalg.pinv(X).dot(Y) + _sigma = np.linalg.norm(Y - X.dot(ols_fit)) / np.sqrt(n - p - 1) + + const = split_lasso.gaussian + + dispersion = _sigma ** 2 + + W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * _sigma + + conv = const(X, + Y, + W, + proportion=split_proportion) + + signs = conv.fit() + nonzero = signs != 0 + + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=dispersion) + + mle, inverse_info = conv.selective_MLE(target_spec, + level=level, + solve_args={'tol': 1.e-12})[:2] + + approx_inf = conv.approximate_grid_inference(target_spec, + useIP=True) + + posterior_inf = conv.posterior(target_spec, + dispersion=dispersion) + + samples_langevin = langevin_sampler(posterior_inf, + nsample=nsample, + nburnin=nburnin, + step=1.) 
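+
+    # Equal-tailed credible intervals from the Langevin posterior samples:
+    # empirical percentiles at alpha and 1 - alpha for each selected
+    # coefficient.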
+ + lower_langevin = np.percentile(samples_langevin, int(alpha * 100), axis=0) + upper_langevin = np.percentile(samples_langevin, int((1 - alpha) * 100), axis=0) + + samples_gibbs, scale_gibbs = gibbs_sampler(posterior_inf, + nsample=nsample, + nburnin=nburnin) + + lower_gibbs = np.percentile(samples_gibbs, int(alpha * 100), axis=0) + upper_gibbs = np.percentile(samples_gibbs, int((1 - alpha) * 100), axis=0) + + naive_est = np.linalg.pinv(X[:, nonzero]).dot(Y) + naive_cov = dispersion * np.linalg.inv(X[:, nonzero].T.dot(X[:, nonzero])) + naive_intervals = np.vstack([naive_est - Z_quantile * np.sqrt(np.diag(naive_cov)), + naive_est + Z_quantile * np.sqrt(np.diag(naive_cov))]).T + + X_split = X[~conv._selection_idx, :] + Y_split = Y[~conv._selection_idx] + split_est = np.linalg.pinv(X_split[:, nonzero]).dot(Y_split) + split_cov = dispersion * np.linalg.inv(X_split[:, nonzero].T.dot(X_split[:, nonzero])) + split_intervals = np.vstack([split_est - Z_quantile * np.sqrt(np.diag(split_cov)), + split_est + Z_quantile * np.sqrt(np.diag(split_cov))]).T + + print("lengths: adjusted intervals Langevin, Gibbs, MLE1, MLE2, approx ", + np.mean(upper_langevin - lower_langevin), + np.mean(upper_gibbs - lower_gibbs), + np.mean((2 * Z_quantile) * np.sqrt(np.diag(posterior_inf.inverse_info))), + np.mean(mle['upper_confidence'] - mle['lower_confidence']), + np.mean(approx_inf['upper_confidence'] - approx_inf['lower_confidence']) + ) + + print("lengths: naive intervals ", np.mean(naive_intervals[:, 1] - naive_intervals[:, 0])) + + print("lengths: split intervals ", np.mean(split_intervals[:, 1] - split_intervals[:, 0])) + + scale_interval = np.percentile(scale_gibbs, [alpha * 100, (1 - alpha) * 100]) + output = pd.DataFrame({'Langevin_lower_credible': lower_langevin, + 'Langevin_upper_credible': upper_langevin, + 'Gibbs_lower_credible': lower_gibbs, + 'Gibbs_upper_credible': upper_gibbs, + 'MLE_lower_confidence': mle['lower_confidence'], + 'MLE_upper_confidence': mle['upper_confidence'], + 'approx_lower_confidence': approx_inf['lower_confidence'], + 'approx_upper_confidence': approx_inf['upper_confidence'], + 'Split_lower_confidence': split_intervals[:, 0], + 'Split_upper_confidence': split_intervals[:, 1], + 'Naive_lower_confidence': naive_intervals[:, 0], + 'Naive_upper_confidence': naive_intervals[:, 1] + }) + + return output, scale_interval, _sigma + diff --git a/selectinf/randomized/tests/test_lasso.py b/selectinf/randomized/tests/test_lasso.py index 13dae3769..07d1e9989 100644 --- a/selectinf/randomized/tests/test_lasso.py +++ b/selectinf/randomized/tests/test_lasso.py @@ -5,14 +5,18 @@ import regreg.api as rr -from ..lasso import lasso, selected_targets, full_targets, debiased_targets -from ...tests.instance import gaussian_instance -from ...tests.flags import SET_SEED +from ..lasso import lasso +from ...base import selected_targets, full_targets, debiased_targets +from ...tests.instance import gaussian_instance, logistic_instance +from ...tests.flags import SET_SEED, SMALL_SAMPLES from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue from ...algorithms.sqrt_lasso import choose_lambda, solve_sqrt_lasso from ..randomization import randomization from ...tests.decorators import rpy_test_safe + +@set_seed_iftrue(SET_SEED) +@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=50, burnin=10) def test_highdim_lasso(n=500, p=200, signal_fac=1.5, @@ -41,7 +45,7 @@ def test_highdim_lasso(n=500, n, p = X.shape sigma_ = np.std(Y) - if target is not 'debiased': + if target != 'debiased': W = 
np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p)) * sigma_ else: W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ @@ -56,38 +60,26 @@ def test_highdim_lasso(n=500, nonzero = signs != 0 if target == 'full': - (observed_target, - cov_target, - cov_target_score, - alternatives) = full_targets(conv.loglike, - conv._W, - nonzero) + target_spec = full_targets(conv.loglike, + conv.observed_soln) elif target == 'selected': - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero) + target_spec = selected_targets(conv.loglike, + conv.observed_soln) elif target == 'debiased': - (observed_target, - cov_target, - cov_target_score, - alternatives) = debiased_targets(conv.loglike, - conv._W, - nonzero, - penalty=conv.penalty) - - _, pval, intervals = conv.summary(observed_target, - cov_target, - cov_target_score, - alternatives, - ndraw=ndraw, - burnin=burnin, - compute_intervals=True) - + target_spec = debiased_targets(conv.loglike, + conv.observed_soln, + penalty=conv.penalty) + + result = conv.summary(target_spec, + ndraw=ndraw, + burnin=burnin, + compute_intervals=True) + pval = result['pvalue'] + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0] +@set_seed_iftrue(SET_SEED) +@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=50, burnin=10) def test_AR_randomization(n=300, p=100, signal=4.5, @@ -143,42 +135,47 @@ def test_AR_randomization(n=300, nonzero = signs != 0 if target == 'full': - (observed_target, - cov_target, - cov_target_score, - alternatives) = full_targets(conv.loglike, - conv._W, - nonzero) + target_spec = full_targets(conv.loglike, + conv.observed_soln) elif target == 'selected': - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero) + target_spec = selected_targets(conv.loglike, + conv.observed_soln) elif target == 'debiased': - (observed_target, - cov_target, - cov_target_score, - alternatives) = debiased_targets(conv.loglike, - conv._W, - nonzero, - penalty=conv.penalty) - - _, pval, intervals = conv.summary(observed_target, - cov_target, - cov_target_score, - alternatives, - ndraw=ndraw, - burnin=burnin, - compute_intervals=True) - + target_spec = debiased_targets(conv.loglike, + conv.observed_soln, + penalty=conv.penalty) + + result = conv.summary(target_spec, + ndraw=ndraw, + burnin=burnin, + compute_intervals=True) + pval = result['pvalue'] + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0] -def test_all_targets(n=100, p=20, signal_fac=1.5, s=5, sigma=3, rho=0.4): +@set_seed_iftrue(SET_SEED) +@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=50, burnin=10) +def test_all_targets(n=100, + p=20, + signal_fac=1.5, + s=5, + sigma=3, + rho=0.4, + ndraw=5000, + burnin=1000): for target in ['full', 'selected', 'debiased']: - test_highdim_lasso(n=n, p=p, signal_fac=signal_fac, s=s, sigma=sigma, rho=rho, target=target) + test_highdim_lasso(n=n, + p=p, + signal_fac=signal_fac, + s=s, + sigma=sigma, + rho=rho, + target=target, + ndraw=ndraw, + burnin=burnin) +@set_seed_iftrue(SET_SEED) +@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=50, burnin=10) def test_sqrt_highdim_lasso(n=500, p=200, signal_fac=1.5, @@ -229,7 +226,7 @@ def test_sqrt_highdim_lasso(n=500, q_term = rr.identity_quadratic(ridge_term, 0, -perturb, 0) soln2, sqrt_loss = solve_sqrt_lasso(X, Y, W, solve_args={'min_its':1000}, quadratic=q_term, force_fat=True) - soln = conv.initial_soln + soln = conv.observed_soln denom = np.linalg.norm(Y - X.dot(soln)) new_weights = W * 
denom @@ -248,30 +245,21 @@ def test_sqrt_highdim_lasso(n=500, np.testing.assert_allclose(soln, soln3) if full: - (observed_target, - cov_target, - cov_target_score, - alternatives) = full_targets(conv.loglike, - conv._W, - nonzero) + target_spec = full_targets(conv.loglike, + conv.observed_soln) else: - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero) - - _, pval, intervals = conv.summary(observed_target, - cov_target, - cov_target_score, - alternatives, - ndraw=ndraw, - burnin=burnin, - compute_intervals=False) + target_spec = selected_targets(conv.loglike, + conv.observed_soln) + + result = conv.summary(target_spec, + ndraw=ndraw, + burnin=burnin, + compute_intervals=False) + pval = result['pvalue'] return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0] +@np.testing.dec.skipif(True, "comparison to R is broken") @set_seed_iftrue(SET_SEED) @rpy_test_safe(libraries=['selectiveInference']) def test_compareR(n=200, @@ -345,49 +333,70 @@ def Rpval(X, Y, W, noise_scale=None): assert np.fabs(conv.ridge_term - ridge_term) / ridge_term < 1.e-4 - assert np.fabs(soln - conv.initial_soln).max() / np.fabs(soln).max() < 1.e-3 + assert np.fabs(soln - conv.observed_soln).max() / np.fabs(soln).max() < 1.e-3 nonzero = signs != 0 assert np.linalg.norm(conv.sampler.affine_con.covariance - cond_cov) / np.linalg.norm(cond_cov) < 1.e-3 assert np.linalg.norm(conv.sampler.affine_con.mean - cond_mean[:,0]) / np.linalg.norm(cond_mean[:,0]) < 1.e-3 +@set_seed_iftrue(SET_SEED) +@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=50, burnin=10) +def test_logistic_lasso(n=500, + p=200, + signal_fac=1.5, + s=5, + full=True, + rho=0.4, + randomizer_scale=1., + ndraw=5000, + burnin=1000, + ridge_term=None, compare_to_lasso=True): + """ + Compare to R randomized lasso + """ + + inst, const = logistic_instance, lasso.logistic + signal = np.sqrt(signal_fac * 2 * np.log(p)) + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:3] -def main(nsim=500, n=500, p=200, sqrt=False, target='full', sigma=3, AR=True): - - import matplotlib.pyplot as plt - P0, PA = [], [] - from statsmodels.distributions import ECDF - - for i in range(nsim): - if True: - if not sqrt: - if AR: - p0, pA = test_AR_randomization(n=n, p=p, target=target, sigma=sigma) - else: - p0, pA = test_highdim_lasso(n=n, p=p, target=target, sigma=sigma) - else: - p0, pA = test_sqrt_highdim_lasso(n=n, p=p, target=target, compare_to_lasso=False) - else: - p0, pA = [], [] - print(len(p0), len(pA)) - P0.extend(p0) - PA.extend(pA) - - P0_clean = np.array(P0) - - P0_clean = P0_clean[P0_clean > 1.e-5] # - print(np.mean(P0_clean), np.std(P0_clean), np.mean(np.array(PA) < 0.05), np.sum(np.array(PA) < 0.05) / (i+1), np.mean(np.array(P0) < 0.05), np.mean(P0_clean < 0.05), np.mean(np.array(P0) < 1e-5), 'null pvalue + power + failure') + if ridge_term is None: + mean_diag = np.mean((X**2).sum(0)) + ridge_term = (np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.)) + + W = np.ones(X.shape[1]) * choose_lambda(X) * 0.7 + + perturb = np.random.standard_normal(p) * randomizer_scale / np.sqrt(n) + + conv = const(X, + Y, + W, + randomizer_scale=randomizer_scale / np.sqrt(n), + ridge_term=ridge_term) - if i % 3 == 0 and i > 0: - U = np.linspace(0, 1, 101) - plt.clf() - if len(P0_clean) > 0: - plt.plot(U, ECDF(P0_clean)(U)) - if len(PA) > 0: - plt.plot(U, ECDF(PA)(U), 'r') - plt.plot([0, 1], [0, 1], 'k--') - plt.savefig("plot.pdf") - plt.show() + signs = 
conv.fit() + nonzero = signs != 0 + + # sanity check + + if full: + target_spec = full_targets(conv.loglike, + conv.observed_soln) + else: + target_spec = selected_targets(conv.loglike, + conv.observed_soln) + result = conv.summary(target_spec, + ndraw=ndraw, + burnin=burnin, + compute_intervals=False) + pval = result['pvalue'] + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0] diff --git a/selectinf/randomized/tests/test_marginal_screening.py b/selectinf/randomized/tests/test_marginal_screening.py index 6e7a564e0..50c769fb6 100644 --- a/selectinf/randomized/tests/test_marginal_screening.py +++ b/selectinf/randomized/tests/test_marginal_screening.py @@ -46,28 +46,19 @@ def test_marginal(n=500, if marginal: - (observed_target, - cov_target, - crosscov_target_score, - alternatives) = marginal_select.marginal_targets(nonzero) + target_spec = marginal_select.marginal_targets(nonzero) else: - (observed_target, - cov_target, - crosscov_target_score, - alternatives) = marginal_select.multivariate_targets(nonzero, dispersion=sigma**2) + target_spec = marginal_select.multivariate_targets(nonzero, dispersion=sigma**2) if use_MLE: - estimate, _, _, pval, intervals, _ = marginal_select.selective_MLE(observed_target, - cov_target, - crosscov_target_score) + result = marginal_select.selective_MLE(target_spec)[0] # run summary else: - _, pval, intervals = marginal_select.summary(observed_target, - cov_target, - crosscov_target_score, - alternatives, - compute_intervals=True) + result = marginal_select.summary(target_spec, + compute_intervals=True) + intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) + pval = result['pvalue'] print(pval) if marginal: beta_target = true_mean[nonzero] @@ -132,25 +123,19 @@ def test_simple(n=100, if nonzero.sum() > 0: - (observed_target, - cov_target, - crosscov_target_score, - alternatives) = marginal_select.marginal_targets(nonzero) + target_spec = marginal_select.marginal_targets(nonzero) if use_MLE: - estimate, _, _, pval, intervals, _ = marginal_select.selective_MLE(observed_target, - cov_target, - crosscov_target_score) + result = marginal_select.selective_MLE(target_spec) # run summary else: - _, pval, intervals = marginal_select.summary(observed_target, - cov_target, - crosscov_target_score, - alternatives, - compute_intervals=True) + result = marginal_select.summary(target_spec, + compute_intervals=True) + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) print(pval) - beta_target = cov_target.dot(true_mean[nonzero]) + beta_target = target_spec.cov_target.dot(true_mean[nonzero]) print("beta_target and intervals", beta_target, intervals) coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) print("coverage for selected target", coverage.sum()/float(nonzero.sum())) @@ -159,25 +144,3 @@ def test_simple(n=100, def test_both(): test_marginal(marginal=True) test_marginal(marginal=False) - -def main(nsim=1000, test_fn=test_marginal, use_MLE=False): - - import matplotlib.pyplot as plt - import statsmodels.api as sm - U = np.linspace(0, 1, 101) - P0, PA, cover, length_int = [], [], [], [] - for i in range(nsim): - p0, pA, cover_, intervals = test_fn(use_MLE=use_MLE) - - cover.extend(cover_) - P0.extend(p0) - PA.extend(pA) - print(np.mean(cover),'coverage so far') - - if i % 50 == 0 and i > 0: - plt.clf() - plt.plot(U, sm.distributions.ECDF(P0)(U), 'b', label='null') - plt.plot(U, sm.distributions.ECDF(PA)(U), 'r', label='alt') - plt.plot([0, 1], [0, 1], 'k--') - 
plt.savefig('marginal_screening_pvals.pdf') - diff --git a/selectinf/randomized/tests/test_modelQ.py b/selectinf/randomized/tests/test_modelQ.py index e88522423..09d70d29c 100644 --- a/selectinf/randomized/tests/test_modelQ.py +++ b/selectinf/randomized/tests/test_modelQ.py @@ -29,7 +29,7 @@ def test_modelQ(): conH = LH.sampler.affine_con conQ = LQ.sampler.affine_con - np.testing.assert_allclose(LH.initial_soln, LQ.initial_soln) + np.testing.assert_allclose(LH.observed_soln, LQ.observed_soln) np.testing.assert_allclose(LH.initial_subgrad, LQ.initial_subgrad) np.testing.assert_allclose(conH.linear_part, conQ.linear_part) diff --git a/selectinf/randomized/tests/test_multiple_queries.py b/selectinf/randomized/tests/test_multiple_queries.py index 03a921862..a56a8a440 100644 --- a/selectinf/randomized/tests/test_multiple_queries.py +++ b/selectinf/randomized/tests/test_multiple_queries.py @@ -5,14 +5,24 @@ import regreg.api as rr -from ..lasso import lasso, selected_targets, full_targets, debiased_targets -from ..screening import marginal_screening -from ..query import multiple_queries +from ...base import selected_targets from ...tests.instance import gaussian_instance from ...algorithms.sqrt_lasso import choose_lambda, solve_sqrt_lasso +from ..lasso import lasso +from ..screening import marginal_screening +from ..query import multiple_queries + # the test here is marginal_screening + lasso -def test_multiple_queries(n=500, p=100, signal_fac=1.5, s=5, sigma=3, rho=0.4, randomizer_scale=1, ndraw=5000, burnin=1000): +def test_multiple_queries(n=500, + p=100, + signal_fac=1.5, + s=5, + sigma=3, + rho=0.4, + randomizer_scale=1, + ndraw=5000, + burnin=1000): inst, const1, const2 = gaussian_instance, marginal_screening, lasso.gaussian signal = np.sqrt(signal_fac * np.log(p)) @@ -52,51 +62,17 @@ def test_multiple_queries(n=500, p=100, signal_fac=1.5, s=5, sigma=3, rho=0.4, r if nonzero.sum() == 0: return [], [] - observed_target1, cov_target1, cov_target_score1, alternatives1 = conv1.multivariate_targets(nonzero, sigma**2) + target_spec1 = conv1.multivariate_targets(nonzero, sigma**2) - (observed_target2, - cov_target2, - cov_target_score2, - alternatives2) = selected_targets(conv2.loglike, - conv2._W, - nonzero) + target_spec2 = selected_targets(conv2.loglike, + conv2.observed_soln, + features=nonzero) mq = multiple_queries([conv1, conv2]) - _, pval, intervals = mq.summary(observed_target1, - [(cov_target1, cov_target_score1), (cov_target2, cov_target_score2)], - compute_intervals=True) - + results = mq.summary([target_spec1, target_spec2], + compute_intervals=True) + pval = np.asarray(results['pvalue']) return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0] -def main(nsim=500, n=500, p=100, sigma=3): - - P0, PA = [], [] - from statsmodels.distributions import ECDF - import matplotlib.pyplot as plt - - for i in range(nsim): - if True: - p0, pA = test_multiple_queries(n=n, p=p, sigma=sigma) - else: - p0, pA = [], [] - P0.extend(p0) - PA.extend(pA) - - P0_clean = np.array(P0) - - P0_clean = P0_clean[P0_clean > 1.e-5] # - print(np.mean(P0_clean), np.std(P0_clean), np.mean(np.array(PA) < 0.05), np.mean(np.array(P0) < 0.05), np.mean(P0_clean < 0.05), np.mean(np.array(P0) < 1e-5)) - - if i % 3 == 0 and i > 0: - U = np.linspace(0, 1, 101) - plt.clf() - if len(P0_clean) > 0: - plt.plot(U, ECDF(P0_clean)(U)) - if len(PA) > 0: - plt.plot(U, ECDF(PA)(U), 'r') - plt.plot([0, 1], [0, 1], 'k--') - plt.savefig("plot.pdf") - plt.show() - diff --git a/selectinf/randomized/tests/test_naive.py 
b/selectinf/randomized/tests/test_naive.py index 584535dc7..56f7f5515 100644 --- a/selectinf/randomized/tests/test_naive.py +++ b/selectinf/randomized/tests/test_naive.py @@ -1,15 +1,52 @@ import numpy as np import regreg.api as rr -import pandas as pd from scipy.stats import norm as ndist -from scipy.optimize import bisect + from ...tests.instance import gaussian_instance from ...algorithms.lasso import lasso from ...tests.flags import SMALL_SAMPLES, SET_SEED from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue from ..cv_view import CV_view, have_glmnet -from ..query import (naive_pvalues, naive_confidence_intervals) + + +def naive_confidence_intervals(diag_cov, observed, level=0.9): + """ + Compute naive Gaussian based confidence + intervals for target. + Parameters + ---------- + diag_cov : diagonal of a covariance matrix + observed : np.float + A vector of observed data of shape `target.shape` + alpha : float (optional) + 1 - confidence level. + Returns + ------- + intervals : np.float + Gaussian based confidence intervals. + """ + alpha = 1 - level + diag_cov = np.asarray(diag_cov) + p = diag_cov.shape[0] + quantile = - ndist.ppf(alpha / 2) + LU = np.zeros((2, p)) + for j in range(p): + sigma = np.sqrt(diag_cov[j]) + LU[0, j] = observed[j] - sigma * quantile + LU[1, j] = observed[j] + sigma * quantile + return LU.T + + +def naive_pvalues(diag_cov, observed, parameter): + diag_cov = np.asarray(diag_cov) + p = diag_cov.shape[0] + pvalues = np.zeros(p) + for j in range(p): + sigma = np.sqrt(diag_cov[j]) + pval = ndist.cdf((observed[j] - parameter[j]) / sigma) + pvalues[j] = 2 * min(pval, 1 - pval) + return pvalues def compute_projection_parameters(n, p, s, signal, rho, sigma, active): multiple = 10**2 diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py new file mode 100644 index 000000000..2757c06da --- /dev/null +++ b/selectinf/randomized/tests/test_posterior.py @@ -0,0 +1,376 @@ +import numpy as np +import pandas as pd +from scipy.stats import norm as ndist + +from ..lasso import lasso, split_lasso +from ..posterior_inference import (langevin_sampler, + gibbs_sampler) + +from ...base import selected_targets +from ...tests.instance import gaussian_instance, HIV_NRTI +from ...tests.flags import SET_SEED, SMALL_SAMPLES +from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue + +@set_seed_iftrue(SET_SEED) +@set_sampling_params_iftrue(SMALL_SAMPLES, nsample=50, nburnin=10) +def test_Langevin(n=500, + p=100, + signal_fac=1., + s=5, + sigma=3., + rho=0.4, + randomizer_scale=1., + nsample=1500, + nburnin=100): + + inst, const = gaussian_instance, lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + + W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + ridge_term=0., + randomizer_scale=randomizer_scale * dispersion) + + signs = conv.fit() + nonzero = signs != 0 + + if nonzero.sum()>0: + + conv.setup_inference(dispersion=dispersion) + + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=dispersion) + + posterior_inf = conv.posterior(target_spec, + dispersion=dispersion) + + samples 
= langevin_sampler(posterior_inf, + nsample=nsample, + nburnin=nburnin) + + lci = np.percentile(samples, 5, axis=0) + uci = np.percentile(samples, 95, axis=0) + coverage = (lci < beta_target) * (uci > beta_target) + length = uci - lci + + return np.mean(coverage), np.mean(length) + + +@set_seed_iftrue(SET_SEED) +@set_sampling_params_iftrue(SMALL_SAMPLES, nsample=50, nburnin=10, nsim=2) +def test_coverage(nsim=100, + nsample=1500, + nburnin=100): + + cov, len = 0., 0. + + for i in range(nsim): + cov_, len_ = test_Langevin(n=500, + p=100, + signal_fac=0.5, + s=5, + sigma=2., + rho=0.2, + randomizer_scale=1., + nsample=nsample, + nburnin=nburnin) + + cov += cov_ + len += len_ + + print("coverage and lengths ", i, cov / (i + 1.), len / (i + 1.)) + + +@set_seed_iftrue(SET_SEED) +@set_sampling_params_iftrue(SMALL_SAMPLES, nsample=50, nburnin=10) +def test_instance(nsample=100, nburnin=50): + np.random.seed(10) + n, p, s = 500, 100, 5 + X = np.random.standard_normal((n, p)) + beta = np.zeros(p) + # beta[:s] = np.sqrt(2 * np.log(p) / n) + Y = X.dot(beta) + np.random.standard_normal(n) + + scale_ = np.std(Y) + # uses noise of variance n * scale_ / 4 by default + L = lasso.gaussian(X, Y, 3 * scale_ * np.sqrt(2 * np.log(p) * np.sqrt(n))) + signs = L.fit() + E = (signs != 0) + + M = E.copy() + M[-3:] = 1 + dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) + + L.setup_inference(dispersion=dispersion) + + target_spec = selected_targets(L.loglike, + L.observed_soln, + features=M, + dispersion=dispersion) + + posterior_inf = L.posterior(target_spec, + dispersion=dispersion) + + samples = langevin_sampler(posterior_inf, + nsample=nsample, + nburnin=nburnin) + + gibbs_samples = gibbs_sampler(posterior_inf, + nsample=nsample, + nburnin=nburnin)[0] + + lci = np.percentile(samples, 5, axis=0) + uci = np.percentile(samples, 95, axis=0) + + beta_target = np.linalg.pinv(X[:, M]).dot(X.dot(beta)) + coverage = (lci < beta_target) * (uci > beta_target) + length = uci - lci + + return np.mean(coverage), np.mean(length) + + +@set_seed_iftrue(SET_SEED) +@set_sampling_params_iftrue(SMALL_SAMPLES, nsample=50, nburnin=10) +def test_flexible_prior1(nsample=100, + nburnin=50, + seed=0): + + np.random.seed(seed) + + n, p, s = 500, 100, 5 + X = np.random.standard_normal((n, p)) + beta = np.zeros(p) + # beta[:s] = np.sqrt(2 * np.log(p) / n) + Y = X.dot(beta) + np.random.standard_normal(n) + + scale_ = np.std(Y) + # uses noise of variance n * scale_ / 4 by default + L = lasso.gaussian(X, Y, 3 * scale_ * np.sqrt(2 * np.log(p) * np.sqrt(n))) + signs = L.fit() + E = (signs != 0) + + M = E.copy() + M[-3:] = 1 + dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) + + L.setup_inference(dispersion=dispersion) + + target_spec = selected_targets(L.loglike, + L.observed_soln, + features=M, + dispersion=dispersion) + + # default prior + + Di = 1. 
/ (200 * np.diag(target_spec.cov_target)) + + def prior(target_parameter): + grad_prior = -target_parameter * Di + log_prior = -0.5 * np.sum(target_parameter ** 2 * Di) + return log_prior, grad_prior + + seed_state = np.random.get_state() + np.random.set_state(seed_state) + Z1 = np.random.standard_normal() + + posterior_inf1 = L.posterior(target_spec, + dispersion=dispersion, + prior=prior) + + W1 = np.random.standard_normal() + samples1 = langevin_sampler(posterior_inf1, + nsample=nsample, + nburnin=nburnin) + + np.random.set_state(seed_state) + Z2 = np.random.standard_normal() + posterior_inf2 = L.posterior(target_spec) + + W2 = np.random.standard_normal() + samples2 = langevin_sampler(posterior_inf2, + nsample=nsample, + nburnin=nburnin) + # these two assertions essentially just check the random state + # was run identically for samples1 and samples2 + np.testing.assert_equal(Z1, Z2) + np.testing.assert_equal(W1, W2) + np.testing.assert_allclose(samples1, samples2, rtol=1.e-3) + + +@set_seed_iftrue(SET_SEED) +@set_sampling_params_iftrue(SMALL_SAMPLES, nsample=50, nburnin=10) +def test_flexible_prior2(nsample=1000, nburnin=50): + n, p, s = 500, 100, 5 + X = np.random.standard_normal((n, p)) + beta = np.zeros(p) + # beta[:s] = np.sqrt(2 * np.log(p) / n) + Y = X.dot(beta) + np.random.standard_normal(n) + + scale_ = np.std(Y) + # uses noise of variance n * scale_ / 4 by default + L = lasso.gaussian(X, Y, 3 * scale_ * np.sqrt(2 * np.log(p) * np.sqrt(n))) + signs = L.fit() + E = (signs != 0) + + M = E.copy() + M[-3:] = 1 + dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) + + L.setup_inference(dispersion=dispersion) + + target_spec = selected_targets(L.loglike, + L.observed_soln, + features=M, + dispersion=dispersion) + + prior_var = 0.05 ** 2 + + def prior(target_parameter): + grad_prior = -target_parameter / prior_var + log_prior = -np.linalg.norm(target_parameter) ** 2 / (2. * prior_var) + return log_prior, grad_prior + + posterior_inf = L.posterior(target_spec, + dispersion=dispersion, + prior=prior) + + adaptive_proposal = np.linalg.inv(np.linalg.inv(posterior_inf.inverse_info) + + np.identity(posterior_inf.inverse_info.shape[0]) / 0.05 ** 2) + samples = langevin_sampler(posterior_inf, + nsample=nsample, + proposal_scale=adaptive_proposal, + nburnin=nburnin) + return samples + + +# def test_hiv_data(nsample=10000, +# nburnin=500, +# level=0.90, +# split_proportion=0.50, +# seedn=1): +# np.random.seed(seedn) +# +# alpha = (1 - level) / 2 +# Z_quantile = ndist.ppf(1 - alpha) +# +# X, Y, _ = HIV_NRTI(standardize=True) +# Y *= 15 +# n, p = X.shape +# X /= np.sqrt(n) +# +# ols_fit = np.linalg.pinv(X).dot(Y) +# _sigma = np.linalg.norm(Y - X.dot(ols_fit)) / np.sqrt(n - p - 1) +# +# const = split_lasso.gaussian +# +# dispersion = _sigma ** 2 +# +# W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * _sigma +# +# conv = const(X, +# Y, +# W, +# proportion=split_proportion) +# +# signs = conv.fit() +# nonzero = signs != 0 +# +# conv.setup_inference() +# +# target_spec = selected_targets(conv.loglike, +# conv.observed_soln, +# nonzero, +# dispersion=dispersion) +# +# mle, inverse_info = conv.selective_MLE(target_spec, +# level=level, +# solve_args={'tol': 1.e-12})[:2] +# +# approx_inf = conv.approximate_grid_inference(target_spec) +# +# posterior_inf = conv.posterior(target_spec, +# dispersion=dispersion) +# +# samples_langevin = langevin_sampler(posterior_inf, +# nsample=nsample, +# nburnin=nburnin, +# step=1.) 
+# +# lower_langevin = np.percentile(samples_langevin, int(alpha * 100), axis=0) +# upper_langevin = np.percentile(samples_langevin, int((1 - alpha) * 100), axis=0) +# +# samples_gibbs, scale_gibbs = gibbs_sampler(posterior_inf, +# nsample=nsample, +# nburnin=nburnin) +# +# lower_gibbs = np.percentile(samples_gibbs, int(alpha * 100), axis=0) +# upper_gibbs = np.percentile(samples_gibbs, int((1 - alpha) * 100), axis=0) +# +# naive_est = np.linalg.pinv(X[:, nonzero]).dot(Y) +# naive_cov = dispersion * np.linalg.inv(X[:, nonzero].T.dot(X[:, nonzero])) +# naive_intervals = np.vstack([naive_est - Z_quantile * np.sqrt(np.diag(naive_cov)), +# naive_est + Z_quantile * np.sqrt(np.diag(naive_cov))]).T +# +# X_split = X[~conv._selection_idx, :] +# Y_split = Y[~conv._selection_idx] +# split_est = np.linalg.pinv(X_split[:, nonzero]).dot(Y_split) +# split_cov = dispersion * np.linalg.inv(X_split[:, nonzero].T.dot(X_split[:, nonzero])) +# split_intervals = np.vstack([split_est - Z_quantile * np.sqrt(np.diag(split_cov)), +# split_est + Z_quantile * np.sqrt(np.diag(split_cov))]).T +# +# print("lengths: adjusted intervals Langevin, Gibbs, MLE1, MLE2, approx ", +# np.mean(upper_langevin - lower_langevin), +# np.mean(upper_gibbs - lower_gibbs), +# np.mean((2 * Z_quantile) * np.sqrt(np.diag(posterior_inf.inverse_info))), +# np.mean(mle['upper_confidence'] - mle['lower_confidence']), +# np.mean(approx_inf['upper_confidence'] - approx_inf['lower_confidence']) +# ) +# +# print("lengths: naive intervals ", np.mean(naive_intervals[:, 1] - naive_intervals[:, 0])) +# +# print("lengths: split intervals ", np.mean(split_intervals[:, 1] - split_intervals[:, 0])) +# +# scale_interval = np.percentile(scale_gibbs, [alpha * 100, (1 - alpha) * 100]) +# output = pd.DataFrame({'Langevin_lower_credible': lower_langevin, +# 'Langevin_upper_credible': upper_langevin, +# 'Gibbs_lower_credible': lower_gibbs, +# 'Gibbs_upper_credible': upper_gibbs, +# 'MLE_lower_confidence': mle['lower_confidence'], +# 'MLE_upper_confidence': mle['upper_confidence'], +# 'approx_lower_confidence': approx_inf['lower_confidence'], +# 'approx_upper_confidence': approx_inf['upper_confidence'], +# 'Split_lower_confidence': split_intervals[:, 0], +# 'Split_upper_confidence': split_intervals[:, 1], +# 'Naive_lower_confidence': naive_intervals[:, 0], +# 'Naive_upper_confidence': naive_intervals[:, 1] +# }) +# +# return output, scale_interval, _sigma + + +if __name__ == "__main__": + #test_hiv_data(split_proportion=0.50) + test_coverage(nsim=1) + + + diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 4a4d4a8a5..d7aca0e34 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -1,15 +1,28 @@ import numpy as np import nose.tools as nt -from ..lasso import lasso, full_targets, selected_targets, debiased_targets -from ...tests.instance import gaussian_instance - -def test_full_targets(n=200, - p=1000, - signal_fac=0.5, - s=5, sigma=3, - rho=0.4, - randomizer_scale=0.5, +import regreg.api as rr + + +from ..lasso import (lasso, + split_lasso) + +from ...base import (full_targets, + selected_targets, + debiased_targets) +from ...tests.instance import (gaussian_instance, + logistic_instance, + poisson_instance, + cox_instance) + + +def test_full_targets(n=200, + p=1000, + signal_fac=0.5, + s=5, + sigma=3, + rho=0.4, + randomizer_scale=0.7, full_dispersion=False): """ Compare to R randomized lasso @@ -22,7 +35,7 @@ def 
test_full_targets(n=200, p=p, signal=signal, s=s, - equicorrelated=False, + equicorrelated=True, rho=rho, sigma=sigma, random_signs=True)[:3] @@ -49,43 +62,40 @@ def test_full_targets(n=200, if full_dispersion: dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) else: - dispersion = None - - if n>p: - (observed_target, - cov_target, - cov_target_score, - alternatives) = full_targets(conv.loglike, - conv._W, - nonzero, - dispersion=dispersion) + dispersion = np.linalg.norm(Y - X[:,nonzero].dot(np.linalg.pinv(X[:,nonzero]).dot(Y))) ** 2 / (n - nonzero.sum()) + + if n > p: + target_spec = full_targets(conv.loglike, + conv.observed_soln, + nonzero, + dispersion=dispersion) else: - (observed_target, - cov_target, - cov_target_score, - alternatives) = debiased_targets(conv.loglike, - conv._W, - nonzero, - penalty=conv.penalty, - dispersion=dispersion) - - estimate, _, _, pval, intervals, _ = conv.selective_MLE(observed_target, - cov_target, - cov_target_score) + target_spec = debiased_targets(conv.loglike, + conv.observed_soln, + nonzero, + penalty=conv.penalty, + dispersion=dispersion) + + conv.setup_inference(dispersion=dispersion) + result = conv._selective_MLE(target_spec)[0] + + pval = result['pvalue'] + estimate = result['MLE'] + intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) print("estimate, intervals", estimate, intervals) coverage = (beta[nonzero] > intervals[:, 0]) * (beta[nonzero] < intervals[:, 1]) return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals -def test_selected_targets(n=2000, - p=200, - signal_fac=1., - s=5, - sigma=3, - rho=0.4, - randomizer_scale=1, +def test_selected_targets(n=2000, + p=200, + signal_fac=1.2, + s=5, + sigma=2, + rho=0.7, + randomizer_scale=1., full_dispersion=True): """ Compare to R randomized lasso @@ -99,7 +109,7 @@ def test_selected_targets(n=2000, p=p, signal=signal, s=s, - equicorrelated=False, + equicorrelated=True, rho=rho, sigma=sigma, random_signs=True)[:3] @@ -111,63 +121,614 @@ def test_selected_targets(n=2000, n, p = X.shape sigma_ = np.std(Y) - W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + W = 0.8 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ conv = const(X, Y, W, + ridge_term=0., randomizer_scale=randomizer_scale * sigma_) signs = conv.fit() nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + if nonzero.sum() > 0: - dispersion = None + if full_dispersion: dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + else: + dispersion = np.linalg.norm(Y - X[:,nonzero].dot(np.linalg.pinv(X[:,nonzero]).dot(Y))) ** 2 / (n - nonzero.sum()) + + conv.setup_inference(dispersion=dispersion) + + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=dispersion) - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=dispersion) + result = conv._selective_MLE(target_spec)[0] - estimate, _, _, pval, intervals, _ = conv.selective_MLE(observed_target, - cov_target, - cov_target_score) + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals -def main(nsim=500, full=False): - P0, PA, cover, length_int = [], [], [], [] - from statsmodels.distributions import 
ECDF +def test_instance(): + n, p, s = 500, 100, 5 + X = np.random.standard_normal((n, p)) + beta = np.zeros(p) + beta[:s] = np.sqrt(2 * np.log(p) / n) + Y = X.dot(beta) + np.random.standard_normal(n) - n, p, s = 500, 100, 10 + scale_ = np.std(Y) + # uses noise of variance n * scale_ / 4 by default + L = lasso.gaussian(X, Y, 3 * scale_ * np.sqrt(2 * np.log(p) * np.sqrt(n))) + signs = L.fit() + E = (signs != 0) - for i in range(nsim): - if full: - if n > p: - full_dispersion = True + M = E.copy() + M[-3:] = 1 + dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) + + L.setup_inference(dispersion=dispersion) + + target_spec = selected_targets(L.loglike, + L.observed_soln, + features=M, + dispersion=dispersion) + + print("check shapes", target_spec.observed_target.shape, E.sum()) + + result = L.selective_MLE(target_spec)[0] + + intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) + + beta_target = np.linalg.pinv(X[:, M]).dot(X.dot(beta)) + + coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) + + return coverage + +def test_selected_targets_disperse(n=500, + p=100, + s=5, + sigma=1., + rho=0.4, + randomizer_scale=1, + full_dispersion=True): + """ + Compare to R randomized lasso + """ + + inst, const = gaussian_instance, lasso.gaussian + signal = 1. + + while True: + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + randomizer_scale=randomizer_scale * sigma_) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + if full_dispersion: + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) else: - full_dispersion = False - p0, pA, cover_, intervals = test_full_targets(n=n, p=p, s=s, full_dispersion=full_dispersion) - avg_length = intervals[:, 1] - intervals[:, 0] - else: - full_dispersion = True - p0, pA, cover_, intervals = test_selected_targets(n=n, p=p, s=s, - full_dispersion=full_dispersion) - avg_length = intervals[:, 1] - intervals[:, 0] - - cover.extend(cover_) - P0.extend(p0) - PA.extend(pA) - print( - np.array(PA) < 0.1, np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover), - np.mean(avg_length), 'null pvalue + power + length') + dispersion = np.linalg.norm(Y - X[:,nonzero].dot(np.linalg.pinv(X[:,nonzero]).dot(Y))) ** 2 / (n - nonzero.sum()) + + conv.setup_inference(dispersion=dispersion) + + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=dispersion) + + result = conv._selective_MLE(target_spec)[0] + + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) + + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + + coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals + + +def test_logistic(n=2000, + p=200, + signal_fac=10., + s=5, + rho=0.4, + randomizer_scale=1): + """ + Run approx MLE with selected targets on binomial data + """ + + inst, const = logistic_instance, lasso.logistic + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:3] + + n, p = 
X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + randomizer_scale=randomizer_scale * sigma_) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + + conv.setup_inference(dispersion=1) + + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=1) + + result = conv._selective_MLE(target_spec)[0] + estimate = result['MLE'] + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals + +def test_logistic_split(n=2000, + p=200, + signal_fac=10., + s=5, + rho=0.4, + randomizer_scale=1): + """ + Run approx MLE with selected targets on binomial data with data splitting + """ + + inst, const = logistic_instance, split_lasso.logistic + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + proportion=0.7) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=1) + + conv.setup_inference(dispersion=None) + + result = conv._selective_MLE(target_spec)[0] + estimate = result['MLE'] + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals + +def test_poisson(n=2000, + p=200, + signal_fac=10., + s=5, + rho=0.4, + randomizer_scale=1): + """ + Run approx MLE with selected targets on Poisson data + """ + + inst, const = poisson_instance, lasso.poisson + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + randomizer_scale=randomizer_scale * sigma_) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=1) + + conv.setup_inference(dispersion=1) + + result = conv._selective_MLE(target_spec)[0] + estimate = result['MLE'] + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals + +def test_poisson_split(n=2000, + p=200, + signal_fac=10., + s=5, + rho=0.4, + randomizer_scale=1): + """ + Run approx MLE with selected targets on Poisson data with data splitting + """ + + inst, const = poisson_instance, split_lasso.poisson + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + proportion=0.7) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + + target_spec = selected_targets(conv.loglike, + 
conv.observed_soln, + dispersion=1) + + conv.setup_inference(dispersion=1) + + result = conv._selective_MLE(target_spec)[0] + estimate = result['MLE'] + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals + +def test_cox(n=2000, + p=200, + signal_fac=10., + s=5, + rho=0.4, + randomizer_scale=1): + """ + Run approx MLE with selected targets on survival data + """ + + inst, const = cox_instance, lasso.coxph + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + X, T, S, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:4] + + n, p = X.shape + + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) + + conv = const(X, + T, + S, + W, + randomizer_scale=randomizer_scale) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + + cox_full = rr.glm.cox(X, T, S) + full_hess = cox_full.hessian(conv.observed_soln) + + conv.setup_inference(dispersion=1) + + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=1) + + result = conv._selective_MLE(target_spec)[0] + estimate = result['MLE'] + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals + +def test_cox_split(n=2000, + p=200, + signal_fac=10., + s=5, + rho=0.4, + randomizer_scale=1): + """ + Run approx MLE with selected targets on survival data with data splitting + """ + + inst, const = cox_instance, split_lasso.coxph + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + X, T, S, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:4] + + n, p = X.shape + + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) + + conv = const(X, + T, + S, + W, + proportion=0.7) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + + cox_full = rr.glm.cox(X, T, S) + full_hess = cox_full.hessian(conv.observed_soln) + + conv.setup_inference(dispersion=1) + + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=1) + + result = conv._selective_MLE(target_spec)[0] + estimate = result['MLE'] + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals + +def test_scale_invariant_split(n=200, + p=20, + signal_fac=10., + s=5, + sigma=3, + rho=0.4, + randomizer_scale=1, + full_dispersion=True, + seed=2): + """ + Confirm Gaussian version is appropriately scale invariant with data splitting + """ + + inst, const = gaussian_instance, split_lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + results = [] + + scales = [1, 5] + for scale in scales: + + np.random.seed(seed) + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + Y *= scale; beta *= scale + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + print('W', W[0]/scale) + conv = const(X, + Y, + W, + proportion=0.7) + + signs = conv.fit() + nonzero = signs != 0 + print('nonzero', np.where(nonzero)[0]) + print('feature_weights', conv.feature_weights[0] / scale) + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) 
+ + conv.setup_inference(dispersion=dispersion) + + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=dispersion) + + #print('dispersion', target_spec.dispersion/scale**2) + print('target', target_spec.observed_target[0]/scale) + print('cov_target', target_spec.cov_target[0,0]/scale**2) + print('regress_target_score', target_spec.regress_target_score[0,0]/scale**2) + + + result = conv._selective_MLE(target_spec)[0] + + print(result['MLE'] / scale) + results.append(result) + + assert np.allclose(results[0]['MLE'] / scales[0], + results[1]['MLE'] / scales[1]) + assert np.allclose(results[0]['SE'] / scales[0], + results[1]['SE'] / scales[1]) + assert np.allclose(results[0]['upper_confidence'] / scales[0], + results[1]['upper_confidence'] / scales[1]) + assert np.allclose(results[0]['lower_confidence'] / scales[0], + results[1]['lower_confidence'] / scales[1]) + assert np.allclose(results[0]['Zvalue'], + results[1]['Zvalue']) + assert np.allclose(results[0]['pvalue'], + results[1]['pvalue']) + +def test_scale_invariant(n=200, + p=20, + signal_fac=10., + s=5, + sigma=3, + rho=0.4, + randomizer_scale=1, + full_dispersion=True, + seed=2): + """ + Confirm Gaussian version is appropriately scale invariant + """ + + inst, const = gaussian_instance, lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + results = [] + + scales = [1, 5] + for scale in scales: + + np.random.seed(seed) + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + Y *= scale; beta *= scale + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + print('W', W[0]/scale) + conv = const(X, + Y, + W, + randomizer_scale=randomizer_scale * sigma_) + + signs = conv.fit() + nonzero = signs != 0 + print('nonzero', np.where(nonzero)[0]) + print('feature_weights', conv.feature_weights[0] / scale) + print('perturb', conv._initial_omega[0] / scale) + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + + conv.setup_inference(dispersion=dispersion) + + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=dispersion) + + #print('dispersion', target_spec.dispersion/scale**2) + print('target', target_spec.observed_target[0]/scale) + print('cov_target', target_spec.cov_target[0,0]/scale**2) + print('regress_target_score', target_spec.regress_target_score[0,0]/scale**2) + + result = conv._selective_MLE(target_spec)[0] + + print(result['MLE'] / scale) + results.append(result) + + assert np.allclose(results[0]['MLE'] / scales[0], + results[1]['MLE'] / scales[1]) + assert np.allclose(results[0]['SE'] / scales[0], + results[1]['SE'] / scales[1]) + assert np.allclose(results[0]['upper_confidence'] / scales[0], + results[1]['upper_confidence'] / scales[1]) + assert np.allclose(results[0]['lower_confidence'] / scales[0], + results[1]['lower_confidence'] / scales[1]) + assert np.allclose(results[0]['Zvalue'], + results[1]['Zvalue']) + assert np.allclose(results[0]['pvalue'], + results[1]['pvalue']) + diff --git a/selectinf/randomized/tests/test_selective_MLE_onedim.py b/selectinf/randomized/tests/test_selective_MLE_onedim.py index df3aea08d..dd7ded2ff 100644 --- a/selectinf/randomized/tests/test_selective_MLE_onedim.py +++ b/selectinf/randomized/tests/test_selective_MLE_onedim.py @@ -4,10 +4,11 @@ from scipy.stats import norm as ndist import nose.tools as nt -from ..lasso import lasso, full_targets +from ..lasso import lasso +from 
...base import full_targets, TargetSpec from ...tests.instance import gaussian_instance -def test_onedim_lasso(n=50000, W=1.5, signal=2., sigma=1, randomizer_scale=1): +def test_onedim_lasso(n=50000, W=1.5, signal=2., sigma=2, randomizer_scale=1): beta = np.array([signal]) while True: @@ -28,24 +29,33 @@ def test_onedim_lasso(n=50000, W=1.5, signal=2., sigma=1, randomizer_scale=1): # this is current code where we estimate sigma - (observed_target, - cov_target, - cov_target_score, - alternatives) = full_targets(conv.loglike, - conv._W, - nonzero) + target_spec = full_targets(conv.loglike, + conv.observed_soln, + features=nonzero) - estimate_cur, I_cur, Z_cur, pv_cur = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[:4] + result = conv.selective_MLE(target_spec) + + estimate_cur = float(result[0]['MLE']) + Z_cur = float(result[0]['Zvalue']) + pv_cur = float(result[0]['pvalue']) + I_cur = result[1] # this matches exactly with old code target_Z = X.T.dot(Y) / np.sqrt((X**2).sum(0)) - estimate, I, Z, pv = conv.sampler.selective_MLE(target_Z, sigma**2 * np.ones((1,1)), - -sigma**2 * np.ones((1,1)), np.ones((1,)), - solve_args={'tol':1.e-12})[:4] + target = TargetSpec(target_Z, + sigma**2 * np.ones((1,1)), + -np.ones((1,1)) / np.sqrt((X**2).sum(0)), + ['greater'], + sigma**2) + result2 = conv.sampler.selective_MLE(target, + np.ones((1,)) * signs[0], + solve_args={'tol':1.e-12}) + estimate, I, Z, pv = (float(result2[0]['MLE']), + result2[1], + float(result2[0]['Zvalue']), + float(result2[0]['pvalue'])) target_transform = (-np.identity(1), np.zeros(1)) s = signs @@ -59,13 +69,16 @@ def test_onedim_lasso(n=50000, W=1.5, signal=2., sigma=1, randomizer_scale=1): (sigma ** 2.) * np.identity(1), (1. / (sigma ** 2.)) * np.identity(1), tol=1.e-12) - print(estimate, approx_MLE, 'selective MLE') - print(beta[nonzero], 'truth') - print(np.linalg.pinv(X[:, nonzero]).dot(Y), 'relaxed') - print(pv, 'pv') - pivot = ndist.cdf((estimate_cur - signal) / np.sqrt(I_cur[0,0])) - print(pivot, 'pivot') + + debug = False + if debug: + print(estimate, approx_MLE, 'selective MLE') + print(beta[nonzero], 'truth') + print(np.linalg.pinv(X[:, nonzero]).dot(Y), 'relaxed') + print(pv, 'pv') + print(pivot, 'pivot') + return estimate, estimate_cur, np.atleast_1d(approx_MLE), pivot def test_agreement(seed=0): @@ -95,24 +108,6 @@ def test_agreement(seed=0): return beta_seq, MLE_cur, MLE_prev, pivot -def main(): - - beta_seq, MLE_cur, MLE_prev, pivot = test_agreement() - - import matplotlib.pyplot as plt - from statsmodels.distributions import ECDF - - plt.figure(num=1) - - plt.plot(beta_seq, np.array(MLE_cur), label='MLE now') - plt.plot(beta_seq, np.array(MLE_prev), 'r--', label='MLE prev') - plt.legend() - - plt.figure(num=2) - U = np.linspace(0, 1, 101) - plt.plot(U, ECDF(pivot)(U)) - plt.plot([0,1],[0,1], 'k--') - ##################################################### # Old selection.randomized.selective_MLE module diff --git a/selectinf/randomized/tests/test_slope.py b/selectinf/randomized/tests/test_slope.py index 05d2ec257..1545d0f47 100644 --- a/selectinf/randomized/tests/test_slope.py +++ b/selectinf/randomized/tests/test_slope.py @@ -1,12 +1,12 @@ from ...tests.instance import gaussian_instance -import numpy as np +import numpy as np, pandas as pd from regreg.atoms.slope import slope as slope_atom import regreg.api as rr from ..slope import slope -from ..lasso import full_targets +from ...base import full_targets, selected_targets from ...tests.decorators import rpy_test_safe try: @@ -34,7 +34,7 @@ 
def slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian", sigma { if(choice_weights == "gaussian"){ lambda = "gaussian"} else{ - lambda = "bhq"} + lambda = "bh"} result = SLOPE(X, Y, fdr = fdr, lambda = lambda, normalize = normalize, sigma = sigma) } else{ result = SLOPE(X, Y, fdr = fdr, lambda = W, normalize = normalize, sigma = sigma) @@ -55,10 +55,10 @@ def slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian", sigma if W is None: r_W = robjects.NA_Logical - if choice_weights is "gaussian": + if choice_weights == "gaussian": r_choice_weights = robjects.StrVector('gaussian') - elif choice_weights is "bhq": - r_choice_weights = robjects.StrVector('bhq') + elif choice_weights == "bh": + r_choice_weights = robjects.StrVector('bh') else: r_W = robjects.r.matrix(W, nrow=p, ncol=1) @@ -69,12 +69,15 @@ def slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian", sigma result = r_slope(r_X, r_Y, r_W, r_normalize, r_choice_weights, r_sigma) - result = np.asarray(result.rx2('beta')), np.asarray(result.rx2('E')), \ - np.asarray(result.rx2('lambda_seq')), np.asscalar(np.array(result.rx2('sigma'))) + result = (np.asarray(result.rx2('beta')), + np.asarray(result.rx2('E')), + np.asarray(result.rx2('lambda_seq')).reshape(-1), + np.asscalar(np.array(result.rx2('sigma')))) rpy2.robjects.numpy2ri.deactivate() return result +@np.testing.dec.skipif(True, "extracting beta from SLOPE in R is troublesome here") @rpy_test_safe(libraries=['SLOPE']) def test_outputs_SLOPE_weights(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.35): @@ -97,6 +100,7 @@ def test_outputs_SLOPE_weights(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0 normalize = True, choice_weights = "gaussian", sigma = sigma_) + print("estimated sigma", sigma_, r_sigma) print("weights output by R", r_lambda_seq) print("output of est coefs R", r_beta) @@ -108,11 +112,19 @@ def test_outputs_SLOPE_weights(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0 soln = problem.solve() print("output of est coefs python", soln) + print(r_beta, 'huh') print("relative difference in solns", np.linalg.norm(soln-r_beta)/np.linalg.norm(r_beta)) @rpy_test_safe(libraries=['SLOPE']) -def test_randomized_slope(n=500, p=100, signal_fac=1.2, s=5, sigma=1., rho=0.35, randomizer_scale= np.sqrt(0.25), - target = "full", use_MLE=True): +def test_randomized_slope(n=2000, + p=100, + signal_fac=1.5, + s=10, + sigma=1., + rho=0.35, + randomizer_scale=0.7, + target = "full", + use_MLE=True): while True: inst = gaussian_instance @@ -127,16 +139,10 @@ def test_randomized_slope(n=500, p=100, signal_fac=1.2, s=5, sigma=1., rho=0.35, random_signs=True)[:3] sigma_ = np.sqrt(np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p)) - r_beta, r_E, r_lambda_seq, r_sigma = slope_R(X, - Y, - W=None, - normalize=True, - choice_weights="gaussian", #put gaussian - sigma=sigma_) conv = slope.gaussian(X, Y, - r_sigma * r_lambda_seq, + np.linspace(3, 1, p) * sigma_, randomizer_scale=randomizer_scale * sigma_) signs = conv.fit() @@ -146,19 +152,13 @@ def test_randomized_slope(n=500, p=100, signal_fac=1.2, s=5, sigma=1., rho=0.35, if nonzero.sum() > 0: if target == 'full': - (observed_target, - cov_target, - cov_target_score, - alternatives) = full_targets(conv.loglike, - conv._W, - nonzero, dispersion=sigma_) + target_spec = full_targets(conv.loglike, + conv.observed_soln, + dispersion=sigma_) elif target == 'selected': - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, 
dispersion=sigma_) + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=sigma_) if target == "selected": beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) @@ -166,32 +166,25 @@ def test_randomized_slope(n=500, p=100, signal_fac=1.2, s=5, sigma=1., rho=0.35, beta_target = beta[nonzero] if use_MLE: - estimate, _, _, pval, intervals, _ = conv.selective_MLE(observed_target, - cov_target, - cov_target_score) + result = conv.selective_MLE(target_spec)[0] else: - _, pval, intervals = conv.summary(observed_target, - cov_target, - cov_target_score, - alternatives, - compute_intervals=True) - coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) + result = conv.summary(target_spec, + compute_intervals=True, + ndraw=150000) + pval = np.asarray(result['pvalue']) + lower = np.asarray(result['lower_confidence']) + upper = np.asarray(result['upper_confidence']) + + print(pd.DataFrame({'target':beta_target, + 'lower':lower, + 'upper':upper})) + + coverage = (beta_target > lower) * (beta_target < upper) break if True: - return pval[beta_target == 0], pval[beta_target != 0], coverage, intervals - -def main(nsim=100): - - P0, PA, cover, length_int = [], [], [], [] - - for i in range(nsim): - p0, pA, cover_, intervals = test_randomized_slope() + return pval[beta_target == 0], pval[beta_target != 0], coverage, lower, upper - cover.extend(cover_) - P0.extend(p0) - PA.extend(pA) - print('coverage', np.mean(cover)) diff --git a/selectinf/randomized/tests/test_split_lasso.py b/selectinf/randomized/tests/test_split_lasso.py index 768903e3f..0e0bd855e 100644 --- a/selectinf/randomized/tests/test_split_lasso.py +++ b/selectinf/randomized/tests/test_split_lasso.py @@ -7,10 +7,10 @@ import regreg.api as rr -from ..lasso import (split_lasso, - selected_targets, +from ..lasso import split_lasso +from ...base import (selected_targets, full_targets, - debiased_targets) + debiased_targets) from ...tests.instance import gaussian_instance from ...tests.flags import SET_SEED from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue @@ -66,44 +66,29 @@ def test_split_lasso(n=100, if nonzero.sum() > 0: if target == 'full': - (observed_target, - cov_target, - cov_target_score, - alternatives) = full_targets(conv.loglike, - conv._W, - nonzero, - dispersion=sigma**2) + target_spec = full_targets(conv.loglike, + conv.observed_soln, + dispersion=sigma**2) elif target == 'selected': - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero) #, - #dispersion=sigma**2) + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=sigma**2) elif target == 'debiased': - (observed_target, - cov_target, - cov_target_score, - alternatives) = debiased_targets(conv.loglike, - conv._W, - nonzero, - penalty=conv.penalty, - dispersion=sigma**2) - - _, pval, intervals = conv.summary(observed_target, - cov_target, - cov_target_score, - alternatives, - ndraw=ndraw, - burnin=burnin, - compute_intervals=False) - - final_estimator, observed_info_mean = conv.selective_MLE( - observed_target, - cov_target, - cov_target_score)[:2] + target_spec = debiased_targets(conv.loglike, + conv.observed_soln, + penalty=conv.penalty, + dispersion=sigma**2) + + result = conv.summary(target_spec, + ndraw=ndraw, + burnin=burnin, + compute_intervals=False) + + MLE_result, observed_info_mean, _ = conv.selective_MLE(target_spec) + + final_estimator = np.asarray(MLE_result['MLE']) + pval = 
np.asarray(result['pvalue']) if target == 'selected': true_target = np.linalg.pinv(X[:,nonzero]).dot(X.dot(beta)) @@ -131,33 +116,3 @@ def test_all_targets(n=100, p=20, signal_fac=1.5, s=5, sigma=3, rho=0.4): rho=rho, target=target) -def main(nsim=500, n=100, p=200, target='selected', sigma=3, s=3): - - import matplotlib.pyplot as plt - P0, PA = [], [] - from statsmodels.distributions import ECDF - - for i in range(nsim): - p0, pA = test_split_lasso(n=n, p=p, target=target, sigma=sigma, s=s) - print(len(p0), len(pA)) - if not (len(pA) < s and target=='selected'): - P0.extend(p0) - PA.extend(pA) - - P0_clean = np.array(P0) - - P0_clean = P0_clean[P0_clean > 1.e-5] # - print(np.mean(P0_clean), np.std(P0_clean), np.mean(np.array(PA) < 0.05), np.sum(np.array(PA) < 0.05) / (i+1), np.mean(np.array(P0) < 0.05), np.mean(P0_clean < 0.05), np.mean(np.array(P0) < 1e-5), 'null pvalue + power + failure') - - if i % 3 == 0 and i > 0: - U = np.linspace(0, 1, 101) - plt.clf() - if len(P0_clean) > 0: - plt.plot(U, ECDF(P0_clean)(U)) - if len(PA) > 0: - plt.plot(U, ECDF(PA)(U), 'r') - plt.plot([0, 1], [0, 1], 'k--') - plt.savefig("plot.pdf") - plt.show() - - diff --git a/selectinf/randomized/tests/test_standalone_lasso_mle.py b/selectinf/randomized/tests/test_standalone_lasso_mle.py new file mode 100644 index 000000000..5482460da --- /dev/null +++ b/selectinf/randomized/tests/test_standalone_lasso_mle.py @@ -0,0 +1,193 @@ +from __future__ import division, print_function + +import numpy as np +import nose.tools as nt + +import regreg.api as rr + +from ..lasso import split_lasso +from ...base import selected_targets, TargetSpec +from ..query import selective_MLE +from ..approx_reference import approximate_grid_inference + +def test_standalone_inference(n=2000, + p=100, + signal_fac=1.5, + proportion=0.7, + approx=True, + MLE=True): + """ + Check that standalone functions reproduce same p-values + as methods of `selectinf.randomized.lasso` + """ + + signal = np.sqrt(signal_fac * np.log(p)) / np.sqrt(n) + X = np.random.standard_normal((n, p)) + T = np.random.exponential(1, size=(n,)) + S = np.random.choice([0,1], n, p=[0.2,0.8]) + + cox_lasso = split_lasso.coxph(X, + T, + S, + 2 * np.ones(p) * np.sqrt(n), + proportion) + + signs = cox_lasso.fit() + nonzero = signs != 0 + + cox_sel = rr.glm.cox(X[:,nonzero], T, S) + + cox_full = rr.glm.cox(X, T, S) + + refit_soln = cox_sel.solve(min_its=2000) + padded_soln = np.zeros(p) + padded_soln[nonzero] = refit_soln + cox_full.solve(min_its=2000) + + full_hess = cox_full.hessian(padded_soln) + selected_hess = full_hess[nonzero][:,nonzero] + + target_spec = selected_targets(cox_lasso.loglike, + cox_lasso.observed_soln, + hessian=full_hess, + dispersion=1) + + if nonzero.sum(): + if approx: + approx_result = cox_lasso.approximate_grid_inference(target_spec) + approx_pval = approx_result['pvalue'] + + testval = approximate_normalizer_inference(proportion, + cox_lasso.initial_soln[nonzero], + refit_soln, + signs[nonzero], + selected_hess, + cox_lasso.feature_weights[nonzero]) + + assert np.allclose(testval['pvalue'], approx_pval) + + else: + approx_pval = np.empty(nonzero.sum())*np.nan + + if MLE: + MLE_result = cox_lasso.selective_MLE(target_spec)[0] + MLE_pval = MLE_result['pvalue'] + else: + MLE_pval = np.empty(nonzero.sum())*np.nan + + # working under null here + beta = np.zeros(p) + + testval = approximate_mle_inference(proportion, + cox_lasso.initial_soln[nonzero], + refit_soln, + signs[nonzero], + selected_hess, + cox_lasso.feature_weights[nonzero]) + + assert 
np.allclose(testval['pvalue'], MLE_pval) + return approx_pval[beta[nonzero] == 0], MLE_pval[beta[nonzero] == 0], testval + else: + return [], [] + +def approximate_mle_inference(training_proportion, + training_betahat, + selected_beta_refit, + selected_signs, + selected_hessian, + selected_feature_weights, + level=0.9): + + nselect = selected_hessian.shape[0] + pi_s = training_proportion + ratio = (1 - pi_s) / pi_s + + target_cov = np.linalg.inv(selected_hessian) + cond_precision = selected_hessian / ratio + cond_cov = target_cov * ratio + selected_signs[np.isnan(selected_signs)] = 1 # for unpenalized + cond_cov = cond_cov * selected_signs[None, :] * selected_signs[:, None] + + logdens_linear = target_cov * selected_signs[:,None] + cond_mean = selected_beta_refit * selected_signs - logdens_linear.dot( + selected_feature_weights * + selected_signs) + linear_part = -np.identity(nselect) + offset = np.zeros(nselect) + + target_score_cov = -np.identity(nselect) + observed_target = selected_beta_refit + + target_spec = selected_targets(cox_lasso.loglike, + cox_lasso.observed_soln, + hessian=full_hess, + dispersion=1) + + result = selective_MLE(target_spec, + training_betahat * selected_signs, + cond_mean, + cond_cov, + logdens_linear, + linear_part, + offset, + level=level, + useC=True)[0] + + return result + +def approximate_normalizer_inference(training_proportion, + training_betahat, + selected_beta_refit, + selected_signs, + selected_hessian, + selected_feature_weights, + alternatives=None, + level=0.9): + + nselect = selected_hessian.shape[0] + pi_s = training_proportion + ratio = (1 - pi_s) / pi_s + + target_cov = np.linalg.inv(selected_hessian) + cond_precision = selected_hessian / ratio + cond_cov = target_cov * ratio + selected_signs[np.isnan(selected_signs)] = 1 # for unpenalized + cond_cov = cond_cov * selected_signs[None, :] * selected_signs[:, None] + + logdens_linear = target_cov * selected_signs[:,None] + cond_mean = selected_beta_refit * selected_signs - logdens_linear.dot( + selected_feature_weights * + selected_signs) + linear_part = -np.identity(nselect) + offset = np.zeros(nselect) + + target_score_cov = -np.identity(nselect) + observed_target = selected_beta_refit + + target = TargetSpec(observed_target, + target_cov, + target_score_cov, + None) + + inverse_info = selective_MLE(target_spec, + training_betahat * selected_signs, + cond_mean, + cond_cov, + logdens_linear, + linear_part, + offset, + level=level, + useC=True)[1] + + G = approximate_grid_inference(target_spec, + inverse_info, + training_betahat * selected_signs, + cond_mean, + cond_cov, + logdens_linear, + linear_part, + offset) + + return G.summary(alternatives=alternatives, + level=level) + diff --git a/selectinf/randomized/tests/test_topK.py b/selectinf/randomized/tests/test_topK.py index 77984d545..2c1def227 100644 --- a/selectinf/randomized/tests/test_topK.py +++ b/selectinf/randomized/tests/test_topK.py @@ -10,7 +10,7 @@ def test_topK(n=500, s=5, sigma=3, rho=0.4, - randomizer_scale=0.25, + randomizer_scale=0.50, use_MLE=True, marginal=False): @@ -46,35 +46,27 @@ def test_topK(n=500, if nonzero.sum() > 0: if marginal: - (observed_target, - cov_target, - crosscov_target_score, - alternatives) = topK_select.marginal_targets(nonzero) + target_spec = topK_select.marginal_targets(nonzero) else: - (observed_target, - cov_target, - crosscov_target_score, - alternatives) = topK_select.multivariate_targets(nonzero, dispersion=sigma**2) + target_spec = topK_select.multivariate_targets(nonzero, dispersion=sigma**2) if 
use_MLE: - estimate, _, _, pval, intervals, _ = topK_select.selective_MLE(observed_target, - cov_target, - crosscov_target_score) + result = topK_select.selective_MLE(target_spec)[0] # run summary else: - _, pval, intervals = topK_select.summary(observed_target, - cov_target, - crosscov_target_score, - alternatives, - compute_intervals=True) - + result = topK_select.summary(target_spec, + compute_intervals=True) + lower = np.asarray(result['lower_confidence']) + upper = np.asarray(result['upper_confidence']) + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) print(pval) if marginal: beta_target = true_mean[nonzero] else: beta_target = beta[nonzero] - print("beta_target and intervals", beta_target, intervals) - coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) + print("beta_target and intervals", beta_target, lower, upper) + coverage = (beta_target > lower) * (beta_target < upper) print("coverage for selected target", coverage.sum()/float(nonzero.sum())) return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals @@ -82,29 +74,57 @@ def test_both(): test_topK(marginal=True) test_topK(marginal=False) -def main(nsim=5000, use_MLE=False): - - import matplotlib.pyplot as plt - import statsmodels.api as sm - U = np.linspace(0, 1, 101) - - P0, PA, cover, length_int = [], [], [], [] - for i in range(nsim): - p0, pA, cover_, intervals = test_topK(use_MLE=use_MLE) - - cover.extend(cover_) - P0.extend(p0) - PA.extend(pA) - print(np.mean(cover),'coverage so far') - - period = 10 - if use_MLE: - period = 50 - if i % period == 0 and i > 0: - plt.clf() - plt.plot(U, sm.distributions.ECDF(P0)(U), 'b', label='null') - plt.plot(U, sm.distributions.ECDF(PA)(U), 'r', label='alt') - plt.plot([0, 1], [0, 1], 'k--') - plt.legend() - plt.savefig('topK_pvals.pdf') +def test_bias_topK(n=500, + p=50, + s=5, + sigma=3, + rho=0.4, + randomizer_scale=0.50, + K=5, + marginal=False): + + while True: + X = gaussian_instance(n=n, + p=p, + equicorrelated=False, + rho=rho)[0] + W = rho**(np.fabs(np.subtract.outer(np.arange(p), np.arange(p)))) + sqrtW = np.linalg.cholesky(W) + sigma = 0.15 + Z = np.random.standard_normal(p).dot(sqrtW.T) * sigma + beta = (2 * np.random.binomial(1, 0.5, size=(p,)) - 1) * 5 * sigma + beta[s:] = 0 + np.random.shuffle(beta) + + true_mean = W.dot(beta) + score = Z + true_mean + idx = np.arange(p) + + n, p = X.shape + + randomizer = randomization.isotropic_gaussian(p, randomizer_scale * sigma) + topK_select = topK(score, + W * sigma**2, + randomizer, + K) + + boundary = topK_select.fit() + nonzero = boundary != 0 + + if nonzero.sum() > 0: + + if marginal: + beta_target = true_mean[nonzero] + target_spec = topK_select.marginal_targets(nonzero) + else: + beta_target = beta[nonzero] + target_spec = topK_select.multivariate_targets(nonzero, dispersion=sigma**2) + + result = topK_select.selective_MLE(target_spec)[0] + + bias_mle = np.asarray(result['MLE'])-beta_target + bias_indest = np.asarray(result['unbiased'])-beta_target + print("check ", np.asarray(result['MLE']), np.asarray(result['unbiased'])) + + return bias_mle, bias_indest diff --git a/selectinf/randomized/tests/test_unbiased_estimates.py b/selectinf/randomized/tests/test_unbiased_estimates.py new file mode 100644 index 000000000..a7a91cb41 --- /dev/null +++ b/selectinf/randomized/tests/test_unbiased_estimates.py @@ -0,0 +1,140 @@ +import numpy as np + +from ..lasso import lasso +from ...base import selected_targets +from ...tests.instance import 
gaussian_instance
+
+def UMVU(query,
+         X,
+         Y,
+         nonzero,
+         feat,
+         dispersion):
+
+    n, p = X.shape
+
+    nopt = nonzero.sum()
+
+    _, randomizer_prec = query.randomizer.cov_prec
+
+    implied_precision = np.zeros((n + nopt, n + nopt))
+
+    implied_precision[:n][:, :n] = (1. / dispersion) * (np.identity(n)) + (X.dot(X.T) * randomizer_prec)
+
+    implied_precision[n:][:, :n] = -query.opt_linear.T.dot(X.T) * randomizer_prec
+
+    implied_precision[:n][:, n:] = implied_precision[n:][:, :n].T
+
+    implied_precision[n:][:, n:] = query.opt_linear.T.dot(query.opt_linear) * randomizer_prec
+
+    implied_cov = np.linalg.inv(implied_precision)
+
+    _prec = np.linalg.inv(implied_cov[:n][:, :n])
+
+    linear_coef = (np.linalg.pinv(X[:, feat]).dot(_prec))
+    offset = -np.linalg.pinv(X[:, feat]).dot(X.dot(query.observed_subgrad)
+                                              - _prec.dot(implied_cov[:n][:, n:]).dot(query.opt_linear.T.dot(query.observed_subgrad))) * (randomizer_prec)
+
+    linear_coef *= dispersion
+    offset *= dispersion
+    UMVU = linear_coef.dot(Y) + offset
+
+    return UMVU
+
+def EST(query,
+        X,
+        Y,
+        nonzero,
+        feat,
+        dispersion):
+
+    target_spec = selected_targets(query.loglike,
+                                   query.observed_soln,
+                                   dispersion=dispersion)
+
+    _, randomizer_prec = query.randomizer.cov_prec
+    cond_cov = query.cond_cov
+    logdens_linear = query.sampler.logdens_transform[0]
+    cond_mean = query.cond_mean
+
+    prec_target = np.linalg.inv(target_spec.cov_target)
+    prec_opt = np.linalg.inv(cond_cov)
+
+    target_linear = target_spec.regress_target_score.T.dot(prec_target)  # XXX problem here: just switched cov_target_score to regress_target_score
+    target_offset = (-X.T.dot(Y) + query.observed_subgrad) - target_linear.dot(target_spec.observed_target)
+
+    target_lin = - logdens_linear.dot(target_linear)
+    target_off = cond_mean - target_lin.dot(target_spec.observed_target)
+
+    _prec = prec_target + (target_linear.T.dot(target_linear) * randomizer_prec) - target_lin.T.dot(
+        prec_opt).dot(target_lin)
+    _P = target_linear.T.dot(target_offset) * randomizer_prec
+
+    linear_coef = target_spec.cov_target.dot(_prec)
+    offset = target_spec.cov_target.dot(_P - target_lin.T.dot(prec_opt).dot(target_off))
+    est = linear_coef.dot(target_spec.observed_target) + offset
+
+    return est
+
+def test_UMVU(n=500,
+              p=100,
+              signal_fac=1.,
+              s=5,
+              sigma=3.,
+              rho=0.7,
+              randomizer_scale=np.sqrt(0.5)):
+
+
+    inst, const = gaussian_instance, lasso.gaussian
+    signal = np.sqrt(signal_fac * 2 * np.log(p))
+
+    while True:
+        X, Y, beta = inst(n=n,
+                          p=p,
+                          signal=signal,
+                          s=s,
+                          equicorrelated=True,
+                          rho=rho,
+                          sigma=sigma,
+                          random_signs=True)[:3]
+
+        sigma_ = np.std(Y)
+        W = 0.8 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_
+
+        conv = const(X,
+                     Y,
+                     W,
+                     #ridge_term=0.,
+                     randomizer_scale=randomizer_scale * sigma)
+
+        signs = conv.fit()
+        nonzero = signs != 0
+
+        if nonzero.sum() > 0:
+            #dispersion = sigma ** 2
+            if p > n/2:
+                dispersion = np.std(Y) ** 2
+            else:
+                dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p)
+
+            feat = nonzero.copy()
+            feat[-5:] = 1
+            dispersion = np.linalg.norm(Y - X[:, feat].dot(np.linalg.pinv(X[:, feat]).dot(Y))) ** 2 / (n - feat.sum())
+
+            umvu = UMVU(conv,
+                        X,
+                        Y,
+                        nonzero,
+                        feat,
+                        dispersion)
+
+            est = EST(conv,
+                      X,
+                      Y,
+                      nonzero,
+                      feat,
+                      dispersion)
+
+            print("check ", np.allclose(est-umvu, np.zeros(est.shape[0]), atol=1e-03), est-umvu)
+
+            return umvu, est
diff --git a/selectinf/sampling/sequential.py b/selectinf/sampling/sequential.py
index 06a018895..450ae81c8 100644
--- a/selectinf/sampling/sequential.py
+++ b/selectinf/sampling/sequential.py
@@ -10,7 +10,7 @@ def 
sample(white_constraint, nsample, proposal_sigma=0.2, - temps=np.linspace(0, 50, 51.)): + temps=np.linspace(0, 50., 51)): """ Build up an approximately constrained Gaussian based on relaxations of the constraint. diff --git a/selectinf/sampling/tests/test_sample_sphere.py b/selectinf/sampling/tests/test_sample_sphere.py index e1be9724c..cef1b08ec 100644 --- a/selectinf/sampling/tests/test_sample_sphere.py +++ b/selectinf/sampling/tests/test_sample_sphere.py @@ -2,7 +2,7 @@ import nose import nose.tools as nt import numpy as np -import numpy.testing.decorators as dec +from numpy.testing import dec from scipy.stats import chi import nose.tools as nt @@ -90,7 +90,7 @@ def test_sample_sphere(burnin=1000, s2 = AC.sample_from_sphere(con, initial, ndraw=ndraw, burnin=burnin) return s1, s2 -@dec.slow +@np.testing.dec.slow @set_seed_iftrue(SET_SEED, 20) @set_sampling_params_iftrue(SMALL_SAMPLES, nsim=10, ndraw=10, burnin=10) def test_distribution_sphere(n=15, p=10, sigma=1., diff --git a/selectinf/sampling/tests/test_sequential.py b/selectinf/sampling/tests/test_sequential.py index b4634bfd3..a4f34a2b6 100644 --- a/selectinf/sampling/tests/test_sequential.py +++ b/selectinf/sampling/tests/test_sequential.py @@ -1,5 +1,4 @@ import numpy as np -import numpy.testing.decorators as dec from scipy.stats import norm as ndist from ...constraints.affine import constraints @@ -7,13 +6,13 @@ from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue from ...tests.flags import SMALL_SAMPLES, SET_SEED -@dec.slow +@np.testing.dec.slow @set_seed_iftrue(SET_SEED) @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, nsim=10) def test_sequentially_constrained(ndraw=100, nsim=50): S = -np.identity(10)[:3] b = -6 * np.ones(3) C = constraints(S, b) - W = sample(C, nsim, temps=np.linspace(0, 200, 1001)) + W = sample(C, nsim, temps=np.linspace(0, 200., 1001)) U = np.linspace(0, 1, 101) diff --git a/selectinf/sampling/truncnorm.pyx b/selectinf/sampling/truncnorm.pyx index a9d415a1e..04cb2bbe8 100644 --- a/selectinf/sampling/truncnorm.pyx +++ b/selectinf/sampling/truncnorm.pyx @@ -15,9 +15,9 @@ This module has a code to sample from a truncated normal distribution specified by a set of affine constraints. """ -DTYPE_float = np.float +DTYPE_float = float ctypedef cnp.float_t DTYPE_float_t -DTYPE_int = np.int +DTYPE_int = int ctypedef cnp.int_t DTYPE_int_t ctypedef cnp.intp_t DTYPE_intp_t diff --git a/selectinf/tests/decorators.py b/selectinf/tests/decorators.py index 37407e65c..125a048ad 100644 --- a/selectinf/tests/decorators.py +++ b/selectinf/tests/decorators.py @@ -5,10 +5,7 @@ import nose import nose.tools -try: - from numpy.testing.decorators import SkipTest -except (ImportError, AttributeError): - from numpy.testing import SkipTest +from numpy.testing import SkipTest def set_seed_iftrue(condition, seed=10): """ @@ -209,7 +206,7 @@ def modified_gen(*args, **kwargs): for x in f(*args, **kwargs_cp): yield x else: - raise np.testing.decorators.SkipTest(get_msg(f, msg)) + raise SkipTest(get_msg(f, msg)) # Choose the right modified to use when building the actual decorator. 
if nose.util.isgenerator(f): diff --git a/selectinf/tests/instance.py b/selectinf/tests/instance.py index 8c096b9ab..35d791053 100644 --- a/selectinf/tests/instance.py +++ b/selectinf/tests/instance.py @@ -31,9 +31,16 @@ def AR1(rho, p): X = np.random.standard_normal((n, p)).dot(cholX.T) return X, sigmaX, cholX -def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0., signal=7, - random_signs=False, df=np.inf, - scale=True, center=True, +def gaussian_instance(n=100, + p=200, + s=7, + sigma=5, + rho=0., + signal=7, + random_signs=False, + df=np.inf, + scale=True, + center=True, equicorrelated=True): @@ -61,14 +68,13 @@ def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0., signal=7, sigma : float Noise level - rho : float - Equicorrelation value (must be in interval [0,1]) + rho : float + Correlation parameter. Must be in interval [0,1] for + equicorrelated, [-1,1] for AR(1). signal : float or (float, float) Sizes for the coefficients. If a tuple -- then coefficients are equally spaced between these values using np.linspace. - Note: the size of signal is for a "normalized" design, where np.diag(X.T.dot(X)) == np.ones(p). - If scale=False, this signal is divided by np.sqrt(n), otherwise it is unchanged. random_signs : bool If true, assign random signs to coefficients. @@ -77,9 +83,15 @@ def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0., signal=7, df : int Degrees of freedom for noise (from T distribution). - equicorrelated: bool - If true, design in equi-correlated, - Else design is AR. + scale : bool + Scale columns of design matrix? + + center : bool + Center columns of design matrix? + + equicorrelated : bool + Should columns of design be equi-correlated + or AR(1)? Returns ------- @@ -101,6 +113,13 @@ def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0., signal=7, sigmaX : np.ndarray((p,p)) Row covariance. + + Notes + ----- + + The size of signal is for a "normalized" design, where np.diag(X.T.dot(X)) == np.ones(p). + If scale=False, this signal is divided by np.sqrt(n), otherwise it is unchanged. + """ X, sigmaX = _design(n, p, rho, equicorrelated)[:2] @@ -140,7 +159,11 @@ def _noise(n, df=np.inf): return X, Y, beta * sigma, np.nonzero(active)[0], sigma, sigmaX -def logistic_instance(n=100, p=200, s=7, rho=0.3, signal=14, +def logistic_instance(n=100, + p=200, + s=7, + rho=0.3, + signal=14, random_signs=False, scale=True, center=True, @@ -162,19 +185,28 @@ def logistic_instance(n=100, p=200, s=7, rho=0.3, signal=14, s : int True sparsity - rho : float - Equicorrelation value (must be in interval [0,1]) + rho : float + Correlation parameter. Must be in interval [0,1] for + equicorrelated, [-1,1] for AR(1). signal : float or (float, float) Sizes for the coefficients. If a tuple -- then coefficients are equally spaced between these values using np.linspace. - Note: the size of signal is for a "normalized" design, where np.diag(X.T.dot(X)) == np.ones(p). - If scale=False, this signal is divided by np.sqrt(n), otherwise it is unchanged. random_signs : bool If true, assign random signs to coefficients. Else they are all positive. + scale : bool + Scale columns of design matrix? + + center : bool + Center columns of design matrix? + + equicorrelated : bool + Should columns of design be equi-correlated + or AR(1)? + Returns ------- @@ -193,6 +225,11 @@ def logistic_instance(n=100, p=200, s=7, rho=0.3, signal=14, sigmaX : np.ndarray((p,p)) Row covariance. + Notes + ----- + + The size of signal is for a "normalized" design, where np.diag(X.T.dot(X)) == np.ones(p). 
+ If scale=False, this signal is divided by np.sqrt(n), otherwise it is unchanged. """ X, sigmaX = _design(n, p, rho, equicorrelated)[:2] @@ -226,7 +263,11 @@ def logistic_instance(n=100, p=200, s=7, rho=0.3, signal=14, Y = np.random.binomial(1, pi) return X, Y, beta, np.nonzero(active)[0], sigmaX -def poisson_instance(n=100, p=200, s=7, rho=0.3, signal=4, +def poisson_instance(n=100, + p=200, + s=7, + rho=0.3, + signal=4, random_signs=False, scale=True, center=True, @@ -248,19 +289,28 @@ def poisson_instance(n=100, p=200, s=7, rho=0.3, signal=4, s : int True sparsity - rho : float - Equicorrelation value (must be in interval [0,1]) + rho : float + Correlation parameter. Must be in interval [0,1] for + equicorrelated, [-1,1] for AR(1). signal : float or (float, float) Sizes for the coefficients. If a tuple -- then coefficients are equally spaced between these values using np.linspace. - Note: the size of signal is for a "normalized" design, where np.diag(X.T.dot(X)) == np.ones(p). - If scale=False, this signal is divided by np.sqrt(n), otherwise it is unchanged. random_signs : bool If true, assign random signs to coefficients. Else they are all positive. + scale : bool + Scale columns of design matrix? + + center : bool + Center columns of design matrix? + + equicorrelated : bool + Should columns of design be equi-correlated + or AR(1)? + Returns ------- @@ -279,6 +329,11 @@ def poisson_instance(n=100, p=200, s=7, rho=0.3, signal=4, sigmaX : np.ndarray((p,p)) Row covariance. + Notes + ----- + + The size of signal is for a "normalized" design, where np.diag(X.T.dot(X)) == np.ones(p). + If scale=False, this signal is divided by np.sqrt(n), otherwise it is unchanged. """ X, sigmaX = _design(n, p, rho, equicorrelated)[:2] @@ -312,6 +367,118 @@ def poisson_instance(n=100, p=200, s=7, rho=0.3, signal=4, Y = np.random.poisson(mu) return X, Y, beta, np.nonzero(active)[0], sigmaX +def cox_instance(n=100, + p=200, + s=7, + rho=0.3, + signal=4, + random_signs=False, + scale=True, + center=True, + p_censor=0.1, + equicorrelated=True): + """A testing instance for the LASSO. + Design is equi-correlated in the population, + normalized to have columns of norm 1. + + Parameters + ---------- + + n : int + Sample size + + p : int + Number of features + + s : int + True sparsity + + rho : float + Correlation parameter. Must be in interval [0,1] for + equicorrelated, [-1,1] for AR(1). + + signal : float or (float, float) + Sizes for the coefficients. If a tuple -- then coefficients + are equally spaced between these values using np.linspace. + + random_signs : bool + If true, assign random signs to coefficients. + Else they are all positive. + + scale : bool + Scale columns of design matrix? + + center : bool + Center columns of design matrix? + + equicorrelated : bool + Should columns of design be equi-correlated + or AR(1)? + + p_censor : float + Probability of right-censorship. + + Returns + ------- + + X : np.float((n,p)) + Design matrix. + + T : np.float(n) + Response vector of times. + + S : np.bool(n) + Right-censoring status. + + beta : np.float(p) + True coefficients. + + active : np.int(s) + Non-zero pattern. + + sigmaX : np.ndarray((p,p)) + Row covariance. + + Notes + ----- + + The size of signal is for a "normalized" design, where np.diag(X.T.dot(X)) == np.ones(p). + If scale=False, this signal is divided by np.sqrt(n), otherwise it is unchanged. 
+ + """ + + X, sigmaX = _design(n, p, rho, equicorrelated)[:2] + + if center: + X -= X.mean(0)[None,:] + + beta = np.zeros(p) + signal = np.atleast_1d(signal) + if signal.shape == (1,): + beta[:s] = signal[0] + else: + beta[:s] = np.linspace(signal[0], signal[1], s) + if random_signs: + beta[:s] *= (2 * np.random.binomial(1, 0.5, size=(s,)) - 1.) + np.random.shuffle(beta) + beta /= np.sqrt(n) + + if scale: + scaling = X.std(0) * np.sqrt(n) + X /= scaling[None, :] + beta *= np.sqrt(n) + sigmaX = sigmaX / np.multiply.outer(scaling, scaling) + + active = np.zeros(p, np.bool) + active[beta != 0] = True + + eta = linpred = np.dot(X, beta) + mu = np.exp(eta) + + T = np.random.exponential(mu) + S = np.random.choice([0,1], n, p=[p_censor,1-p_censor]) + return X, T, S, beta, np.nonzero(active)[0], sigmaX + def HIV_NRTI(drug='3TC', standardize=True, datafile=None, @@ -364,7 +531,7 @@ def HIV_NRTI(drug='3TC', NRTI_specific = NRTI.from_records(np.array(NRTI_specific).T, columns=NRTI_muts) X_NRTI = np.array(NRTI_specific, np.float) - Y = NRTI[drug] # shorthand + Y = np.asarray(NRTI[drug]) # shorthand keep = ~np.isnan(Y).astype(np.bool) X_NRTI = X_NRTI[np.nonzero(keep)]; Y=Y[keep] Y = np.array(np.log(Y), np.float); @@ -373,3 +540,97 @@ def HIV_NRTI(drug='3TC', Y -= Y.mean() X_NRTI -= X_NRTI.mean(0)[None, :]; X_NRTI /= X_NRTI.std(0)[None,:] return X_NRTI, Y, np.array(NRTI_muts) + + +def gaussian_group_instance(n=100, p=200, sgroup=7, sigma=5, rho=0., signal=7, + random_signs=False, df=np.inf, + scale=True, center=True, + groups=np.arange(20).repeat(10), + equicorrelated=True): + """A testing instance for the group LASSO. + If equicorrelated is True design is equi-correlated in the population, + normalized to have columns of norm 1. + If equicorrelated is False design is auto-regressive. + For the default settings, a $\\lambda$ of around 13.5 + corresponds to the theoretical $E(\\|X^T\\epsilon\\|_{\\infty})$ + with $\\epsilon \\sim N(0, \\sigma^2 I)$. + Parameters + ---------- + n : int + Sample size + p : int + Number of features + sgroup : int + True sparsity (number of active groups) + groups : array_like (1d, size == p) + Assignment of features to (non-overlapping) groups + sigma : float + Noise level + rho : float + Equicorrelation value (must be in interval [0,1]) + signal : float or (float, float) + Sizes for the coefficients. If a tuple -- then coefficients + are equally spaced between these values using np.linspace. + Note: the size of signal is for a "normalized" design, where np.diag(X.T.dot(X)) == np.ones(p). + If scale=False, this signal is divided by np.sqrt(n), otherwise it is unchanged. + random_signs : bool + If true, assign random signs to coefficients. + Else they are all positive. + df : int + Degrees of freedom for noise (from T distribution). + equicorrelated: bool + If true, design in equi-correlated, + Else design is AR. + Returns + ------- + X : np.float((n,p)) + Design matrix. + y : np.float(n) + Response vector. + beta : np.float(p) + True coefficients. + active : np.int(s) + Non-zero pattern. + sigma : float + Noise level. + sigmaX : np.ndarray((p,p)) + Row covariance. 
+ """ + + X, sigmaX = _design(n, p, rho, equicorrelated)[:2] + + if center: + X -= X.mean(0)[None, :] + + beta = np.zeros(p) + signal = np.atleast_1d(signal) + + group_labels = np.unique(groups) + group_active = np.random.choice(group_labels, sgroup, replace=False) + + active = np.isin(groups, group_active) + + if signal.shape == (1,): + beta[active] = signal[0] + else: + beta[active] = np.linspace(signal[0], signal[1], active.sum()) + if random_signs: + beta[active] *= (2 * np.random.binomial(1, 0.5, size=(active.sum(),)) - 1.) + beta /= np.sqrt(n) + + if scale: + scaling = X.std(0) * np.sqrt(n) + X /= scaling[None, :] + beta *= np.sqrt(n) + sigmaX = sigmaX / np.multiply.outer(scaling, scaling) + + # noise model + def _noise(n, df=np.inf): + if df == np.inf: + return np.random.standard_normal(n) + else: + sd_t = np.std(tdist.rvs(df, size=50000)) + return tdist.rvs(df, size=n) / sd_t + + Y = (X.dot(beta) + _noise(n, df)) * sigma + return X, Y, beta * sigma, np.nonzero(active)[0], sigma, sigmaX \ No newline at end of file diff --git a/selectinf/truncated/tests/test_truncated.py b/selectinf/truncated/tests/test_truncated.py index a1adfa19e..b5ddaaeb1 100644 --- a/selectinf/truncated/tests/test_truncated.py +++ b/selectinf/truncated/tests/test_truncated.py @@ -1,7 +1,6 @@ from __future__ import print_function import nose.tools as nt import numpy as np -import numpy.testing.decorators as dec from ..gaussian import truncated_gaussian, truncated_gaussian_old from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue @@ -25,7 +24,7 @@ def test_sigma(): np.around(np.array(tg2.equal_tailed_interval(Z,0.05)), 4)) @set_seed_iftrue(SET_SEED) -@dec.skipif(True, 'checking coverage: this is random with highish failure rate') +@np.testing.dec.skipif(True, 'checking coverage: this is random with highish failure rate') @set_sampling_params_iftrue(SMALL_SAMPLES, nsim=100) def test_equal_tailed_coverage(nsim=1000): @@ -44,7 +43,7 @@ def test_equal_tailed_coverage(nsim=1000): nt.assert_true(np.fabs(coverage - (1-alpha)*nsim) < 2*SE) @set_seed_iftrue(SET_SEED) -@dec.skipif(True, 'really slow') +@np.testing.dec.skipif(True, 'really slow') @set_sampling_params_iftrue(SMALL_SAMPLES, nsim=100) def test_UMAU_coverage(nsim=1000): diff --git a/selection/bayesian/selection_aware_posterior.py b/selection/bayesian/selection_aware_posterior.py new file mode 100644 index 000000000..8e00d3220 --- /dev/null +++ b/selection/bayesian/selection_aware_posterior.py @@ -0,0 +1,124 @@ +import numpy as np, sys +from selection.randomized.selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C +from scipy.stats import norm as ndist + +class posterior_inference(): + + + def __init__(self, + observed_target, + cov_target, + cov_target_score, + feasible_point, + cond_mean, + cond_cov, + logdens_linear, + linear_part, + offset, + ini_estimate): + + self.observed_target = observed_target + self.cov_target = cov_target + self.cov_target_score = cov_target_score + + self.feasible_point = feasible_point + self.cond_mean = cond_mean + self.cond_cov = cond_cov + self.target_size = cond_cov.shape[0] + self.logdens_linear = logdens_linear + self.linear_part = linear_part + self.offset = offset + self.ini_estimate = ini_estimate + + def prior(self, target_parameter, var_parameter, lam): + + std_parameter = np.sqrt(var_parameter) + grad_prior_par = -np.true_divide(target_parameter, var_parameter) + grad_prior_std = np.true_divide(target_parameter**2. 
, 2.*(var_parameter**2)) - (lam/2.) - 1./(2.*var_parameter)
+        log_prior = -(np.linalg.norm(target_parameter)**2.) / (2.*var_parameter) - (lam * (np.linalg.norm(std_parameter)**2)/2.) - np.log(std_parameter)
+        return grad_prior_par, grad_prior_std, log_prior
+
+    def det_initial_point(self, initial_soln, solve_args={'tol':1.e-12}):
+
+        if np.asarray(self.observed_target).shape in [(), (0,)]:
+            raise ValueError('no target specified')
+
+        observed_target = np.atleast_1d(self.observed_target)
+        prec_target = np.linalg.inv(self.cov_target)
+
+        target_lin = - self.logdens_linear.dot(self.cov_target_score.T.dot(prec_target))
+        target_offset = self.cond_mean - target_lin.dot(observed_target)
+
+        prec_opt = np.linalg.inv(self.cond_cov)
+        mean_opt = target_lin.dot(initial_soln) + target_offset
+        conjugate_arg = prec_opt.dot(mean_opt)
+
+        solver = solve_barrier_affine_C
+
+        val, soln, hess = solver(conjugate_arg,
+                                 prec_opt,
+                                 self.feasible_point,
+                                 self.linear_part,
+                                 self.offset,
+                                 **solve_args)
+
+        initial_point = initial_soln + self.cov_target.dot(target_lin.T.dot(prec_opt.dot(mean_opt - soln)))
+        return initial_point
+
+    def gradient_log_likelihood(self, parameters, solve_args={'tol':1.e-15}):
+
+        npar = self.target_size
+        target_parameter = parameters[:npar]
+        var_parameter = parameters[npar:]
+        if np.asarray(self.observed_target).shape in [(), (0,)]:
+            raise ValueError('no target specified')
+
+        observed_target = np.atleast_1d(self.observed_target)
+        prec_target = np.linalg.inv(self.cov_target)
+
+        target_lin = - self.logdens_linear.dot(self.cov_target_score.T.dot(prec_target))
+        target_offset = self.cond_mean - target_lin.dot(observed_target)
+
+        prec_opt = np.linalg.inv(self.cond_cov)
+        mean_opt = target_lin.dot(target_parameter) + target_offset
+        conjugate_arg = prec_opt.dot(mean_opt)
+
+        solver = solve_barrier_affine_C
+
+        val, soln, hess = solver(conjugate_arg,
+                                 prec_opt,
+                                 self.feasible_point,
+                                 self.linear_part,
+                                 self.offset,
+                                 **solve_args)
+
+        reparam = target_parameter + self.cov_target.dot(target_lin.T.dot(prec_opt.dot(mean_opt - soln)))
+        neg_normalizer = (target_parameter - reparam).T.dot(prec_target).dot(target_parameter - reparam)/2. \
+                         + val + mean_opt.T.dot(prec_opt).dot(mean_opt) / 2.
+
+        grad_barrier = np.diag(2. / ((1. + soln) ** 3.) - 2. / (soln ** 3.))
+
+        L = target_lin.T.dot(prec_opt)
+        N = L.dot(hess)
+        jacobian = (np.identity(observed_target.shape[0]) + self.cov_target.dot(L).dot(target_lin)) - \
+                   self.cov_target.dot(N).dot(L.T)
+
+        log_lik = -((observed_target - reparam).T.dot(prec_target).dot(observed_target - reparam)) / 2. \
+                  + neg_normalizer \
+                  + np.log(np.linalg.det(jacobian))
+
+        grad_lik = jacobian.T.dot(prec_target).dot(observed_target)
+        grad_neg_normalizer = -jacobian.T.dot(prec_target).dot(target_parameter)
+
+        opt_num = self.cond_cov.shape[0]
+        grad_jacobian = np.zeros(opt_num)
+        A = np.linalg.inv(jacobian).dot(self.cov_target).dot(N)
+        for j in range(opt_num):
+            M = grad_barrier.dot(np.diag(N.T[:, j]))
+            grad_jacobian[j] = np.trace(A.dot(M).dot(N.T))
+
+        prior_info = self.prior(reparam, var_parameter, lam=0.01)
+        return np.append(grad_lik + grad_neg_normalizer + grad_jacobian + jacobian.T.dot(prior_info[0]), prior_info[1]), \
+               np.append(reparam, var_parameter), log_lik + prior_info[2]
+
+
+
diff --git a/setup.py b/setup.py
index cf882987b..4b6a011f0 100755
--- a/setup.py
+++ b/setup.py
@@ -59,9 +59,16 @@
                       libraries=[],
                       include_dirs=['C-software/src']))
 
+EXTS.append(Extension('selectinf.algorithms.cox_utils',
+                      ['selectinf/algorithms/cox_utils.pyx',
+                       'C-software/src/cox_fns.c'],
+                      libraries=[],
+                      include_dirs=['C-software/src']))
+
 EXTS.append(Extension('selectinf.randomized.selective_MLE_utils',
                       ['selectinf/randomized/selective_MLE_utils.pyx',
-                       'C-software/src/selective_mle.c'],
+                       'C-software/src/selective_mle.c',
+                       'C-software/src/cox_fns.c'],
                       libraries=[],
                       include_dirs=['C-software/src']))