From b6dd8aea38c504528a4e5f367e3ead9b1e85f64e Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 24 Sep 2019 23:37:30 -0700 Subject: [PATCH 001/187] updating .travis.yml to do a doc build --- .travis.yml | 14 ++ doc-requirements.txt | 2 +- doc/Makefile | 5 + doc/source/learning/Basic_example.Rmd | 106 ++++++++ doc/source/learning/Basic_example.ipynb | 199 +++++++++++++++ doc/source/learning/Full_model_LASSO.Rmd | 145 +++++++++++ doc/source/learning/Full_model_LASSO.ipynb | 276 +++++++++++++++++++++ doc/source/learning/Learning1.Rmd | 26 -- doc/source/learning/Learning1.ipynb | 63 ----- doc/source/learning/Learning2.Rmd | 26 -- doc/source/learning/Learning2.ipynb | 63 ----- doc/source/learning/index.rst | 4 +- selectinf/learning/core.py | 25 +- selectinf/learning/learners.py | 8 +- selectinf/learning/utils.py | 82 +++--- 15 files changed, 812 insertions(+), 232 deletions(-) create mode 100644 doc/source/learning/Basic_example.Rmd create mode 100644 doc/source/learning/Basic_example.ipynb create mode 100644 doc/source/learning/Full_model_LASSO.Rmd create mode 100644 doc/source/learning/Full_model_LASSO.ipynb delete mode 100644 doc/source/learning/Learning1.Rmd delete mode 100644 doc/source/learning/Learning1.ipynb delete mode 100644 doc/source/learning/Learning2.Rmd delete mode 100644 doc/source/learning/Learning2.ipynb diff --git a/.travis.yml b/.travis.yml index 11e16d88b..881190701 100644 --- a/.travis.yml +++ b/.travis.yml @@ -69,6 +69,12 @@ matrix: env: - INSTALL_TYPE=requirements - DEPENDS= + - python: 3.6 + sudo: true + dist: trusty + env: + - DOC_BUILD=1 + before_install: - source travis-tools/utils.sh - travis_before_install @@ -84,6 +90,14 @@ before_install: install: # Install selectinf + - | + echo "backend : agg" > matplotlibrc + if [ "$DOC_BUILD" ]; then # doc build + pip install -r doc-requirements.txt + cd doc + jupytext --sync source/*/*.ipynb + # Build without the API documentation, for the doctests + make html - if [ "$RUN_R_TESTS" ]; then sudo apt-get install -y r-base r-base-dev r-cran-devtools r-cran-rcpp; pip install rpy2 statsmodels -c constraints.txt ; diff --git a/doc-requirements.txt b/doc-requirements.txt index 864bedd86..37dc7d0d8 100644 --- a/doc-requirements.txt +++ b/doc-requirements.txt @@ -6,9 +6,9 @@ numpydoc matplotlib texext nb2plots -rpy2 seaborn statsmodels tensorflow keras nbsphinx +jupytext diff --git a/doc/Makefile b/doc/Makefile index 1f39aad81..7e84d387b 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -122,3 +122,8 @@ doctest: @echo @echo "The overview file is in build/doctest." +github: html + # Needs ghp-import (pip install ghp-import) + ghp-import -n -p $(BUILDROOT)/html/ + @echo + @echo "Published to Github" diff --git a/doc/source/learning/Basic_example.Rmd b/doc/source/learning/Basic_example.Rmd new file mode 100644 index 000000000..e57d8d571 --- /dev/null +++ b/doc/source/learning/Basic_example.Rmd @@ -0,0 +1,106 @@ +--- +jupyter: + jupytext: + cell_metadata_filter: all,-slideshow + formats: ipynb,Rmd + text_representation: + extension: .Rmd + format_name: rmarkdown + format_version: '1.1' + jupytext_version: 1.1.1 + kernelspec: + display_name: Python 3 + language: python + name: python3 +--- + +# Simple example + +Here we run a simple linear regression model (even without intercept) +and make a selection when the $Z$ score is larger than 2. + +The functions `partial_model_inference` and `pivot_plot` below are just simulation utilities +used to simulate results in least squares regression. 
The underlying functionality +is contained in the function `selectinf.learning.core.infer_general_target`. + + +```{python collapsed=TRUE} +import functools + +import numpy as np, pandas as pd +import matplotlib.pyplot as plt +# %matplotlib inline + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import partial_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler +from selectinf.learning.Rfitters import logit_fit +``` + +```{python} +np.random.seed(0) # for replicability +def simulate(n=20, p=1, s=1, signal=1, sigma=2, alpha=0.1, B=2000): + + # description of statistical problem + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + sampler = normal_sampler(S, covS) + + def base_algorithm(X, dispersion, sampler): + + success = np.zeros(p) + + scale = 0. + noisy_S = sampler(scale=scale) + + Z = noisy_S / np.sqrt(np.linalg.norm(X)**2 * dispersion) + if Z > 2: + return set([0]) + else: + return set([]) + + selection_algorithm = functools.partial(base_algorithm, X, dispersion) + + # run selection algorithm + + return partial_model_inference(X, + y, + truth, + selection_algorithm, + sampler, + B=B, + fit_probability=logit_fit, + fit_args={'df':20}) +``` + +```{python} +dfs = [] +for i in range(1000): + df = simulate() + if df is not None: + dfs.append(df) +``` + +```{python} +fig = plt.figure(figsize=(8, 8)) +results = pd.concat(dfs) +pivot_plot(results, fig=fig); +``` + +```{python collapsed=TRUE} + +``` diff --git a/doc/source/learning/Basic_example.ipynb b/doc/source/learning/Basic_example.ipynb new file mode 100644 index 000000000..6b9989c17 --- /dev/null +++ b/doc/source/learning/Basic_example.ipynb @@ -0,0 +1,199 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Simple example\n", + "\n", + "Here we run a simple linear regression model (even without intercept) \n", + "and make a selection when the $Z$ score is larger than 2.\n", + "\n", + "The functions `partial_model_inference` and `pivot_plot` below are just simulation utilities\n", + "used to simulate results in least squares regression. The underlying functionality\n", + "is contained in the function `selectinf.learning.core.infer_general_target`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import functools\n", + "\n", + "import numpy as np, pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "from selectinf.tests.instance import gaussian_instance\n", + "\n", + "from selectinf.learning.utils import partial_model_inference, pivot_plot\n", + "from selectinf.learning.core import normal_sampler\n", + "from selectinf.learning.Rfitters import logit_fit" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/sklearn/ensemble/weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. 
It will be removed in a future NumPy release.\n", + " from numpy.core.umath_tests import inner1d\n", + "Using TensorFlow backend.\n", + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:455: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n", + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:456: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n", + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:457: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n", + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:458: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n", + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:459: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n", + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:462: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n", + "R[write to console]: Loaded gbm 2.1.5\n", + "\n", + "R[write to console]: randomForest 4.6-14\n", + "\n", + "R[write to console]: Type rfNews() to see new features/changes/bug fixes.\n", + "\n" + ] + } + ], + "source": [ + "np.random.seed(0) # for replicability\n", + "def simulate(n=20, p=1, s=1, signal=1, sigma=2, alpha=0.1, B=2000):\n", + "\n", + " # description of statistical problem\n", + "\n", + " X, y, truth = gaussian_instance(n=n,\n", + " p=p, \n", + " s=s,\n", + " equicorrelated=False,\n", + " rho=0.5, \n", + " sigma=sigma,\n", + " signal=signal,\n", + " random_signs=True,\n", + " scale=False)[:3]\n", + "\n", + " dispersion = sigma**2\n", + "\n", + " S = X.T.dot(y)\n", + " covS = dispersion * X.T.dot(X)\n", + " sampler = normal_sampler(S, covS)\n", + "\n", + " def base_algorithm(X, dispersion, sampler):\n", + "\n", + " success = np.zeros(p)\n", + "\n", + " scale = 0.\n", + " noisy_S = sampler(scale=scale)\n", + " \n", + " Z = noisy_S / np.sqrt(np.linalg.norm(X)**2 * dispersion)\n", + " if Z > 2:\n", + " return set([0])\n", + " else:\n", + " return set([])\n", + "\n", + " selection_algorithm = functools.partial(base_algorithm, X, dispersion)\n", + "\n", + " # run selection algorithm\n", + "\n", + " return partial_model_inference(X,\n", + " y,\n", + " truth,\n", + " selection_algorithm,\n", + " sampler,\n", + " B=B,\n", + " 
fit_probability=logit_fit,\n", + " fit_args={'df':20})" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jonathantaylor/git-repos/selectinf/selectinf/distributions/discrete_family.py:86: RuntimeWarning: divide by zero encountered in log\n", + " self._lw = np.array([np.log(v) for v in xw[:,1]])\n" + ] + } + ], + "source": [ + "dfs = []\n", + "for i in range(1000):\n", + " df = simulate()\n", + " if df is not None:\n", + " dfs.append(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfoAAAHpCAYAAABqV/58AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0\ndHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzdd3hUxdfA8e+kklClhCZVuliJiAUI\nhN6rIIK0nyDSXiwgIE1BsKIoxaBURZDeexUQAUWQJkU60jsJIWXeP24ISXaTbDbb93yeJw/ZuXPv\nHgzx7J05d0ZprRFCCCGEZ/JxdgBCCCGEsB9J9EIIIYQHk0QvhBBCeDBJ9EIIIYQHk0QvhBBCeDBJ\n9EIIIYQH83N2APaQN29eXbx4cWeHIYQQQjjEH3/8cUVrnc/cMY9M9MWLF2f37t3ODkMIIYRwCKXU\nqdSOydC9EEII4cEk0QshhBAeTBK9EEII4cEk0QshhBAeTBK9EEII4cE8sureErdu3eLSpUvExMQ4\nOxRhBX9/f0JCQsiRI4ezQxFCCJfmlYn+1q1bXLx4kcKFCxMUFIRSytkhiQzQWhMVFcW5c+cAJNkL\nIUQavHLo/tKlSxQuXJjg4GBJ8m5IKUVwcDCFCxfm0qVLzg5HCCFcmlcm+piYGIKCgpwdhsikoKAg\nmXoRQoh0eGWiB+RO3gPIz1AIIdLntYleCCGE8AaS6IUQQggPJoneTQ0fPpy8efM6Owy72b9/P0op\nNm3a5OxQhBDCrUmiF0IIITyYUxO9UmqKUuqSUmp/KseVUmqcUuqYUmqfUupZR8cokouJiSEuLs7Z\nYQghhNsaNw5Wr3bc+zl7wZxpwLfAjFSO1wdKJ3w9D0xM+FOk49q1a7z//vssXryYmzdv8uyzzzJ2\n7Fief/7hf74vvviC2bNnc+TIEbJkyULlypUZO3YspUqVSuwTFhZG3rx5qVOnDp988gknT57k5MmT\n/PDDD3z77besXbuWHj16sG/fPsqWLcu4ceOoWrVqsli+//57xo4dy7FjxyhQoAA9e/akf//+yfpM\nmDCB0aNHc+3aNWrWrEmfPn3s+x9I2NbZszB4MPz9N8THOzsaIVzWrdtQ7V/j+4sFIeSVGqivxtr1\nPZ2a6LXWW5RSxdPo0hSYobXWwA6lVC6lVEGt9X+2jMOVntLSOvPXiI6OplatWty4cYPPPvuMkJAQ\nJk6cSK1atTh69CgFChQA4OzZs/Tq1YtixYpx69YtJk2axIsvvsjRo0fJmTNn4vW2bdvG8ePH+eST\nTwgODk48FhkZSceOHenXrx8FChRgxIgRtGjRglOnThEcHAzAZ599xqBBg+jfvz9hYWH88ccfDBky\nhODgYHr16gXA4sWL6dmzJ2+++SbNmjVj8+bNdOnSJfP/IYRjXLwIL7xgJHshRJr8gdxAUYD/YOfc\nElQYCdmy2fFNtdZO/QKKA/tTObYMeDnJ6/VAaHrXrFSpkk7LwYMHk7020qtrfFlq2LBhOk+ePGaP\nff/999rf318fOXIksS0mJkaXLFlSv/vuu2bPiY2N1ZGRkTpbtmx6+vTpie3Vq1fXWbJk0RcuXDB5\nf0CvX78+sW3Pnj0a0CtXrtRaa33z5k2dNWtWPXz48GTnDhkyROfPn1/HxsZqrbV+7rnndL169ZL1\n+d///qcBvXHjxjT/O6T8WQoHi47W+uWXnf+LI1/y5QZfd0DXBF0U9MmEtgU00198kflfRWC31uZz\noscU4ymluimldiuldl++fNnZ4TjVunXrqFSpEiVKlCA2NpbY2FgAqlevzu7duxP77dixg9q1a5Mn\nTx78/PwIDg7mzp07HDlyJNn1KlWqRP78+U3eJyAggLCwsMTXFSpUAIyRAoDffvuNu3fv0rp168Q4\nYmNjqVmzJhcvXuTs2bPExsby559/0rRp02TXbtGihU3+Wwg769cPtm51dhRCuLzbGHPRG4DTQA3g\nFhASAn372ve9nT1Hn55zQJEkrx9NaDOhtY4AIgBCQ0O1/UNzXVeuXGHHjh34+/ubHHvssccAOH36\nNHXq1KFy5cp89913FCpUiICAABo2bMi9e/eSnWMuyQNkz54dH5+HnxUDAgIAEs+/cuUKAI8//rjZ\n88+cOUNgYCBxcXGEhIQkO5bytXBBU6bAhAnOjkIIl3cTI8n/lqTtDSAH8Fwo+Pra9/1dPdEvAXop\npWZjFOHd1Daen/dEuXPnJjQ0lIkTJ5ocCwwMBGDVqlVERkayePFismbNCkBsbCzXrl0zOcfapWZz\n584NwLJly8x+WChbtixBQUH4+vqabE4jm9WkY9EimDULnDl6tX27aZufH/z8MyQp6BTCkx0+DPPm\nwcmT5o9Hx95k/t63iOJgYlsh3mZJlg60mgGlQ3OaP9GGnJrolVI/A2FAXqXUWWAYRq0CWutJwAqg\nAXAMiAQ62yMO7WH3/+Hh4axZs4aiRYumemccFRWFj48Pfn4P/wn88ssvicP8tvDCCy8QFBTE+fPn\nadiwYar9nnnmGRYvXsybb76Z2LZgwQKbxeFxFi+G5s2dHYV5X34JrVo5OwohHOLUKXipJ5i5P0pw\nBSNtHUzS9g3n6cXY6VC6td1DBJxfdf9qOsc10NNB4bid+/fvM2/ePJP2+vXrM2nSJMLCwnj33Xcp\nWbIkV69eZefOnRQoUIB+/fpRs2ZN4uLi6Ny5M127duXAgQN8/vnn5MqVy2bx5cqVi+HDh9O3b19O\nnTpFtWrViI+P58iRI2zcuJGFCxcCMGjQIFq0aEGPHj1o3rw5mzdvZtWqVTaLw6No
...[remaining base64 PNG data omitted; this output cell renders the pivot plot produced by pivot_plot]...",
      "text/plain": [
       "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = plt.figure(figsize=(8, 8))\n", + "results = pd.concat(dfs)\n", + "pivot_plot(results, fig=fig);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "all,-slideshow", + "formats": "ipynb,Rmd" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/source/learning/Full_model_LASSO.Rmd b/doc/source/learning/Full_model_LASSO.Rmd new file mode 100644 index 000000000..31c9d66a2 --- /dev/null +++ b/doc/source/learning/Full_model_LASSO.Rmd @@ -0,0 +1,145 @@ +--- +jupyter: + jupytext: + cell_metadata_filter: all,-slideshow + formats: ipynb,Rmd + text_representation: + extension: .Rmd + format_name: rmarkdown + format_version: '1.1' + jupytext_version: 1.1.1 + kernelspec: + display_name: Python 3 + language: python + name: python3 +--- + +# Inference in the full model + +This is the same example as considered in [Liu et al.](https://arxiv.org/abs/1801.09037) though we +do not consider the special analysis in that paper. We let the computer +guide us in correcting for selection. + +The functions `full_model_inference` and `pivot_plot` below are just simulation utilities +used to simulate results in least squares regression. The underlying functionality +is contained in the function `selectinf.learning.core.infer_full_target`. + +```{python} +import functools + +import numpy as np, pandas as pd +import matplotlib.pyplot as plt +# %matplotlib inline +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance # to generate the data +from selectinf.learning.core import normal_sampler # our representation of the (limiting) Gaussian data + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.Rfitters import logit_fit +``` + +We will know generate some data from an OLS regression model and fit the LASSO +with a fixed value of $\lambda$. In the simulation world, we know the +true parameters, hence we can then return +pivots for each variable selected by the LASSO. These pivots should look +(marginally) like a draw from `np.random.sample`. This is the plot below. + +```{python} +np.random.seed(0) # for replicability + +def simulate(n=100, + p=20, + s=5, + signal=(0.5, 1), + sigma=2, + alpha=0.1, + B=4000, + verbose=False): + + # description of statistical problem + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + + # this declares our target as linear in S where S has a given covariance + sampler = normal_sampler(S, covS) + + def base_algorithm(XTX, lam, sampler): + + p = XTX.shape[0] + success = np.zeros(p) + + loss = rr.quadratic_loss((p,), Q=XTX) + pen = rr.l1norm(p, lagrange=lam) + + scale = 0. 
+ noisy_S = sampler(scale=scale) + loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) + problem = rr.simple_problem(loss, pen) + soln = problem.solve(max_its=50, tol=1.e-6) + success += soln != 0 + + return set(np.nonzero(success)[0]) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + lam = 3.5 * np.sqrt(n) + selection_algorithm = functools.partial(base_algorithm, XTX, lam) + if verbose: + print(selection_algorithm(sampler)) + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + sampler, + success_params=(1, 1), + B=B, + fit_probability=logit_fit, + fit_args={'df':20}) +``` + +Let's take a look at what we get as a return value: + +```{python} +while True: + df = simulate(verbose=True) + if df is not None: + break +df.columns +``` + +```{python} +dfs = [] +for i in range(10): + df = simulate() + if df is not None: + dfs.append(df) +``` + +```{python} +fig = plt.figure(figsize=(8, 8)) +results = pd.concat(dfs) +pivot_plot(results, fig=fig); +``` + +```{python collapsed=TRUE} + +``` diff --git a/doc/source/learning/Full_model_LASSO.ipynb b/doc/source/learning/Full_model_LASSO.ipynb new file mode 100644 index 000000000..49845025b --- /dev/null +++ b/doc/source/learning/Full_model_LASSO.ipynb @@ -0,0 +1,276 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inference in the full model\n", + "\n", + "This is the same example as considered in [Liu et al.](https://arxiv.org/abs/1801.09037) though we\n", + "do not consider the special analysis in that paper. We let the computer\n", + "guide us in correcting for selection.\n", + "\n", + "The functions `full_model_inference` and `pivot_plot` below are just simulation utilities\n", + "used to simulate results in least squares regression. The underlying functionality\n", + "is contained in the function `selectinf.learning.core.infer_full_target`." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/sklearn/ensemble/weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. 
It will be removed in a future NumPy release.\n", + " from numpy.core.umath_tests import inner1d\n", + "Using TensorFlow backend.\n", + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:455: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n", + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:456: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n", + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:457: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n", + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:458: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n", + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:459: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n", + "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:462: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", + " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n", + "R[write to console]: Loaded gbm 2.1.5\n", + "\n", + "R[write to console]: randomForest 4.6-14\n", + "\n", + "R[write to console]: Type rfNews() to see new features/changes/bug fixes.\n", + "\n" + ] + } + ], + "source": [ + "import functools\n", + "\n", + "import numpy as np, pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import regreg.api as rr\n", + "\n", + "from selectinf.tests.instance import gaussian_instance # to generate the data\n", + "from selectinf.learning.core import normal_sampler # our representation of the (limiting) Gaussian data\n", + "\n", + "from selectinf.learning.utils import full_model_inference, pivot_plot\n", + "from selectinf.learning.Rfitters import logit_fit" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will know generate some data from an OLS regression model and fit the LASSO\n", + "with a fixed value of $\\lambda$. In the simulation world, we know the\n", + "true parameters, hence we can then return\n", + "pivots for each variable selected by the LASSO. These pivots should look\n", + "(marginally) like a draw from `np.random.sample`. This is the plot below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(0) # for replicability\n", + "\n", + "def simulate(n=100, \n", + " p=20, \n", + " s=5, \n", + " signal=(0.5, 1), \n", + " sigma=2, \n", + " alpha=0.1, \n", + " B=4000,\n", + " verbose=False):\n", + "\n", + " # description of statistical problem\n", + "\n", + " X, y, truth = gaussian_instance(n=n,\n", + " p=p, \n", + " s=s,\n", + " equicorrelated=False,\n", + " rho=0.5, \n", + " sigma=sigma,\n", + " signal=signal,\n", + " random_signs=True,\n", + " scale=False)[:3]\n", + "\n", + " dispersion = sigma**2\n", + "\n", + " S = X.T.dot(y)\n", + " covS = dispersion * X.T.dot(X)\n", + " \n", + " # this declares our target as linear in S where S has a given covariance\n", + " sampler = normal_sampler(S, covS) \n", + "\n", + " def base_algorithm(XTX, lam, sampler):\n", + "\n", + " p = XTX.shape[0]\n", + " success = np.zeros(p)\n", + "\n", + " loss = rr.quadratic_loss((p,), Q=XTX)\n", + " pen = rr.l1norm(p, lagrange=lam)\n", + "\n", + " scale = 0.\n", + " noisy_S = sampler(scale=scale)\n", + " loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0)\n", + " problem = rr.simple_problem(loss, pen)\n", + " soln = problem.solve(max_its=50, tol=1.e-6)\n", + " success += soln != 0\n", + " \n", + " return set(np.nonzero(success)[0])\n", + "\n", + " XTX = X.T.dot(X)\n", + " XTXi = np.linalg.inv(XTX)\n", + " resid = y - X.dot(XTXi.dot(X.T.dot(y)))\n", + " dispersion = np.linalg.norm(resid)**2 / (n-p)\n", + " \n", + " lam = 3.5 * np.sqrt(n)\n", + " selection_algorithm = functools.partial(base_algorithm, XTX, lam)\n", + " if verbose:\n", + " print(selection_algorithm(sampler))\n", + " # run selection algorithm\n", + "\n", + " return full_model_inference(X,\n", + " y,\n", + " truth,\n", + " selection_algorithm,\n", + " sampler,\n", + " success_params=(1, 1),\n", + " B=B,\n", + " fit_probability=logit_fit,\n", + " fit_args={'df':20})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's take a look at what we get as a return value:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{18, 13, 14}\n" + ] + }, + { + "data": { + "text/plain": [ + "Index(['B', 'alpha', 'coverage', 'id', 'length', 'lower', 'nfeature',\n", + " 'nsample', 'pivot', 'pvalue', 'target', 'upper', 'variable',\n", + " 'bonferroni_coverage', 'bonferroni_length', 'bonferroni_lower',\n", + " 'bonferroni_pvalue', 'bonferroni_upper', 'naive_coverage',\n", + " 'naive_length', 'naive_lower', 'naive_pivot', 'naive_pvalue',\n", + " 'naive_upper'],\n", + " dtype='object')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "while True:\n", + " df = simulate(verbose=True)\n", + " if df is not None:\n", + " break\n", + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jonathantaylor/git-repos/selectinf/selectinf/distributions/discrete_family.py:86: RuntimeWarning: divide by zero encountered in log\n", + " self._lw = np.array([np.log(v) for v in xw[:,1]])\n" + ] + } + ], + "source": [ + "dfs = []\n", + "for i in range(10):\n", + " df = simulate()\n", + " if df is not None:\n", + " dfs.append(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + 
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAfoAAAHpCAYAAABqV/58AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0\ndHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzdd3xUVfrH8c9JJUGKdAQiooBrV1DX\nBjEJCBFpgmJDxMqKujYUFcQuylpwsWIBlQVlQXqREOBnQSy4qKiISm/SWwgp5/fHxJEkEzJJZube\nmfm+X6+8wpy5ufcJM5nnPvece46x1iIiIiKRKcbpAERERCR4lOhFREQimBK9iIhIBFOiFxERiWBK\n9CIiIhFMiV5ERCSCxTkdQDDUq1fPNm/e3OkwREREQuLrr7/eaq2t7+u5iEz0zZs356uvvnI6DBER\nkZAwxqwu6zlduhcREYlgSvQiIiIRTIleREQkginRi4iIRDAlehERkQgWkaPu/bF79262bNlCXl6e\n06FIJcTHx9OgQQNq1qzpdCgiIq4WlYl+9+7dbN68mSZNmpCUlIQxxumQpAKsteTk5LB+/XoAJXsR\nkcOIykv3W7ZsoUmTJiQnJyvJhyFjDMnJyTRp0oQtW7Y4HY6IiKtFZaLPy8sjKSnJ6TCkipKSktT1\nIiJSjqhM9IAq+Qig11BEpHxRm+hFRESigRK9iIhIBFOiD1PDhg2jXr16TocRNN9//z3GGBYsWOB0\nKCIiYU2JXkREJII5muiNMW8ZY7YYY74v43ljjBlpjFlpjFlmjDkj1DFKcXl5eRQUFDgdhoiI+Mnp\nCXPeAf4NjC3j+c5Ay6Kvs4FXir5LObZv387999/PlClT2LVrF2eccQbPP/88Z5/913/fv/71L8aP\nH8+KFSuoVq0aZ511Fs8//zzHHXecd5vU1FTq1atHx44dGT58OKtWrWLVqlW8+eab/Pvf/+bjjz9m\nwIABLFu2jNatWzNy5EguuOCCYrGMHj2a559/npUrV9KoUSNuvfVWBg0aVGybl19+maeeeort27eT\nlpbG7bffHtz/IAlv1sKLL8KkSbB7t9PRiJQp5wBs3gS5ub6f3/S3C2n/zfNBjcHRRG+tXWSMaX6Y\nTboBY621FlhsjKltjGlsrd0YyDjcdJeWtVXfR25uLhkZGezcuZNnn32WBg0a8Morr5CRkcEvv/xC\no0aNAFi3bh0DBw7k6KOPZvfu3bz66quce+65/PLLL9SqVcu7v08//ZRff/2V4cOHk5yc7H1u//79\nXHvttdx55500atSIRx55hJ49e7J69WqSk5MBePbZZ3nggQcYNGgQqampfP311wwZMoTk5GQGDhwI\nwJQpU7j11lu55ZZb6N69OwsXLqR///5V/4+QyPXCC3DXXU5HIVKuJKD5IY9zgD+AlKLHOzYdE/wg\nrLWOfhX9H3xfxnPTgfMPeZwFtC1vn23atLGHs3z58mKPPenVHV/+evjhh23dunV9Pjd69GgbHx9v\nV6xY4W3Ly8uzLVq0sPfcc4/Pn8nPz7f79++3RxxxhB0zZoy3vX379rZatWp206ZNpY4P2KysLG/b\n0qVLLWBnzZplrbV2165dtnr16nbYsGHFfnbIkCG2YcOGNj8/31pr7Zlnnmk7depUbJsbbrjBAjY7\nO/uw/w8lX0uJEmec4fwfq770VcGvvWDTwKaAXVXU9nnj7gH5kwC+stZ3ToyYwXjGmJuMMV8ZY776\n448/nA7HUfPmzaNNmzYcc8wx5Ofnk5+fD0D79u356quvvNstXryYDh06ULduXeLi4khOTmbv3r2s\nWLGi2P7atGlDw4YNSx0nISGB1NRU7+MTTjgB8FwpAPj888/Zt28fvXv39saRn59PWloamzdvZt26\ndeTn5/PNN9/QrVu3Yvvu2bNnQP4vJEKtXOl0BCIVsgdPX/R8YA1wIRCqTien++jLsx5odsjjpkVt\npVhrXwdeB2jbtq0NfmjutXXrVhYvXkx8fHyp54499lgA1qxZQ8eOHTnrrLN47bXXOOqoo0hISODi\niy/mwIEDxX7GV5IHqFGjBjExf50rJiQkAHh/fuvWrQCceOKJPn9+7dq1JCYmUlBQQIMGDYo9V/Kx\niNfOneqXl7CyC0+S//yQthuBUC3H5fZEPxUYaIwZj2cQ3i4b4P75SFSnTh3atm3LK6+8Uuq5xMRE\nAGbPns3+/fuZMmUK1atXByA/P5/t27eX+pnKTjVbp04dAKZPn+7zZKF169YkJSURGxtbanEaLVYj\nZVq1qnTb0UfDRx+FPBSRw7nxRvj8q138xj/IYbm3/b5r7qLnxdfwM9Ckca2ydxAgjiZ6Y8x/gFSg\nnjFmHfAwEA9grX0VmAlkAiuB/cB1wYjDRlj9n56ezty5c0lJSSmzMs7JySEmJoa4uL/eAh988IH3\nMn8gnHPOOSQlJbFhwwYuvvjiMrc7/fTTmTJlCrfccou3bdKkSQGLQyLM6tWl21q2hNNOC30sIocx\nd8tW1nAdHJLkBw9+iSefHBjSOJwedX9FOc9b4NYQhRN2Dh48yMSJE0u1d+7cmVdffZXU1FTuuece\nWrRowbZt21iyZAmNGjXizjvvJC0tjYKCAq677jquv/56fvjhB0aMGEHt2rUDFl/t2rUZNmwYd9xx\nB6tXr6Zdu3YUFhayYsUKsrOzmTx5MgAPPPAAPXv2ZMCAAfTo0YOFCxcye/bsgMUhEaasil7ERTZs\n2MyaNRnAodPEvMbgwTeFPBa3X7qXw9izZw+9e/cu1Z6dnU12djZDhw7l4YcfZvPmzTRo0ICzzjqL\nrl27AnDyySfzzjvvMGzYMCZPnsypp57Khx9+yOWXXx7QGAcNGsRRRx3F888/z7/+9S+qVatGq1at\nih2nR48evPTSSzz99NOMGTOG1NRU3nzzTS666KKAxiIRwleib9481FGIlKmwsJDOnbvwV5I3wJvU\nqXMdNWqEPh5jI+26NZ7BeIeOLi/pxx9/5G9/+1sII5Jg0WsZhXr2hKKrQV5jx8I11zgTj4gPL744\nn3/+82LgIJ454a7i9NPhm2+CczxjzNfW2ra+nlNFLyLhRRW9hIG6ddOAKXjG3HuuvDr1NlWiF5Hw\n4mswnvroxWHW2mJ3KHneph2LbePU2zRiJswRkSiwZw+UvAU0Lg6OOsqZeESAn3/+mfPPP59Vh1xt\nctOFJyV6EQkfvqr5pk09yV7EAcuXL6d9+/Z89tlnpKWlsXbtWsBdF56U6EUkfLipTJKot2zZMlJT\nU9m8eTMAmzdv9lb1bnqrKtGLSPjwVSYp0YsDvvnmGy688EL+XFvliCOOYPbs2VxwwQUUFqqiFxGp\nHE2WIy6wZMkS0tPTvVOG16pVi48//pgLLrgAgM2b4eDB4j9TsyYEcD6yClHHloiED1X04rBPP/2U\nzp07s2fPHgCOPPJIPv74Y9q0aePdpqz
z0UouG1JlquhFJHyoohcHLViwgIsuusib5OvVq0d2dnax\nJA/uOx9Vog9Tw4YNwxjjc5rYXr16FVsnvjyrVq3CGMP06dMDGKFIELjtE1SixtKlS8nMzGTfvn2A\nZ/nuBQsWcOqpp5ba1m3no0r0YW7u3Ll8+eWXVdpH48aN+fzzzzn//PMDFJVIEOzfDyWXL46J8dxe\nJxJkJ510krewOuqoo1i4cCEnnniiz23ddj6qRB/G6tSpw8knn8wTTzxRpf0kJiby97//PaAr14kE\n3Jo1pduaNIH4+NDHIlEnPj6e8ePHc+ONN7Jw4UJat25d5raq6CVgjDE8+OCDTJ06le+++87nNhs3\nbqR///60aNGCpKQkWrVqxUMPPcTBQ4aElrx0369fP84888xS+xo1ahTJycne/qnCwkKefvppjjvu\nOBITE2nVqhVjxowJwm8qgvs+PSXqJCYm8vrrr3PccccddjtV9BJQvXv3pmXLlmVW9Vu3bqVOnTo8\n99xzzJ49m3vvvZe3336b2267rcx9Xn755Xz11Vf8/vvvxdonTJhAZmYmNYrWWbztttt4/PHHuemm\nm5gxYwY9evSgf//+6uuX4HDbp6dEtHHjxjFkyBAqusKrte47J9XtdeDcPQ++VPBNFRMTw+DBg7n+\n+ut59NFHadWqVbHnTz75ZEaMGOF9fN5551G9enX69+/PSy+9REJCQql9dujQgbp16zJhwgTuv/9+\nANavX88nn3zCBx98AMDKlSt55ZVXePvtt7n22msByMjIYOPGjTzyyCN06dKlQr+HSLnc9ukpEWvM\nmDFcd911WGuJj49n6NChfv/s1q2Qk1O8LTkZ6tULcJAVoIo+Alx99dWkpKTw1FNPlXrOWssLL7zA\nCSecQFJSEvHx8Vx11VXk5uayxlefJxAXF0fPnj2ZMGGCt+3DDz+kevXqXHzxxQBkZWURExNDjx49\nyM/P936lp6fz7bffUlBQEJxfVqKXKnoJgTfeeMOb5MHz2ffnSHt/uO0eelCijwhxcXEMGjSI9957\nj9UlPgxfeOEF7rnnHnr06MGUKVNYsmQJo0aNAuDAgQNl7rNPnz58++23rFixAvBctu/atStJSUmA\np0ugoKCAWrVqER8f7/3q168f+fn5bNy4MUi/rUQtN00eLhFp1KhR3HTTTd4kf9ppp5GdnU316tX9\n3ocbz0d16T5C9O/fn8cff5zhw4cXa//www/p1atXsT785cuXl7u/9u3b07BhQyZMmEDfvn1ZvHgx\ngwcP9j5fp04d4uLi+PTTT4mJKX2+2KBBgyr8NiI+uGnycIk4zz33HHfffbf3cdu2bZkzZw516tSp\n0H7c2MOkRB8hEhMTueeeexg8eDBt2rQhvuiWo5ycHBITE4tt+/7775e7v9jYWHr37s2ECROoVq0a\ntWvXplOnTt7n09LSKCgoYNeuXXTo0CGwv4xISbm5sGFD6faUlNDHIhHn6aefLlbI/P3vf2f27NnU\nqlWrwvty44UnXboHzwA4t3xVwc0330yNGjX47LPPvG0dOnRgwoQJvPzyy8yZM4e+ffuycuVKv/Z3\n+eWX88MPP/D888/TvXv3YgP3WrduzS233EKfPn0YPnw4WVlZzJgxg2eeeYYbbrihSr+HSClFa3wX\n07gxlDiJFakIay2PPvposSR/wQUXMHfu3EoleXDnhScl+giSnJzMnXfeWaxt6NChXHHFFTz00ENc\nccUVJCQkMHLkSL/2d95559GsWTM2btxInz59Sj0/atQohgwZwtixY8nMzKRfv37MmDGDdu3aBeT3\nEfFyY5kkYe+JJ57g4Ycf9j6+8MILmTVrlvcW4spw41vVVPQewXDQtm1b+9VXX5X5/I8//sjf/va3\nEEYkwaLXMkq8+SaUvFLUpw/85z/OxCMR4ZNPPqFTp07s27ePjh07MnnyZJKTkyu9P2uhVi0omlPM\na8MGzwWoYDLGfG2tbevrOVX0IuJ+biyTJOydf/75TJ8+nd69ezNlypQqJXmAHTtKJ/nERGjYsEq7\nrTINxpPIN2MGPPus77nSJTxs3Vq6zemOzwg0bhyMGgXRdXdsKpDKCSdUfU95eaXbUlI8ay85SYle\nItv330PXrlBY6HQkEmiq6ANqwQK46iqnowimAmAwcCPQMmRHdcPbVJfuJbL9979K8pFKFX1AHTIR\nZgTKB/oBzwJpwO+H3TqQ3PA2VaKXyPbrr05HIMHQrBmUWNdBqiZy/1TygKuA94oerwNeD9nR3TDN\nSNReurfWYty0mI1UmF93jPi6qVXCW6NG8N57EBvrdCQRJTL/VA4CfYDJh7TdDPhe7TOQYmPhxhuh\nZ8+gH6pcUZno4+PjycnJqfIIS3FWTk6OdwbAMvkarf3xx9CiRVBikiCLi/NU8zpJD6jCQt+JfulS\nqFkz9PEEQm7uAW69tRfZ2TO8bX373sbQoS+GpMhr0ACOOCLoh/FLVCb6Bg0asH79epo0aUJSUpIq\n+zBjrSUnJ4f169fT8HD3reTnw/r1pdvPOQcqsEiFSKTbssUzy/ChatSAU08Nz3Oq/fv306NHD7Kz\n53rb7rnnHp555pmo/LyPykRfs+gUdcOGDeT5uh9CXC8+Pp6GDRt6X0uf1q2Dksvl1q+vJC9SQlnT\nFIRjTty3bx+XXHIJ2dnZ3rYHH3yQxx57LCqTPERpogdPsj9skpDw58ZJp0VcKFL+VPbu3Uvnzp35\n5JNPvG2PPPIIQ4cOdTAq50VtopcooNnURPwSKX8q1apVo0mTJt7HTz31FPfff7+DEbmDEr1Erkj5\n9BIJskip6OPi4nj33XfJy8vj/PPPL7XIV7RSopfIFSmfXiJBFknnxPHx8Xz44YfEOD3vrIvof0Ii\nVyR9eokEUbieE2/evJkRI0aUmlNDSb44VfQSucL100skhKwNz3PiDRs2kJ6ezk8//cTWrVt56qmn\nonZUfXl02iORqaDA92p1SvQixWzbBvv3F29LSoJ69ZyJxx9r166lffv2/PTTTwCMGDGC7777zuGo\n3EuJXiLTxo2eCXMOdeSR4TvNl0iQhNs99L///jvt2rVj5cqVgGcA3vjx4znllFMcjsy9dOleIlM4\nXosUcUA49XCtXLmStLQ01q5dC/w18K5bt24OR+ZuSvQSmcLp00vEQeFyTvzTTz+Rnp7Ohg0bAEhM\nTGTy5Ml07tzZ4cjcT4leIlO4fHqJOCwczom///57MjIy2Lx5MwBJSUlMnTqVjIwMhyMLD0r0EpnC\n4dNLxAXcfk78v//9j4yMDLZu3QpA9erVmT59Oqmpqc4GFkaU6CUyuf3TS8QlfJ0Tu+lP5eDBgxw8\neBCAGjVqMGvWLM477zyHowovGnUvkcntn14iLuHrnNhNF7/OPPNMZs2aRdOmTfn444+V5CtBFb1E\nnsJCXboX8cPOnbB7d/G2xERo2NCZeMpy7rnnsnLlShITE50OJSypopfIs3kz5OYWb6tZE2rXdiYe
\nEZfyVc2npICTM8guWLCA5cuXl2pXkq88JXqJPGVV826dAUTEIW7r4Zo7dy6dO3cmPT2dFStWOBdI\nhFGil8ijgXgifnFT//yMGTO45JJLOHDgAJs2baJv376lFquRylGil8ij/nkRv7ilov/oo4/o0aOH\nd3R9SkoK77//vhapCRAleok8quhF/OKGiv7DDz+kd+/e5OXlAXDMMcewaNEijj322NAGEsGU6CXy\nqKIX8YvTFf37779Pnz59yC9agKply5YsWrSIo/X3GlBK9BJ5VNGL+MXJiv7tt9/mmmuuobCwEIC/\n/e1vLFy4kKZNm4YmgCiiRC+RxVpV9CJ+2LMHtm8v3hYXB0cdFfxjv/baa/Tv39872O7kk09mwYIF\nNG7cOPgHj0JK9BJZtm6F/fuLtyUnQ716zsQj4lK+zoebNYPY2OAe11rLF1984X182mmnMX/+fBo0\naBDcA0cxzYwnkaWsTkeN3hUpxqkeLmMMb7zxBrm5ufzyyy/MmTOHI488MvgHjmJK9OKs3Fy4916Y\nOhX27q36/opuzylGl+1db+dOGDgQsrNLT2oowXHgQOm2UP2pxMbGMmbMGHJycqhRo0ZoDhrFlOjF\nWUOHwksvBfcYGojnejfcAP/9r9NRSDD+VKy1zJw5k8zMzGL3xcfFxSnJh4j66MVZkyYF/xiq6F0t\nL89zQUecF+g/FWstDz74IF26dOGf//ynZrpziBK9OKegwHdHYaB16hT8Y0ilrV/vSfbirJgY6NAh\ncPuz1nLPPffw1FNPATBy5EjeeuutwB1A/KZEL87ZsAGKJsoIitq14YUX4NRTg3cMqbJQnOvJ4dWv\nD++8A02aBGZ/hYWF3H777Tz33HPeti5dunDVVVcF5gBSIeqjF+f4+oQ/5RTIygrM/o88Mvj3CkmV\n+XobdOsGo0eHPJSoVbdu4G5MKSws5JZbbuGNN97wtvXs2ZP//Oc/JCQkBOYgUiFK9OIcX7fCHXus\n7nmPMr7eBi1b6m0QjgoKCrjhhht45513vG2XX3457777LvHx8c4FFuV06V6co6lqBb0NIkV+fj59\n+/YtluSvueYa3nvvPSV5hynRi3M0Va2gt0EkyMvL48orr2TcuHHetv79+/P2228TF6cLx05Tohfn\nqJQT9DaIBKtWrWLevHnex3/20cdqjIwrKNGLc1TKRb2CAli7tnS73gbhpWXLlnz88cfUqlWL22+/\nnZdffpmYGKUXt9A1FXFGYaHzi2GL43zdYVmnDmjCtPDTpk0bvv32W44++uhiM+CJ8xw/5TLGdDLG\n/GyMWWmMud/H8ynGmGxjzFJjzDJjTKYTcUqAbd5cel76mjU9975L1NBFnfC0d+9efvzxx1LtzZs3\nV5J3IUcTvTEmFhgFdAZOAK4wxpxQYrOHgA+stacDfYCXQxulBIU6ZgW9DcLR7t276dSpE+3ateP7\n7793Ohzxg9MV/VnASmvtb9bag8B4oFuJbSxQs+jftYANIYxPgkWlnKC3QbjZuXMnHTt25NNPP2Xr\n1q1kZGSwY8cOp8OScjjdR98EOHQozjrg7BLbDAPmGmNuA6oDGaEJTYJKpZygt0E42bZtGx07duSb\nb77xtg0ePFhryYcBpyt6f1wBvGOtbQpkAu8aY0rFbYy5yRjzlTHmqz/++CPkQUoF+fqEVykXdZTo\nw8Mff/xBWlpasST/8ssvc8cddzgYlfjL6US/Hmh2yOOmRW2Huh74AMBa+zlQDSg1Oaa19nVrbVtr\nbdv69esHKVwJGI24F3TpPhxs2rSJ1NRUli1bBoAxhtGjRzNgwACHIxN/OZ3ovwRaGmOOMcYk4Bls\nV3Jl6jVAOoAx5m94Er1K9nCnij7q6Q5L91u/fj3t27dn+fLlAMTExDBmzBiuv/56hyOTinC0j95a\nm2+MGQjMAWKBt6y1PxhjHgW+stZOBe4G3jDG3IlnYF4/a611LmqpMmv1CS+6w9Ll1qxZQ1paGr/+\n+isAsbGxvPfee/Tp08fhyKSinB6Mh7V2JjCzRNvQQ/69HDgv1HFJEP3xB+TkFG9LTvaslSlRQ/3z\n7jZ69Ghvko+Li2P8+PFceumlDkclleH0pXuJRmVV85poI6qof97dhg0bRv/+/UlISGDSpElK8mFM\niV5CT/3zgip6t4uJieH1119n8eLFXHLJJU6HI1WgRC+hp/55QRW926xevZrCwsJibbGxsZx++ukO\nRSSBokQvoaeKXlBF7yZLly6lTZs23HzzzaWSvYQ/JXoJPVX0gip6t/jyyy9JS0tj27ZtjB49mvvv\nL7W2mIQ5JXoJPVX0Uc9aVfRu8Pnnn5ORkcHOnTsBqF27Nr1793Y4Kgk0JXoJLd1DL8DWraXvsKxe\nXXdYhtKiRYvo2LEju3fvBqBu3brMnz+fM8880+HIJNCU6CW0duyAPXuKtyUmQoMGzsQjjijroo7u\nsAyNrKwsOnfuzN69ewGoX78+2dnZGngXoZToJbTK+oSP0VsxmuiyvXPmzJlDly5d2L9/PwCNGjVi\nwYIFnHzyyQ5HJsGiT1cJLY3AEvQ2cMq0adPo2rUrBw4cAKBJkyYsXLiQE044weHIJJiU6CW0VMoJ\nehs4IScnhwEDBnCwaIGBo48+mkWLFtGqVSuHI5NgU6KX0FIpJ+ht4ISkpCRmzpxJnTp1aNGiBQsX\nLqRFixZOhyUh4PiiNhJmfv4ZbrwRvvwSCgoq/vP5+aXbVMqFnU8+gdtvhx9+8NxIUVF5eaXb9DYI\nvlNOOYWsrCzq169PkyZNnA5HQkSJXvxnLXTtCitWBHa/KuXCyt69kJlZ+uaJqtLbIPC2b99OnTp1\nirWddtppDkUjTtGle/Hfzz8HPsmDSrkws2BB4JN8tWq6wzLQXn31VVq2bMnSpUudDkUcpkQv/vv9\n98Dv89RTQZcQw0ow3gaZmbrDMpBGjhzJgAED2L59Ox06dGD58uVOhyQO0p+W+M/XUOnKiomBs8+G\nCRM0S0qYCeTbID4eLroIXn45cPuMds8++yx33HGH9/Gxxx5L48aNHYxInKY+evGfr6HSDzwADz9c\n8X3FxECc3n7hyNfbYPRouOaaiu8rNtbzJYHx+OOPM2TIEO/jc889l1mzZlGzZk0HoxKn6ZNW/Oer\nlDv2WEhICHko4hxfb4PjjtPbwEnWWh5++GEee+wxb1u7du2YMWMGRxxxhIORiRso0Yv/dPOzoLeB\n21hrGTx4MMOHD/e2paenM2XKFKpXr+5gZOIWSvTiP01nFvX27fOsPHeo2Fho2tSZeKKdtZa77rqL\nF154wdvWqVMnJk2aRFJSkoORiZtoMJ7458AB2LSpeJsx0KyZM/GII3xV802aaLiFUyZPnlwsyXft\n2pWPPvpISV6KUaIX/6xZU7rtqKPUMRtldFHHXXr06MGtt94KwKWXXsqHH35IYmKiw1GJ2+g8XPxT\n1vKyElV8VfRK9M4xxjBy5EhOP/10rr32WuJ0aUV80Lt
C/KNPeEHne07LL1or4tCEHhMTw/XXX+9U\nSBIGdOle/KNPeEHne07Ky8ujT58+9O/fn4LKLCglUUsVvfhHn/CCzveckpuby2WXXcbUqVMBiI+P\n54033iBG8waLH5ToxT8ahSXofM8JOTk5XHrppcyaNcvbVrNmTYymjhY/6XRQ/KNZUqKe7rAMvf37\n99O1a9diSf6+++7jueeeU6IXvynRS/kOHoT160u3p6SEPhZxjO6wDK29e/eSmZnJvHnzvG1Dhw7l\nqaeeUpKXCtGleynf2rVgbfG2hg1Bk3JEFfXPh86uXbvIzMzks88+87Y9/vjjPPjggw5GJeFKiV7K\np45ZQW+DUNmxYwcXXXQRX375pbft2Wef5Z577nEwKglnSvRSPpVygt4GoXLNNdcUS/Ivvvgit99+\nu4MRSbhTH72UT6WcoLdBqIwYMYIGDRoA8OqrryrJS5WpopfyqZQTdIdlqBx//PFkZWWxdOlSrrnm\nGqfDkQigRC/lUykn6A7LYCksLCw18c1JJ53ESSed5FBEEml06V7Kp4o+6ukOy+BYvXo1bdq0YfHi\nxU6HIhFMiV4OLz8f1q0r3a5EH1XWrdMdloH222+/0a5dO7799ls6derE119/7XRIEqGU6OXw1q+H\nkgto1K0LRxzhTDziCPXPB9aKFSto164da4pmIcrJyWHz5s0ORyWRSoleDk+f8IL65wNp+fLltG/f\nnvVFfSHVqlVj6tSpZGZmOrRq57QAACAASURBVByZRCoNxpPD0ye8oPO9QFm2bBkZGRn88ccfACQn\nJzNt2jTS0tIcjkwimRK9HJ4+4QWd7wXCN998Q4cOHdi+fTsARxxxBDNnzuSCCy5wODKJdLp0L4en\nT3hB53tVtWTJEtLT071JvmbNmsydO1dJXkJCFb0cnj7hBZ3vVcXq1avJyMhgz549ANSuXZuPP/6Y\ntm3bOhyZRAtV9HJ4+oSPWNZ61pgv72vfPs8ChiXpbeCflJQUbr75ZgDq1q1Ldna2kryElCp6KVth\noe9FyPUJH/aeew6eeQYqe0dXvXq6w9JfxhieeeYZkpKSuOyyyzTjnYScEr2UbeNGyMsr3larFtSu\n7Uw8EhCLFsHdd1dtHzrXqxhjDI8++qjTYUiU0qV7KZtmxItIH39c9X0cc0zV9xGppk2bxtVXX01+\nfr7ToYgAqujlcHbuLN1Wr17o45CA2rat6vvo1q3q+4hEkyZN4vLLLyc/Px9rLWPHjiU2NtbpsCTK\nKdFL2XbtKt2my/Zhz9f5W1wc+JOPGjSAG26Aq64KfFzhbvz48Vx99dUUFE0Z/cUXX7Bt2zbv2vIi\nTlGil7L5yghK9GHP18s6aRJccknoY4kUY8eO5brrrqOwsBCA1q1bk5WVpSQvrqA+eimbr4xQq1bo\n45CA0vlbYL355pv069fPm+RPOOEEFixYQJMmTRyOTMRDiV7KpowQkXz1yOj8rXJefvllbrjhBmzR\nGr6nnHIKCxYsoFGjRg5HJvIXJXopm/roI5LO3wLjhRde4NZbb/U+PuOMM5g/fz7169d3MCqR0pTo\npWzKCBFJL2vVvfLKK9x5553ex2effTZZWVnUrVvXwahEfFOil7Kpjz7i5OXB/v3F22JiNMtdRXXs\n2NHbB3/eeecxd+5cautsSVxKiV7KptIv4vjqjalZ05PsxX/HHnssWVlZXH755cyePZuaNWs6HZJI\nmXR7nZRNffQRR+dugdO6dWvGjx/vdBgi5apUojfGnAC0A1KAekAOsAX4Flhkrd0TsAjFOcoKEUcv\nacVZaxk8eDCZmZm0a9fO6XBEKszvRG+MaQrcBPQHGv/ZXGIzCxQYY+YBrwDT7Z/3nUj4UR99xFGi\nr5jCwkIGDhzIK6+8wqhRo5g7dy7nnHOO02GJVEi5id4YUwcYBtwMxAOrgHHAl8AmYDuQBNQFjgfO\nAVKBi4CfjTF3W2tnBT50Caq8PM9C5IcyxtOhK2FL527+Kygo4Oabb+bNN98EYO/evYwePVqJXsKO\nPxX9SiARGA2MsdYuKe8HjDE1gT54rgBMN8bcaa0dWaVIJbR27y7dplFbYU/DLvyTn59P//79effd\nd71tV155Ja+99pqDUYlUjj+J/l3gSWvtZn93aq3dDbwOvG6M6Q5Uq2R84hRd441IelnLl5eXR9++\nfYsNtOvXrx+jR4/WSnQSlspN9NbaO6pyAGvtR1X5eXGIMkJE0st6eAcPHuSKK65g0qRJ3rabbrqJ\nV155hRhdzZIwVeF3rjEmpejS/OG2qWGMSal8WOI4deZGJCX6suXm5tKrV69iSX7gwIG8+uqrSvIS\n1irz7v0dKK/Kv71oOwlXyggRSQva+Jafn0/37t2ZNm2at+3uu+9m5MiRGFPy5iKR8FKZRG8ofVud\nRBqN2opIOn/zLS4ujjPPPNP7+IEHHuDZZ59VkpeIEKyZ8RoB+8rdStxLGSEi6WUt2yOPPEJubi7J\nyckMHTpUSV4ihl+J3hjTt0TTaT7aAGLxzJZ3NfBdFWMTJ6mPPiIp0ZfNGMPTTz+tBC8Rx9+K/h08\ns95R9L1b0VdJf/6F7AceqVJk4ixlhIikPnqP7du388wzz/Doo4+SkJDgbVeSl0jkb6K/rui7Ad4C\nPgKm+NiuANgGfG6t9ZEpSjPGdAJexHM1YLS19mkf21yGZ3Y+C/zPWnuln3FLZamPPiLp/A22bt1K\nhw4d+Pbbb/nll18YP3488fHxToclEjR+JXpr7Zg//22MuRb4yFo7tqoHN8bEAqOADsA64EtjzFRr\n7fJDtmkJDAbOs9buMMY0qOpxxQ/KCBGnoKDsCQ+jxebNm8nIyOD7778HYPLkySxcuJCMjAyHIxMJ\nngoPxrPWXhjA458FrLTW/gZgjBmPp0tg+SHb3AiMstbuKDr+lgAeX8qiRB9xfCX5GjUgLkoWq96w\nYQPp6en89NNPgOcy/VtvvaUkLxGv0n/ixphkoCdwOlAb2AV8A0y21vo74r4JsPaQx+uAs0ts06ro\neJ/iubw/zFo7u7Jxi580GC/iRPNLunbtWtLS0li5ciUAMTExjB07lquuusrhyESCr7Lr0WcCY4A6\nFL+n3gLPG2Ous9ZOD0B84ImxJZ4V8ZoCi4wxJ5ccA2CMuQnPIjqkpGhSvipTH33EidaXdNWqVaSl\npfH77545vOLi4hg3bhy9e/d2ODKR0KjMFLhnAJPwVPHv41mfvnPR9/eL2icaY9r4sbv1QLNDHjct\najvUOmCqtTbPWvs7sAJP4i/GWvu6tbattbZt/fr1K/hbSSm6dB9xovEl/fXXX2nfvr03ycfHxzNx\n4kQleYkqlZkZ70E8lfsF1tq+1tp3rLVzir73Bc4vev4BP/b1JdDSGHOMMSYBz9K2U0ts8xGeah5j\nTD08l/J/q0Tc4q/CQt8dutFynTdCRVuiX7FiBe3atWPNmjUAJCYm8tFHH9Gtm687g0UiV2US/QXA\nh9baxb6etN
Z+AUws2u6wrLX5wEBgDvAj8IG19gdjzKPGmK5Fm80BthljlgPZwL3W2m2ViFv8tXs3\nWFu8rXr16Bm1FaGirY++WrVq3nvkq1WrxtSpU8nMzHQ4KpHQq8wndy2KD6DzZQ3g10071tqZwMwS\nbUMP+bcF7ir6klCI1s7cCBdtL2tKSgrz588nMzOTl19+mQsvDOQNQyLhozKJfgOe2+IOpy2wsRL7\nFjeItmu8USIaX9ZjjjmG7777jjhdjZIoVplL9zOBNGPM/UUT3ngZY2KMMXcDGZSo0iWMRGNGiAKR\n/rIuWbKEefPmlWpXkpdoV5m/gMeA7sATwM3GmP/DU703wjMQrzmwCXg8QDFKqEVbZ26UiORE/+mn\nn9K5c2fy8/OZOXMmqampTock4hqVmRlvkzHmPOA1PFPXHl1ik4+BW6y1unQfriI5I0SxSF3QZsGC\nBXTp0oV9+zzzdF133XX8/PPPxRarEYlmlbqmZa1dBVxkjGmCZ2a8WnhmxltqrS15H7yEm2gbtRUl\nIvH8bd68eXTt2pWcnBwAGjZsyLRp05TkRQ5Rpc6roqSuxB5pIjEjSMS9rDNnzqRnz57k5uYC0Lhx\nY+bPn8/xxx/vcGQi7lKZmfE+MMZ0NsZUZiCfhINIywgCRNbLOmXKFLp37+5N8s2aNWPRokVK8iI+\nVCZZ9wKmA+uNMc8aY04KcEziNA3Gi0iR0kc/ceJEevXqRV5eHgDNmzdn0aJFHHfccQ5HJuJOlUn0\nf8czEC8BuBv4nzHmK2PMbUVT1Eq4Ux99xLE2Ms7fxo0bR58+fcjPzwfg2GOPZdGiRTRv3tzZwERc\nrMKJ3lq7xFr7D6AxcBme++VPAV7EU+VPMsZ0N8bo5tVwFUnXeAWAvXs9SxgcKikJEhOdiaeyVq9e\nTUFBAQCtW7dm0aJFNGvWrJyfEolulU7G1tqDeOa0n2iMqQ9cDVyL5x77bsA2oEEggpQQU6KPOJHy\nkg4ePJjc3FwmTpxIVlYWDRs2dDokEdcLyIA6a+0f1trn8dxqdw+QD9QNxL7FAZFwjVeKiaSX9OGH\nH+aLL75QkhfxU0ASvTGmtTHmSWA18CwQD6wMxL7FAeqjjzjh+pL+97//5cCBA8XajDFUr17doYhE\nwk+lE70xprYxZoAxZjGwHLgfz4p1b+JZq751gGKUUIqUUVtSTDheun/66afp1asXvXr14uDBg06H\nIxK2KnMf/SXGmA/xzG//bzwr1c3D00ffyFp7k7X208CGKSGzbx8UDXbyqlbN8yVhK5wSvbWWRx99\nlMGDBwMwY8YMnnjiCYejEglflRmMN6Xo+wpgDDBW095GEFXzESlcEr21loceeognn3zS23bhhRcy\naNAgB6MSCW+VSfSvAWOstYsDHYy4QLh25sphhcNkOdZaBg0axIgRI7xtHTt2ZPLkySQnJzsYmUh4\nq8zqdQOCEYi4RLiUflIhbn9ZrbX885//ZOTIkd62iy++mIkTJ1JN3UYiVaJJbaQ4t2cEqRQ3v6yF\nhYX84x//4LXXXvO29ejRg/Hjx2sVOpEAKDfRG2PmAxa41lq7ruixP6y1Nr1K0UnouTkjSKW59WUt\nKCjgxhtv5O233/a2XXbZZbz33nvEx8c7GJlI5PCnok/Fk+iTD3nsD1uJeMRpGowXkdzaR79jxw7+\n7//+z/v46quv5u233yYuThcbRQKl3L8ma23M4R5LhNFgvIjk1oq+Xr16zJ8/n/bt25Oamsobb7xB\nbGys02GJRBSdNktxbs0IUiVuflmbNWvG4sWLqVevHjExqiNEAk1/VVKcmzOCVJpbXtYDBw7w+eef\nl2pv0KCBkrxIkFRlCtyrjDFZxpjtxpj8ou/zjDFXBTJACTH10Ucct8xqvH//frp160Zqaipz5swJ\n7cFFolhlpsCNN8ZMAcYCFwI1gD+KvqcBY40xU4wxGjIbjtRHH3EOHIC8vOJt8fGe9ehDZd++fXTp\n0oW5c+dy8OBBunfvzq+//hq6AESiWGUq+sHAJcAXeBJ9NWttY6AankS/BOgC3BeoICWE3HKNVwKm\nrJfUmNAcf8+ePXTq1Ins7Gxv2wMPPMCxxx4bmgBEolxlEn1fPEvQplprF1prCwCstQXW2gV4br/7\nDegXoBgllJToI46TL+nOnTvp2LEjn3zyibft6aefZsiQIaEJQEQqleibAlOstT7XjbTW5uJZ+KZJ\nVQIThyjRRxynXtLt27eTkZHB4sV/LYvx3HPPcd99utgnEkqVub1uA1Be/3t80XYSbtw6s4pUmhMv\n6R9//EGHDh343//+523797//za233hrcA4tIKZWp6McBvYwxNX09aYypDfQC3q9KYOKAAwcgN7d4\nW1wcaOWwsBbqin7Tpk1ceOGF3iRvjOH1119XkhdxSGUq+keBk4AlxphHgUXAZqAh0B4YgmdA3mOB\nClKqKCcH1q8vf7utW0u3hXLUVgSzFtauhYM+O7yC65dfSrcFM9HPnj2bH374AYCYmBjeeustrr32\n2uAdUEQOqzKJPqfouwHe9fG8AVoCB0zxBGGttZqJL5SshUcfhSefrHyGUf98lX3zDfTsCatXOx3J\nX4L5svbr148tW7bwwAMP8O6773LFFVcE72AiUq7KJN7/QwvWhIcvv4Rhw6q2D/XPV1n//u5K8hD8\nl3XQoEF07dqV448/PrgHEpFyVTjRW2tTgxCHBMO8eVXfR8OGVd9HFNu6FQ4Zj+YagXxZV61aRYMG\nDUguMZZDSV7EHTS5dCQLRBl56aVV30cUc1slD1CtGnTuHJh9/fTTT5x33nl069aNAwcOBGanIhJQ\n6jOPZKtWlW5r3Ni/UfR168KVV8J11wU8rGji6yWoXh0aNQp5KAC0agWDB0PTplXf1/fff09GRgab\nN29mw4YNXH311UycOLHqOxaRgCo30Rtj7gH+ba2t1Om6MeZ0oJG1dlZlfl6qwFc5OX06nHFG6GOJ\nUr5egquvhldfDX0sgfS///2PjIwMthbdqVG9enUGDhzocFQi4os/l+6fAH41xtxnjDnKn50aj4uM\nMZOBr4BTqxKkVIK1vrNM8+YhDyWa+arow/0l+Prrr7nwwgu9Sb5GjRrMmTOH1NRUZwMTEZ/8uXR/\nMvAc8BTwuDHmM+ATPAl8I7ADz4I2dYHjgb8D6UAjYBswEHgt4JHL4W3Z4pkA51BHHAFHHulMPFEq\n0s61Fi9eTKdOndhVNN1erVq1mDNnDmeffbbDkYlIWcpN9NbaFUAXY8y5wK3ApcAF+L7F7s8b538G\nhgNvW2v3BChWqYiySklNfhNSvl6Go48OeRgB8cknn5CZmcmePZ4/6Tp16jB37lzatGnjcGQicjh+\nD8az1n4GfGaMuQVoB5wPpOCp5HOALcAyYIG19ocgxCoV4auUDNcME8Yi5dL9ggUL6NKlC/v27QOg\nXr16zJs3j1NPVa+ciNtV5j76PcCMoi9xq0jJMGFs507Yvbt4W0JC+E1
NUFhYyL333utN8g0bNiQr\nK4sTTzzR4chExB+6jz5SRVrncBgq67J9TJj91cXExDB16lRatWrFUUcdxcKFC5XkRcKIXxW9MaYv\n8K21dlmQ45FAiaTO4TAVSb0njRs3Zv78+eTk5HDcccc5HY6IVIC/tcU7QPdDG4wx1xpj5gc8IgkM\nVfSOC+fek40bN5Zqa9KkiZK8SBiqykXE5niWpRW3sVYVvQuEa0X/3nvv0aJFC6ZNm+Z0KCISAGHW\nWyh+2b4digZOeSUlQf36zsQTpcKxon/77bfp27cvBw4coFevXsyfr4t2IuFOiT4SlVXN6x76kAq3\niv61116jf//+WOuZIqN169acdNJJDkclIlWlRB+J1D/vCuFU0b/00kvccsst3sennXYa8+fPp0GD\nBg5GJSKBUJFE72smPHEj9c87bs8eTw/KoeLi4Ci/VosIrX/961/cfvvt3sdnnnkm8+fPp169eg5G\nJSKBUpEJc4YZY4aVbDTGFJSxvbXWahlcJ6iid5yvl6BZM4iNDX0sh/Pkk0/y4IMPeh+fc845zJo1\ni1q1ajkYlYgEUkUqelPBL3ULOEUVvePcfq5lrWXYsGHFkny7du2YM2eOkrxIhPGr4rbWKmmHk3Dq\nHI5Qbj/X+vLLL3nkkUe8j9PS0pg6dSrVq1d3MCoRCQYl8EgUbsO9I5Dbz7XOOussRo4cCUCnTp2Y\nPn26krxIhFIfeqTZuROK1gr3SkiARo2ciSdKhcO51m233UbTpk3JzMwkMTHR6XBEJEgqnOiNMacC\nVwJnAfXxjMb/A/gCGGet/S6gEUrF+MowKSnht5JKmHNbRV9YWEhOTk6pqr1Hjx4ORSQioeJ3ojfG\nxAIvATfx14C7Q7UH7jXGvAzcYf+cdUNCy20ZJkq5qaIvKCjg+uuv59dff2X27Nm6RC8SZSpS0Y8A\nbgEOAh8AC4D1eBL+UUAa0Au4FTgADApkoOInN2WYKLV/P2zZUrwtJgaaNg19LPn5+Vx77bWMGzcO\ngEsuuYQZM2aQlJQU+mBExBH+LlN7HHAbsBroZK392cdmbxljHgdmA3caY1611v4WuFDFL6roHbdm\nTem2Jk0gPj60ceTl5XHllVcyceJEb1uLFi1ISEgIbSAi4ih/O26vwVO59ysjyQNgrf0JuBaIBa6u\nenhSYaroHeeGc63c3Fx69+5dLMkPGDCA119/nVi3zdojIkHlb6I/F/jRWruwvA2LtlkOnF+VwKSS\n3JBlopzTk+UcOHCAnj17MmXKFG/bHXfcwahRo4jRoEyRqOPvX/3xeEbV++uLop+RUFNF7zgnJ8vZ\nv38/l1xyCTNnzvS2DRo0iOeffx6j1QtFopK/ib42sKXcrf6yGTiy4uFIlezdC9u2FW9z60oqEcyp\nin7v3r1cfPHFzJs3z9s2ZMgQnn76aSV5kSjm76j76kBOBfabCyRXPBzxy4YNsGlT6XZfpWTTpp5k\nLyHjVEV/9913s2DBAu/jxx57jIceeij4BxYRV1MGCCc5OXDFFXBI32u51D8fck5V9I8//jiffvop\nP/zwA8888wz33ntv8A8qIq5XkUTf3RjT3M9tT694KFKusWMrluRB/fMhlpvrueBSUrNmwT92/fr1\nycrKYtasWfTr1y/4BxSRsFCRRH9a0Ze/NDNeoB1yWdZvxx0X8DCkbGvXlm5r3BiCMZV8fn4+cSW6\nZRo2bKgkLyLF+JvorwtqFOIfX52/hxMfD5ddFpRQxLdQ3d24adMmLrroIh566CF69+4d+AOISMTw\ndz36McEORPzgK4ucdJLvKdeOPhr++U9o1SroYclfQpHo169fT1paGitWrODKK68kPj6e7t27B/Yg\nIhIxHB+MZ4zpBLyIZza90dbap8vY7lJgInCmtfarEIboDgcOlB5pbwx8/bVnGVpxhWBPY7BmzRrS\n0tL49ddfAbDWcuDAgcAdQEQijt/TZBlj/mGMGWyMKXPGbmNMQtE2A/zcZywwCugMnABcYYw5wcd2\nNYA7qNikPZHF1wTqRx2lJO8ywazof//9d9q3b+9N8nFxcUyYMIE+ffoE5gAiEpH8SvTGmHPxLFGb\naK3NK2s7a+1BIAH4tzHmbD92fRaw0lr7W9HPjge6+djuMWA4nlXxopNmvAsLwbq17pdffqFdu3as\nKjqTSEhIYNKkSVx66aVV37mIRDR/K/prgb14lqotzwhgD9Dfj22bAIeOU15X1OZljDkDaGatneFf\nqBFKc9iHhWBMlvPjjz/Svn171q1bB0BiYiJTpkzhkksuqdqORSQq+NtHfwGQZa3dW96G1tp9xpis\nop+pEmNMDPAc0M+PbW8CbgJISUmp6qHdRxW96+Xlwfr1pdur8jJ9//33pKens6VogfukpCSmTZtG\nenp65XcqIlHF34o+BfilAvtdWfQz5VkPHDqVSNOitj/VAE4CFhhjVgF/B6YaY9qW3JG19nVrbVtr\nbdv69etXINQwoYre9datg8LC4m0NGkBSUuX2t2vXrmJJvnr16syaNUtJXkQqxN9EH0vFJsCxfu77\nS6ClMeYYY0wC0AeY6t2JtbustfWstc2ttc2BxUDXqBx1r4re9QLdP1+rVi0ef/xxAGrUqMHcuXNp\n37595XcoIlHJ30v3fwDHVmC/xwJby9vIWptvjBkIzMFzMvGWtfYHY8yjwFfW2qmH30MUUUXvesHo\nn7/xxhsBOPXUUznrrLOqtjMRiUr+JvovgQ7GmFrW2l2H29AYUwvoAMw73HZ/stbOBGaWaBtaxrap\nfkUbaQ4e9D2BeiSORQhjgajorbWllpT9M9mLiFSGv5fu/wPUxHPPe3n+jadv/T+VDUpK8NX527Bh\n5Tt/JSiqWtFnZWWRnp7O7t27AxaTiIi/if6/wGd4JrRZaIzJKOpTB7wT5WQYYxYAVwKfWmv/G/hw\no5RT655KhVSld2XOnDl06dKF7OxsOnfuzN695d7gIiLiF3/nurdFU9DOwXPb3Bwg3xizrWiTukX7\nMsD/gF5BiDV6BaPzVwKusuMlp0+fzqWXXsrBgweL9rOaLVu2cMQRRwQ4QhGJRn5PgWut3QycAwzB\nM8lNPNCo6Cu+qO0h4Fxr7ZbAhxrFNBDP9QoKfC9RW16inzx5Mj179vQm+ZSUFBYuXEiLFi2CEKWI\nRKMKLWpjrc0BngCeMMY0BRoXPbXRWrsu0MFJEd1a53obNkB+fvG2OnWgRo2yf2bChAlcddVVFBQU\nANCiRQvmz5/P0XptRSSAKr16XVFiV3IPBVX0rlfRl+jdd9+lX79+FBYNsmzVqhVZWVk0bdo0KPGJ\nSPTyd1GbdsYYv+/lMsacYozpW/mwpBhV9K5XkZforbfe4tprr/Um+RNOOIEFCxYoyYtIUPjbR59N\nifnmjTH3HTIYr6QewNtViEv+lJ9fuc5fCSl/K/qJEydy/fXXY61nosmTTz6Z7OxsGjduXHpjEZEA\n8DfRGx9t1YDaAYxFfNmwwT
PS61B164JGZLuKv3dApqenc/rppwNwxhlnkJ2dTYMGDYIbnIhENb9H\n3YtD1D8fFvy9A/LII49k7ty59OvXj6ysLOrWrRv02EQkulV6MJ6EiCbLCQsVeZnq1avH22+rZ0tE\nQkMVvdtpshzXKyz0nehTUiwPP/wwY8eODX1QIiJFVNG7nSp619u0ybPu0KFq1rQMHz6Y4cOHExMT\nQ0JCAn369HEmQBGJahWp6CuyHr0Eiip61yt9LmZJSLiL4cOHA1BYWMi4ceO8I+1FREKpIhX9MGPM\nsJKNxpgCH9tKoKiid73i52KFwG1s3fqyt6Vr16588MEHpZafFREJhYok+op+Sql8qaqyOn9V0bvK\nXy9RIXAzMNr73KWXXsq4ceNISEjw8ZMiIsHn7+p1GrTnhI0bIS+veFvt2lCrljPxiE+eir4A6A/8\nNfDuiiuuYOzYscTFaSiMiDhHn0Bupmo+LPz+ez7QF/iPty019VreffdNYmNjHYtLRASU6N0tTCfL\n2b8fvv229Ej0SGSt5dNPrwYmHNJ6A8888xqxsboQJiLOU6J3szCs6LOyoEcP2LPH6UhCxQBdgA/w\nDEv5B/ASxxyjJC8i7qBE72ZhVtFbC7feGk1J/k9XA3nA98AIqlc3aGZbEXELJXo3C7OKfutW+Pln\np6NwynXef7VqBbqTTkTcQtcX3SzMKnpf4UaevcCtwPYyt7juujKfEhEJOVX0bmVt2E2W4yvcevXg\nxBNDH0sw5Ofv5rvvMtm9+1Nq1FjCKafMIy7ur1sda9SA7t2hf38HgxQRKUGJ3q22bIEDB4q3HXEE\nHHmkM/H4wVdF36sXvPJKyEMJuB07dtCpUyd2714CwJ49XzFgwHSuuuoqhyMTETk8JXq3Kquad3Hn\nb5gNKfDbtm3b6NChA0uXLvW2vfjii0ryIhIWlOjdKgwXswmzIQV+2bJlCxkZGXz33XfetldffZWb\nb77ZwahERPynRO9WYdY/D2EZ8mFt3LiR9PR0fvzxRwCMMYwePZr+6oQXkTCiRO9WYVbRWxt2IR/W\n+vXrSUtLY8WKFQDExMQwZswYrr76aocjExGpGCV6twqz6+A7d5aeKCcxERo2dCaeqli7di2pqan8\n9ttvAMTGxvL+++9z+eWXOxyZiEjF6T56twqzkW2+zktSUiAmDN9htWrVon79+gDEx8fz4YcfKsmL\nSNgKw4/hKFDWdXAXV/SR1D9fs2ZNZs+ezbnnnsukSZPo0aOH0yGJiFSaLt270fbtsG9f8bakJCiq\nMt0okvrnAWrXrs0nbxysxwAAHRNJREFUn3yCcfHtjCIi/lBF70ZlZU0XJ51wrui/++473nnnnVLt\nSvIiEglU0btRmPXPQ/hW9EuXLqVDhw5s27YNgH79+jkbkIhIgKmid6Mw65+H8KzolyxZQlpamjfJ\n33XXXezYscPhqEREAkuJ3o3CMGuGW0X/2WefkZGRwc6dOwFPn/zcuXM50sVrCYiIVIYSvRuFWdbc\nvRtKFsJxcXDUUc7EU55FixbRsWNH9hTd+F+3bl2ys7Np27atw5GJiASeEr0bhVlF7yvcZs0gNjb0\nsZQnKyuLTp06sa/oroYGDRqwYMECTjvtNIcjExEJDiV6Nwqzij5czktmz55Nly5dyMnJAaBx48Ys\nXLiQk046yeHIRESCR4nebXbuhF27irclJECjRs7E44dwGDs4bdo0unXrxoEDBwBo2rQpCxcu5Pjj\nj3c4MhGR4FKidxtf5bHL55INh7sBc3NzKSgoAKB58+YsWrSIli1bOhyViEjwuTd7RKtwKI9LCIeQ\ne/XqxdixY2nVqhULFy7kmGOOcTokEZGQUKJ3m3Aoj0sIl5CvvPJKli1bRkpKitOhiIiEjBK924RD\neVyCG0P+4IMP+OOPP0q1JyYmOhCNiIhzlOjdJlzK4yL79kHJfBoTA02aOBMPwKhRo7j88svJyMjw\nznonIhKtlOjdxo3l8WGsWVO6rWlTiI8PfSwAzz//PAMHDgRg2bJlDBo0yJlARERcQonebcKsonfT\nLf/Dhw/nrrvu8j7++9//zr/+9S9nghERcQklejfZuxdKXmp281yyuGeynMcee4z777/f+/j8889n\nzpw51K5dO/TBiIi4iBK9m/jKmk2bepK9Szld0VtrGTJkCEOHDvW2XXjhhcyaNYuaNWuGLhAREZdy\nbwaJRmHWPw/OVvTWWu677z6effZZb1uHDh346KOPSE5ODk0QIiIup0Qfavv2wZIlULRyWjFz5pRu\nc2Gi37gRvvkGCgpg2bLSz4ciZGstd955Jy+++KK3LTMzk//+979Uq1Yt+AGIiIQJJfpQ+v576NAB\nNm3y/2dcNhDvzTfhppugsLDsbUIRcm5uLkuXLvU+7tatGxMmTNB98iIiJSjRh9LDD1csyYOrKvr9\n++Guuw6f5I3xLFEbbNWqVWP69OlcdNFFNG3alPfff594p+7pExFxMSX6UPrss4r/TOvWgY+jkpYv\nh927D7/NMcdAqIrqGjVqMGfOHJKSkohz8YBFEREnadR9qBw4UPFq/tRT4eyzgxNPJfgaeFfSzTcH\n59h5eXnMmjWrVHuNGjWU5EVEDkOfkKHiawq55GRITy/dbgycfLLnOrmLlqf1dVNAixZw4omeX6Vz\nZ+jbN/DHPXjwIFdccQWTJk3ilVde4ZZbbgn8QUREIpQSfaj4ypKnnw5Tp4Y8lMry9SvcdBPcd1/w\njpmbm0vv3r2ZNm0aAAMGDOCUU07h3HPPDd5BRUQiiBJ9qITZ1La+hPpXyMnJoUePHsw55LbDu+66\ni3POOSd4BxURiTDuuS4c6cJwMpySQvkr7Nu3jy5duhRL8oMHD2bEiBEYY4JzUBGRCKREHyphXtFb\nG7pfYc+ePXTu3Jn58+d724YNG8YTTzyhJC8iUkG6dB8qYV7R79xZ+ta6xERo2DCwx9m1axedO3fm\n888/97Y9+eSTDB48OLAHEhGJEkr0oRLmFb2v8FNSAntTwI4dO7jooov48ssvvW0jRozg7rvvDtxB\nRESijBJ9KBw8COvXl25PSQl9LJUUigsS33zzTbFpbUeOHMltt90W2IOIiEQZ9dGHwrp1nk7uQzVs\nCElJzsRTCaG4IJGens748eOJj4/ntddeU5IXEQkAVfShEOb98xC6X+HSSy/ll19+4egw6tYQEXEz\nVfSh4OSi7QESjF9h7dq1bNy4sVS7kryISOAo0YeCr3I4zJJZoH+FVatW0b59e9LT09myZUvldyQi\nIoelRB8KquiL+fXXX2nfvj2///47P/74I5mZmRQebu1bERGpNCX6UAjzin73bti+vXhbXBw0blzx\nff3888+0a9eONUWL/CQmJvLoo48S46LFe0REIonjn67GmE7GmJ+NMSuNMff7eP4uY8xyY8wyY0yW\nMSZ8MuSfwnwwXln30MfGVmw/y5cvp3379mzYsAGAatWqMXXqVDIzMwMQpYiI+OJoojf
GxAKjgM7A\nCcAVxpgTSmy2FGhrrT0FmAg8E9ooqyg/33N7XUlhVNEH4ta6ZcuWkZqayubNmwFITk5m5syZdOzY\nMQARiohIWZyu6M8CVlprf7PWHgTGA90O3cBam22t3V/0cDHQNMQxVs369VBQULytXj34//buPjqq\n8trj+HdDSECpIIooYIBapNRai7KsWr3ACAURoSrVaFGkXPXSRRdq0bay2nr1Vmu5otel2MqtglhR\nUo3FokUUAr0WWCpUW99TUQKUSoFAFRFC9v1jDmGSTMIkc+b991krKzPPeebMnsfgnv2c55xz+OGZ\niacNkp2QWLt2LcOGDWPr1q0AdO7cmSVLljBs2LBQ4hMRkeZlOtH3Aqpjnm8M2pozGXgupRGFLccv\nfQvJfYQ1a9YQiUTYHhzk79KlC0uXLuXss88OMUIREWlOzlwwx8wmAIOBIc1svwa4BqA0my4tm+PH\n5yG5jzBz5kx27twJwJFHHsnSpUs57bTTQotNRERalumKfhNwfMzz3kFbA2Y2HJgBjHX3z+LtyN0f\ndPfB7j64e/fuKQm2TQq8on/kkUcYOnQoRx99NMuXL1eSFxFJs0xX9C8D/c2sH9EEXwZcHtvBzAYB\nvwJGuXvuXVmlwCv6ww47jGeeeYZNmzYxYMCAMMMSEZEEZLSid/daYCqwBHgLWOjub5jZrWY2Nug2\nE+gMlJvZn81sUYbCbZscr+h374ZgDV29du2gVzMrKd5///0mbZ07d1aSFxHJkExP3ePuz7r7ie5+\ngrv/LGj7ibsvCh4Pd/ce7v7V4Gdsy3vMMjle0cf7ntK7N3To0LT9d7/7HQMHDuTuu+9OfWAiIpKQ\njCf6vFZXB8EV4BrIoYo+0UvflpeXM378ePbu3csNN9zA3LlzUx2aiIgkQIk+lf7+d9i3r2Fb167Q\npUtm4mmDRK7e+9hjj1FWVkZtbS0AJ5xwApFIJPXBiYjIISnRp1IB3Mxm3rx5TJgwof6mNAMGDGDl\nypXZdYqjiEgBU6JPpRy/mQ20/BHmzJnDpEmTcHcATjrpJFasWEHPnj3TF6CIiLRIiT6VcnwhHjT/\nEe6//36uueaa+iR/yimnsHz5cnr06JHW+EREpGVK9KmU46fWQfyP8OKLs5g6dWr988GDB7Ns2TKy\n6kJFIiICKNGnVo5X9Hv2RNcTNvQBs2bdXP/sjDPO4IUXXqBbt25pjU1ERBKjRJ9KOV7RV1c3bevZ\nsy9PPfUUHTp04JxzzuH555+nSw6dRSAiUmgyfQnc/OWe84m+uYV4o0ePZunSpQwePJjDc+h2uyIi\nhUiJPlU++ig69x2rc2fIoSnu6PcUB3YA0bgPHHkYMiTuTQRFRCTLaOo+VZo7Pm+W7kjabP16B64D\nTgc2Azm1xEBERFCiT50cn7avq6ujvHwKcC/wNyAC/DOXPoKIiKCp+9TJ4RX3+/fv5+qrr+a99x6O\naT0F6JIrH0FERAJK9KmSoxV9bW0tkyZN4tFHH41pnQA8DBQp0YuI5Bgl+lTJwYp+3759TJgwgYUL\nF8a0TgLmAO0B0CXsRURyixJ9quRYRb93717KysqoqKiIab0WmM2BpRw9ekCnTpmITkRE2kqL8VLB\nPacq+j179nDRRRc1SPIXXvg94AFi/0Sy+HuKiIg0Q4k+FbZtg08+adjWqRNk6bXgZ86cyeLFi+uf\nT58+nTFj/gdoeCpgln5PERGRFijRp0Jz0/ZZeg79jTfeyMiRIwG4+eab+cUvfsGGDU1jVUUvIpJ7\ndIw+FXLsPvQdO3akoqKC8vJyrrjiCswsl448iIhIC1TRp0K8ij6LsuSexpfmBTp16sSVV16JBbMO\nObaWUEREmqFEnwpZXNFv376ds88+m9tvv73FfqroRUTyg6buUyFLK/qtW7cyYsQIXnvtNV599VWK\ni4uZPn16k361tbBxY9PXZ8l3FRERaQUl+lTIwop+y5YtDB8+nDfeeAMAM6Nr165x+27eHE32sY46\nKnrzPRERyS1K9KmQZRX95s2biUQivPPOOwC0a9eOhx56iIkTJ8btr+PzIiL5Q4k+bDU1sHNnw7bi\nYjj22IyEU11dTSQSoaqqCoD27dszf/58LrvssmZfo+PzIiL5Q4k+bPHK4dJSaJf+dY/r168nEonw\nQZC5i4qKWLBgAePHj2/xdVk2ISEiIklQog9blhyfr6qqIhKJUF1dDUCHDh0oLy9n3Lhxh3xtlnwE\nEREJgRJ92LKgHK6treW8886rT/IlJSVUVFRw3nnnJfR6Td2LiOQPnUcftiwoh4uKinjggQcoKSmh\nU6dO/P73v084yYMW44mI5BNV9GHLgooeYPjw4Tz99NN07NiRoUOHJvy6ujrYsKFpuxK9iEhuUqIP\nW4bmvevq6mjXaMHfqFGjWr2fLVtg796GbV26QDOn3IuISJbT1H3YMjB1v3r1agYNGsSH8WYTWknH\n50VE8osSfZj+9S/Yvr1hW1ER9OyZsrf84x//yIgRI3j99deJRCJsjHft2lbQ8XkRkfyiRB+meFmy\nd+9osk+BZcuWMWrUKD7++GMAdu3axY4dO5Lapyp6EZH8okQfpjQuxHv++ec5//zz2b17NwA9evSg\nsrKSk08+Oan9qqIXEckvSvRhStPx+cWLF3PBBRfU31e+Z8+erFixgpNOOinpfauiFxHJL0r0YUpD\nRV9RUcGFF17I3mBpfGlpKStXrmTAgAGh7F8VvYhIftHpdWFKcUW/cOFCLr/8cvbv3w9Av379WL58\nOX1a8R61tVBZCe++G3+7KnoRkfyiRB+mFFb0lZWVXHbZZdTV1QHQv39/li1bRu/evRPeR10dfPOb\nsHhx4u/buTN069baaEVEJFto6j5MKazozzrrLMaMGQPAwIEDWbFiRauSPMDKla1L8hAN36x1rxER\nkeyhRB+WTz+Fjz5q2GYWPb0uBMXFxSxcuJBp06ZRWVnJcccd1+p9vPpq6983hPV9IiKSQZq6D0u8\naftevaC4OLS3KCkp4Z577mnz6+NNOLSkuBimTm3z24mISBZQog9LyMfn77rrLmpqarjtttvaHlMj\n8UIcNQr69Wva3q0bXHwxDBoU2tuLiEgGKNGHJcTj87fffjszZswAolP2P/7xj5MI7KB4If70p3DG\nGaHsXkREspCO0YclhPPS3J1bbrmlPskDvPDCC/XnzCfDXafOiYgUIlX0YUnySjPuzowZM7jjjjvq\n2yKRCIsWLaI4hOP8NTXRe+7EKimBY45JetciIpLFlOjDkkS57O5Mnz6dWbNm1beNHDmSiooKOnXq\nlLLw+vSBdprTERHJa0r0YWljRV9XV8e0adO477776tvGjBlDeXk5HTt2zHR4IiKS45Tow/DZZ7B5\nc9P20tIWX1ZXV8eUKVN48MEH69suuugiFixYEMp0fSwdnxcRKUyauA1DdXXTtmOPhUNU5NOmTWuQ\n5MvKynj88cdDT/Kgil5EpFAp0YehjeXyJZ
dcwmGHHQbAFVdcwfz58+nQoUO4sQVU0YuIFCZN3Yeh\njRfLOeecc3jmmWd48sknuffee2nfvn34sQVU0YuIFCYl+jAkcbGcSCRCJBIJN544VNGLiBQmTd2H\nIYGKfs+ePXznO9+hqqoqPTHF2LULduxo2FZUBG24L46IiOQYJfowHKKi3717N2PHjuXhhx8mEonw\nQWvvLpOkeN9DSkshhUcKREQkSyjRh6GFefGPP/6Y888/n6VLlwJQXV1NRUVF+mIj1Mvwi4hIjtEx\n+mTt2webNjVtLy1l165djB49mpdeeqm++dZbb+X6669PY4Ch31hPRERyiBJ9sjZuhLq6hm3du1Oz\nbx+jRo1izZo19c133nknN910U5oDVEUvIlLIlOiTFadc3tarF98491zWrl1b33b33Xdz3XXXpTOy\neqroRUQKlxJ9shqVyx8BI9av5/WdO+vbZs+ezZQpU9IbVwxV9CIihUuJPlkx5fI2YBjwZpDkzYw5\nc+YwefLkzMQWUEUvIlK4tOo+WTHlchfgpOBxu3btmDdvXsaT/CefwNatDdvatYNevTITj4iIpJcq\n+mTFlMtFwG8Avv51Lpo6lbKyskxFVW/DhqZtvXtDii6pLyIiWUaJPlmNDoB3AJ6YPRv7ylcyEk5j\nOj4vIlLYNHWfhPfefpufffgh3qjdsugAuI7Pi4gUNlX0bfTWW28RGTKELXV1fAL8DDCAI4+EI47I\nbHAxVNGLiBQ2VfRt8Je//IUhQ4awJVjldg+w/sDGLCuXddc6EZHCpkTfSuvWrWPYsGFsDZL84cBz\nwOcPdMiycln3oRcRKWxK9K3w8ssvE4lE2LZtGwBHlJTwPDAktlOWlcuq6EVEClvGE72ZjTKzd8ys\nysx+GGd7iZk9EWxfY2Z90x8lrFq1iuHDh1NTUwNA165dWTpyJGc17phF5fKePbBlS9P2449Pfywi\nIpIZGV2MZ2btgfuBEcBG4GUzW+Tub8Z0mwzscPcvmFkZcCdwaapj2/lhDeuu/SUAr21fz4/WzuXT\n/XsBOKJDJ+4cOIETVi1t8ronX+3Lez9PdXSJCb6TNNCzJ5SUpD8WERHJjEyvuj8dqHL39wHM7HFg\nHBCb6McBtwSPfwvcZ2bm7o3PagvVzvXbGbrkRywDbgY+Ddq7Ay/u+5STV90X93X/9Wgf/pzKwJKU\nRRMOIiKSBpmeuu8FVMc83xi0xe3j7rXATuCotEQHdAbaB4+PBSqBk1vo/wF9UxxRcnR8XkSksGQ6\n0YfGzK4xs1fM7JWtjS/unoTTgT8AXwRWAF9qoe9GelFD19DeOxVObulbioiI5J1MJ/pNQOzSsN5B\nW9w+ZlZE9N4x2xrvyN0fdPfB7j64e/fuoQZ5FvBX4MRD9Ludmwkum5OVevaEiRMzHYWIiKRTpo/R\nvwz0N7N+RBN6GXB5oz6LgInAKmA8sCzVx+cBOvfqQuXXfpBQ39qijvytdBhHlA4hsVekX69eMH48\nHHdcpiMREZF0ymiid/daM5sKLCF6KPwhd3/DzG4FXnH3RcCvgflmVgVsJ/plIOW69T+KoasTXz4/\nPIWxiIiItFWmK3rc/Vng2UZtP4l5vAf4VrrjEhERyQeZPkYvIiIiKaRELyIikseU6EVERPKYEr2I\niEgeU6IXERHJY0r0IiIieUyJXkREJI8p0YuIiOQxJXoREZE8pkQvIiKSx5ToRURE8pgSvYiISB5T\nohcREcljSvQiIiJ5TIleREQkj5m7ZzqG0JnZVuDDEHd5NPDPEPdXqDSOydMYJk9jmDyNYfLCHsM+\n7t493oa8TPRhM7NX3H1wpuPIdRrH5GkMk6cxTJ7GMHnpHENN3YuIiOQxJXoREZE8pkSfmAczHUCe\n0DgmT2OYPI1h8jSGyUvbGOoYvYiISB5TRS8iIpLHlOhjmNkoM3vHzKrM7IdxtpeY2RPB9jVm1jf9\nUWa3BMbwBjN708xeN7MXzaxPJuLMZocaw5h+F5uZm5lWP8eRyDia2SXB3+MbZvZYumPMdgn8ey41\ns+Vmti74Nz06E3FmKzN7yMw+MrO/NrPdzOzeYHxfN7NTUxKIu+sneviiPfA34PNAMfAa8KVGfb4L\n/DJ4XAY8kem4s+knwTEcBhwWPJ6iMWz9GAb9PgesBFYDgzMdd7b9JPi32B9YBxwZPD8m03Fn00+C\nY/ggMCV4/CXgg0zHnU0/wL8BpwJ/bWb7aOA5wIAzgDWpiEMV/UGnA1Xu/r677wUeB8Y16jMOmBc8\n/i1wrplZGmPMdoccQ3df7u67g6ergd5pjjHbJfJ3CHAbcCewJ53B5ZBExvFq4H533wHg7h+lOcZs\nl8gYOnBE8LgLsDmN8WU9d18JbG+hyzjgEY9aDXQ1s+PCjkOJ/qBeQHXM841BW9w+7l4L7ASOSkt0\nuSGRMYw1mei3WTnokGMYTO8d7+6L0xlYjknkb/FE4EQze8nMVpvZqLRFlxsSGcNbgAlmthF4Fvhe\nekLLG639f2abFIW9Q5FEmNkEYDAwJNOx5BIzawfMAq7KcCj5oIjo9P1QojNLK83sZHevyWhUueUy\nYK6732VmZwLzzezL7l6X6cDkIFX0B20Cjo953jtoi9vHzIqITlVtS0t0uSGRMcTMhgMzgLHu/lma\nYssVhxrDzwFfBirN7AOix/UWaUFeE4n8LW4EFrn7PndfD7xLNPFLVCJjOBlYCODuq4CORK/hLolJ\n6P+ZyVKiP+hloL+Z9TOzYqKL7RY16rMImBg8Hg8s82BFhQAJjKGZDQJ+RTTJ65hoUy2OobvvdPej\n3b2vu/clus5hrLu/kplws1Yi/56fJlrNY2ZHE53Kfz+dQWa5RMZwA3AugJkNJJrot6Y1yty2CLgy\nWH1/BrDT3f8e9pto6j7g7rVmNhVYQnS16UPu/oaZ3Qq84u6LgF8TnZqqIrrAoixzEWefBMdwJtAZ\nKA/WMW5w97EZCzrLJDiGcggJjuMS4Btm9iawH7jR3TVDF0hwDL8PzDGz64kuzLtKxc9BZraA6JfJ\no4N1DD8FOgC4+y+JrmsYDVQBu4FJKYlD/01ERETyl6buRURE8pgSvYiISB5TohcREcljSvQiIiJ5\nTIleREQkjynRi0iLzOyq4C55V2U6FhFpPSV6EclKZlZpZjr/VyRJumCOiBxKBdEr8IV+xS4RST0l\nehFpkbvvJHqnRhHJQZq6FykwZtY3OOY+18y+aGZPm9l2M/vEzP7PzL7RqH+DY/Rm1tHMaszso+Dm\nTvHe44HgNWMatZ9rZn8I3u8zM3vXzH5uZl0ax0dwZ8NgPwd+KkMeDpG8p0QvUrj6AauAbkRvNFQO\nnAY8Z2aXNvcid98DPAF0B85rvN3MSoBLgX8Af4hpvxZYCnyd6A1l7iZ6z4gfAH8ys65B1xrgP4EP\ng+f/GfMzt02fVKSA6Vr3IgXGzPoC64On/+3uN8ZsG0w0+X8M9HH3XUEl/zAwyd3nBv3OBP4EPOnu\n4
xvt/1tEb106y92/H7T1IXob2M+A09397Zj+s4EpwBx3vyamvRIY4u4W1mcXKUSq6EUK107g1tiG\n4Ha3vwG6Ahc298Lg3uPvAheYWbdGmw/cynleTNsEoBi4LzbJB2YA/wKuCGYDRCRESvQihWutu/8r\nTntl8HvQIV4/j2jyrr9ds5n1AEYC69z99Zi+pwa/lzXeibvvANYRvZf5FxOKXEQSpkQvUrj+0Uz7\nluB3l2a2H/AIUMfBCh7g20TP5pnXqO+BfTV3it6B9q7NbBeRNlKiFylcPZppPzb43eIpde6+kWiF\nfrqZHajEJwL7gMcadT+wr2OJ77hE3lNEWk+JXqRwnWpmn4vTPjT4vS6BfcwNfk80s68CXwGec/et\njfod2NfQRu0Eq+2/CuwB3orZtD/Y3j6BOESkGUr0IoWrC/CT2IZg1f23iVbWFQns4ylgF9HFdlcF\nbXPj9HuUaKX/PTP7QqNttwFHAI+6+2cx7duC36UJxCEizdCV8UQK10rg383sa8BLRKfPLyVaAFzr\n7rsOtQN3/9TMyoHJwHeJJufFcfp9YGbXAfcDa81sIbCV6EVxzgTeJno+fawXgW8BT5nZs8CnwIfu\nPr8tH1akUKmiFylc64GzgB3AfwCXAGuB0e7+RCv2Mzf43QFY4O5743Vy99lEV+SvBi4GbgCOAWYC\nZ7r79kYv+V/gDqIzDzcRrfwntyIuEUEXzBEpODEXzJnn7ldlNBgRSTlV9CIiInlMiV5ERCSPKdGL\niIjkMR2jFxERyWOq6EVERPKYEr2IiEgeU6IXERHJY0r0IiIieUyJXkREJI8p0YuIiOSx/wdSwEuL\nP6kZDwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = plt.figure(figsize=(8, 8))\n", + "results = pd.concat(dfs)\n", + "pivot_plot(results, fig=fig);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "all,-slideshow", + "formats": "ipynb,Rmd" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/source/learning/Learning1.Rmd b/doc/source/learning/Learning1.Rmd deleted file mode 100644 index 359cbe982..000000000 --- a/doc/source/learning/Learning1.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -jupyter: - jupytext: - cell_metadata_filter: all,-slideshow - formats: ipynb,Rmd - text_representation: - extension: .Rmd - format_name: rmarkdown - format_version: '1.1' - jupytext_version: 1.1.1 - kernelspec: - display_name: Python 3 - language: python - name: python3 ---- - -# Learning 1 - -```{python} -import numpy as np -print('notebook 1') -``` - -```{python collapsed=TRUE} - -``` diff --git a/doc/source/learning/Learning1.ipynb b/doc/source/learning/Learning1.ipynb deleted file mode 100644 index 6ead7af9e..000000000 --- a/doc/source/learning/Learning1.ipynb +++ /dev/null @@ -1,63 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Learning 1" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "notebook 1\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "print('notebook 1')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "jupytext": { - "cell_metadata_filter": "all,-slideshow", - "formats": "ipynb,Rmd" - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/doc/source/learning/Learning2.Rmd b/doc/source/learning/Learning2.Rmd deleted file mode 100644 index aca1f8f4d..000000000 --- a/doc/source/learning/Learning2.Rmd +++ /dev/null @@ -1,26 +0,0 @@ ---- -jupyter: - jupytext: - cell_metadata_filter: all,-slideshow - formats: ipynb,Rmd - text_representation: - extension: .Rmd - format_name: rmarkdown - format_version: '1.1' - jupytext_version: 1.1.1 - kernelspec: - display_name: Python 3 - language: python - name: python3 ---- - -# Learning 2 - -```{python} -import numpy as np -print('notebook 2') -``` - -```{python collapsed=TRUE} - -``` diff --git a/doc/source/learning/Learning2.ipynb b/doc/source/learning/Learning2.ipynb deleted file mode 100644 index 66c0d95dc..000000000 --- a/doc/source/learning/Learning2.ipynb +++ /dev/null @@ -1,63 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Learning 
2" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "notebook 2\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "print('notebook 2')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "jupytext": { - "cell_metadata_filter": "all,-slideshow", - "formats": "ipynb,Rmd" - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/doc/source/learning/index.rst b/doc/source/learning/index.rst index 8a74213aa..7ee107c8a 100644 --- a/doc/source/learning/index.rst +++ b/doc/source/learning/index.rst @@ -8,5 +8,5 @@ case considered above. .. toctree:: :maxdepth: 2 - Learning1.ipynb - Learning2.ipynb \ No newline at end of file + Basic_example.ipynb + Full_model_LASSO.ipynb \ No newline at end of file diff --git a/selectinf/learning/core.py b/selectinf/learning/core.py index 7ad6b4b68..a293dc9fd 100644 --- a/selectinf/learning/core.py +++ b/selectinf/learning/core.py @@ -364,22 +364,8 @@ def _inference(observed_target, else: weight_val = np.squeeze(weight_fn(target_val)) - if DEBUG: - import matplotlib.pyplot as plt, uuid - plt.plot(target_val, weight_val) - id_ = 'inference_' + str(uuid.uuid1()) - plt.savefig(id_+'_prob.png') - plt.clf() - weight_val *= ndist.pdf((target_val - observed_target) / target_sd) - plt.plot(target_val, weight_val) - plt.plot(target_val, ndist.pdf((target_val - observed_target) / target_sd), label='gaussian') - plt.plot([hypothesis], [0], '+', color='orange') - plt.legend() - plt.savefig(id_+'_dens.png') - plt.clf() - exp_family = discrete_family(target_val, weight_val) pivot = exp_family.cdf((hypothesis - observed_target) @@ -474,7 +460,13 @@ def repeat_selection(base_algorithm, sampler, min_success, num_tries): return set(final_value) -def cross_inference(learning_data, nuisance, direction, fit_probability, nref=200, fit_args={}): +def cross_inference(learning_data, + nuisance, + direction, + fit_probability, + nref=200, + fit_args={}, + verbose=False): T, Y = learning_data @@ -514,7 +506,8 @@ def new_weight_fn(nuisance, direction, weight_fn, target_val): weight_val = new_weight_fn(d_T) exp_family = discrete_family(d_T, weight_val) - print(ref_Y) + if verbose: + print(ref_Y) pval = [exp_family.cdf(0, x=t) for t, y in zip(ref_T, ref_Y) if y == 1] pvalues.append(pval) diff --git a/selectinf/learning/learners.py b/selectinf/learning/learners.py index c34a80d5a..717ab1e08 100644 --- a/selectinf/learning/learners.py +++ b/selectinf/learning/learners.py @@ -191,7 +191,8 @@ def learn(self, fit_probability, fit_args = {}, B=500, - check_selection=None): + check_selection=None, + verbose=False): """ fit_probability : callable @@ -206,11 +207,14 @@ def learn(self, check_selection : callable (optional) Callable that determines selection variable. + verbose : bool + Print out probability of selection? 
""" learning_selection, learning_T, random_algorithm = self.generate_data(B=B, check_selection=check_selection) - print('prob(select): ', np.mean(learning_selection, 0)) + if verbose: + print('prob(select): ', np.mean(learning_selection, 0)) conditional_laws = fit_probability(learning_T, learning_selection, **fit_args) return conditional_laws, (learning_T, learning_selection) diff --git a/selectinf/learning/utils.py b/selectinf/learning/utils.py index a590a418b..4eeb77b77 100644 --- a/selectinf/learning/utils.py +++ b/selectinf/learning/utils.py @@ -441,14 +441,16 @@ def lee_inference(X, try: import matplotlib.pyplot as plt - def pivot_plot(df, - outbase, - figsize=(8,8)): + def pivot_plot_old(df, + outbase=None, + figsize=(8,8), + verbose=False): - print("selective:", np.mean(df['pivot']), np.std(df['pivot']), np.mean(df['length']), np.std(df['length']), np.mean(df['coverage'])) - print("naive:", np.mean(df['naive_pivot']), np.std(df['naive_pivot']), np.mean(df['naive_length']), np.std(df['naive_length']), np.mean(df['naive_coverage'])) + if verbose: + print("selective:", np.mean(df['pivot']), np.std(df['pivot']), np.mean(df['length']), np.std(df['length']), np.mean(df['coverage'])) + print("naive:", np.mean(df['naive_pivot']), np.std(df['naive_pivot']), np.mean(df['naive_length']), np.std(df['naive_length']), np.mean(df['naive_coverage'])) - print("len ratio selective divided by naive:", np.mean(np.array(df['length']) / np.array(df['naive_length']))) + print("len ratio selective divided by naive:", np.mean(np.array(df['length']) / np.array(df['naive_length']))) f = plt.figure(num=1, figsize=figsize) plt.clf() @@ -457,7 +459,8 @@ def pivot_plot(df, plt.plot(U, sm.distributions.ECDF(df['naive_pivot'])(U), 'r', label='Naive', linewidth=3) plt.legend(fontsize=15) plt.plot([0,1], [0,1], 'k--', linewidth=2) - plt.savefig(outbase + '.pdf') + if outbase is not None: + plt.savefig(outbase + '.pdf') pivot_ax = plt.gca() pivot_ax.set_ylabel(r'P(pivot < t)') pivot_ax.set_xlabel(r't') @@ -514,25 +517,27 @@ def liu_inference(X, import statsmodels.api as sm def pvalue_plot(df, - outbase, + outbase=None, figsize=(8, 8), naive=True, split=False, - bonferroni=False): + bonferroni=False, + verbose=False): - print("selective:", np.mean(df['pvalue']), np.std(df['pvalue']), np.mean(df['length']), np.std(df['length']), np.mean(df['coverage'])) + if verbose: + print("selective:", np.mean(df['pvalue']), np.std(df['pvalue']), np.mean(df['length']), np.std(df['length']), np.mean(df['coverage'])) - if naive: - print("naive:", np.mean(df['naive_length']), np.std(df['naive_length']), np.mean(df['naive_coverage'])) - print("len ratio selective divided by naive:", np.mean(np.array(df['length']) / np.array(df['naive_length']))) + if naive: + print("naive:", np.mean(df['naive_length']), np.std(df['naive_length']), np.mean(df['naive_coverage'])) + print("len ratio selective divided by naive:", np.mean(np.array(df['length']) / np.array(df['naive_length']))) - if split: - print("split:", np.mean(df['split_length']), np.std(df['split_length']), np.mean(df['split_coverage'])) - print("len ratio selective divided by split:", np.mean(np.array(df['length']) / np.array(df['split_length']))) + if split: + print("split:", np.mean(df['split_length']), np.std(df['split_length']), np.mean(df['split_coverage'])) + print("len ratio selective divided by split:", np.mean(np.array(df['length']) / np.array(df['split_length']))) - if bonferroni: - print("bonferroni:", np.mean(df['bonferroni_length']), np.std(df['bonferroni_length']), 
np.mean(df['bonferroni_coverage'])) - print("len ratio selective divided by bonferroni:", np.mean(np.array(df['length']) / np.array(df['bonferroni_length']))) + if bonferroni: + print("bonferroni:", np.mean(df['bonferroni_length']), np.std(df['bonferroni_length']), np.mean(df['bonferroni_coverage'])) + print("len ratio selective divided by bonferroni:", np.mean(np.array(df['length']) / np.array(df['bonferroni_length']))) f = plt.figure(figsize=figsize) plt.clf() @@ -569,21 +574,29 @@ def pvalue_plot(df, pvalue_ax.set_ylabel(r'ECDF(pvalue)', fontsize=20) pvalue_ax.set_xlabel(r'pvalue', fontsize=20) - plt.savefig(outbase + '_pvalues.pdf') - plt.savefig(outbase + '_pvalues.png', dpi=300) + if outbase is not None: + plt.savefig(outbase + '_pvalues.pdf') + plt.savefig(outbase + '_pvalues.png', dpi=300) return pvalue_ax - def pivot_plot_new(df, - outbase, - palette = {'Learned': 'b', - 'Naive': 'r', - 'Bonferroni': 'gray', - 'Lee':'gray', - 'Strawman':'gray'}, - figsize=(8, 8), straw=False): - - f = plt.figure(figsize=figsize) + def pivot_plot(df, + outbase=None, + palette = {'Learned': 'b', + 'Naive': 'r', + 'Bonferroni': 'gray', + 'Lee':'gray', + 'Strawman':'gray'}, + fig=None, + figsize=(8, 8), + straw=False, + verbose=False): + + if fig is None: + f = plt.figure(figsize=figsize) + else: + f = fig + f.clf() new_df = pd.DataFrame({'Learned': df['pivot'], 'Naive': df['naive_pivot']}) if straw: @@ -598,8 +611,11 @@ def pivot_plot_new(df, ax.set_ylabel('ECDF(pivot)', fontsize=20) ax.legend(fontsize=15) - pngfile = outbase + '_pivot.png' - plt.savefig(pngfile, dpi=300) + if outbase is not None: + pngfile = outbase + '_pivot.png' + plt.savefig(pngfile, dpi=300) + else: + pngfile = None return ax, f, pngfile, df, new_df From 7ce3837c7ff1abd1d3cd63bafb80ec66cba119a8 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 24 Sep 2019 23:43:00 -0700 Subject: [PATCH 002/187] trying to fix .travis.yml switching when we change directory fixing path needed rtd theme need pandoc removing print statement --- .travis.yml | 27 +++++++++++++++------------ doc-requirements.txt | 1 + selectinf/learning/fitters.py | 2 -- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/.travis.yml b/.travis.yml index 881190701..7b9c78817 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,7 @@ dist: trusty python: - 2.7 - 3.5 + - 3.6 notifications: email: false addons: @@ -71,7 +72,6 @@ matrix: - DEPENDS= - python: 3.6 sudo: true - dist: trusty env: - DOC_BUILD=1 @@ -90,14 +90,6 @@ before_install: install: # Install selectinf - - | - echo "backend : agg" > matplotlibrc - if [ "$DOC_BUILD" ]; then # doc build - pip install -r doc-requirements.txt - cd doc - jupytext --sync source/*/*.ipynb - # Build without the API documentation, for the doctests - make html - if [ "$RUN_R_TESTS" ]; then sudo apt-get install -y r-base r-base-dev r-cran-devtools r-cran-rcpp; pip install rpy2 statsmodels -c constraints.txt ; @@ -121,11 +113,22 @@ script: # No figure windows for mpl; quote to hide : from travis-ci yaml parsing - pip install -r requirements.txt -c constraints.txt; # older rpy2 # Change into an innocuous directory and find tests from installation - - mkdir for_testing - - cd for_testing - 'echo "backend : agg" > matplotlibrc' - + - | + if [ "$DOC_BUILD" ]; then + pip install -r doc-requirements.txt; + cd doc; + jupytext --sync source/*/*.ipynb; + sudo apt-get install pandoc; + make html; + fi + # + # # Build the htmlwithout the API documentation, for the doctests + # + # fi # Doctests only on platforms that have 
compatible fp output + - mkdir for_testing + - cd for_testing - if [ `uname` == "Darwin" ] || [ "${TRAVIS_PYTHON_VERSION:0:1}" == "3" ]; then DOCTEST_ARGS="--with-doctest"; diff --git a/doc-requirements.txt b/doc-requirements.txt index 37dc7d0d8..ab7ed399c 100644 --- a/doc-requirements.txt +++ b/doc-requirements.txt @@ -12,3 +12,4 @@ tensorflow keras nbsphinx jupytext +sphinx_rtd_theme diff --git a/selectinf/learning/fitters.py b/selectinf/learning/fitters.py index 525179102..c6edb396c 100644 --- a/selectinf/learning/fitters.py +++ b/selectinf/learning/fitters.py @@ -8,7 +8,6 @@ def gbm_fit_sk(T, Y, **params): fitfns = [] for j in range(Y.shape[1]): - print('variable %d' % (j+1,)) y = Y[:,j].astype(np.int) clf = ensemble.GradientBoostingClassifier(**params) clf.fit(T, y) @@ -24,7 +23,6 @@ def random_forest_fit_sk(T, Y, **params): fitfns = [] for j in range(Y.shape[1]): - print('variable %d' % (j+1,)) y = Y[:,j].astype(np.int) clf = ensemble.RandomForestClassifier(**params) clf.fit(T, y) From 3ea185bb728675a6d8ec653684782a66c373747c Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 25 Sep 2019 01:03:21 -0700 Subject: [PATCH 003/187] full model simulation --- doc/source/learning/Full_model_LASSO.Rmd | 32 ++++------ doc/source/learning/Full_model_LASSO.ipynb | 74 ++++++---------------- 2 files changed, 35 insertions(+), 71 deletions(-) diff --git a/doc/source/learning/Full_model_LASSO.Rmd b/doc/source/learning/Full_model_LASSO.Rmd index 31c9d66a2..bbbe6bf63 100644 --- a/doc/source/learning/Full_model_LASSO.Rmd +++ b/doc/source/learning/Full_model_LASSO.Rmd @@ -36,7 +36,7 @@ from selectinf.tests.instance import gaussian_instance # to generate the data from selectinf.learning.core import normal_sampler # our representation of the (limiting) Gaussian data from selectinf.learning.utils import full_model_inference, pivot_plot -from selectinf.learning.Rfitters import logit_fit +from selectinf.learning.fitters import gbm_fit_sk ``` We will know generate some data from an OLS regression model and fit the LASSO @@ -45,16 +45,16 @@ true parameters, hence we can then return pivots for each variable selected by the LASSO. These pivots should look (marginally) like a draw from `np.random.sample`. This is the plot below. 
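As a small numerical companion to the `pivot_plot` ECDF figure produced at the end of this notebook, uniformity of the pivots can also be summarized by how far their empirical CDF strays from the diagonal. This is a sketch only, not part of `selectinf`: it assumes only `numpy`, and `ecdf_deviation` is a hypothetical helper that could be applied to the `pivot` column of the data frame built below.

```python
import numpy as np

def ecdf_deviation(pivots):
    """Maximum gap between the empirical CDF of `pivots` and the Uniform(0,1) CDF."""
    pivots = np.sort(np.asarray(pivots))
    grid = np.arange(1, pivots.shape[0] + 1) / pivots.shape[0]
    return np.abs(pivots - grid).max()

# Reference value for truly uniform draws; well-calibrated pivots should be comparable.
print(ecdf_deviation(np.random.sample(2000)))
```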
-```{python} +```{python collapsed=TRUE} np.random.seed(0) # for replicability -def simulate(n=100, +def simulate(n=200, p=20, s=5, signal=(0.5, 1), sigma=2, alpha=0.1, - B=4000, + B=6000, verbose=False): # description of statistical problem @@ -69,7 +69,11 @@ def simulate(n=100, random_signs=True, scale=False)[:3] - dispersion = sigma**2 + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) S = X.T.dot(y) covS = dispersion * X.T.dot(X) @@ -89,16 +93,12 @@ def simulate(n=100, noisy_S = sampler(scale=scale) loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) problem = rr.simple_problem(loss, pen) - soln = problem.solve(max_its=50, tol=1.e-6) + soln = problem.solve(max_its=100, tol=1.e-10) success += soln != 0 return set(np.nonzero(success)[0]) - XTX = X.T.dot(X) - XTXi = np.linalg.inv(XTX) - resid = y - X.dot(XTXi.dot(X.T.dot(y))) - dispersion = np.linalg.norm(resid)**2 / (n-p) - + lam = 3.5 * np.sqrt(n) selection_algorithm = functools.partial(base_algorithm, XTX, lam) if verbose: @@ -112,8 +112,8 @@ def simulate(n=100, sampler, success_params=(1, 1), B=B, - fit_probability=logit_fit, - fit_args={'df':20}) + fit_probability=gbm_fit_sk, + fit_args={'n_estimators':500}) ``` Let's take a look at what we get as a return value: @@ -128,7 +128,7 @@ df.columns ```{python} dfs = [] -for i in range(10): +for i in range(30): df = simulate() if df is not None: dfs.append(df) @@ -139,7 +139,3 @@ fig = plt.figure(figsize=(8, 8)) results = pd.concat(dfs) pivot_plot(results, fig=fig); ``` - -```{python collapsed=TRUE} - -``` diff --git a/doc/source/learning/Full_model_LASSO.ipynb b/doc/source/learning/Full_model_LASSO.ipynb index 49845025b..fbceea950 100644 --- a/doc/source/learning/Full_model_LASSO.ipynb +++ b/doc/source/learning/Full_model_LASSO.ipynb @@ -23,29 +23,7 @@ { "name": "stderr", "output_type": "stream", - "text": [ - "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/sklearn/ensemble/weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. 
It will be removed in a future NumPy release.\n", - " from numpy.core.umath_tests import inner1d\n", - "Using TensorFlow backend.\n", - "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:455: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n", - "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:456: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n", - "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:457: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n", - "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:458: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n", - "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:459: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n", - "/Users/jonathantaylor/anaconda/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:462: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n", - " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n", - "R[write to console]: Loaded gbm 2.1.5\n", - "\n", - "R[write to console]: randomForest 4.6-14\n", - "\n", - "R[write to console]: Type rfNews() to see new features/changes/bug fixes.\n", - "\n" - ] + "text": [] } ], "source": [ @@ -60,7 +38,7 @@ "from selectinf.learning.core import normal_sampler # our representation of the (limiting) Gaussian data\n", "\n", "from selectinf.learning.utils import full_model_inference, pivot_plot\n", - "from selectinf.learning.Rfitters import logit_fit" + "from selectinf.learning.fitters import gbm_fit_sk" ] }, { @@ -77,18 +55,20 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "np.random.seed(0) # for replicability\n", "\n", - "def simulate(n=100, \n", + "def simulate(n=200, \n", " p=20, \n", " s=5, \n", " signal=(0.5, 1), \n", " sigma=2, \n", " alpha=0.1, \n", - " B=4000,\n", + " B=6000,\n", " verbose=False):\n", "\n", " # description of statistical problem\n", @@ -103,7 +83,11 @@ " random_signs=True,\n", " scale=False)[:3]\n", "\n", - " dispersion = sigma**2\n", + "\n", + " XTX = X.T.dot(X)\n", + " XTXi = np.linalg.inv(XTX)\n", + " resid = y - X.dot(XTXi.dot(X.T.dot(y)))\n", + " dispersion = np.linalg.norm(resid)**2 / (n-p)\n", "\n", " S = X.T.dot(y)\n", " covS = dispersion * X.T.dot(X)\n", @@ -123,16 +107,12 @@ " noisy_S = 
sampler(scale=scale)\n", " loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0)\n", " problem = rr.simple_problem(loss, pen)\n", - " soln = problem.solve(max_its=50, tol=1.e-6)\n", + " soln = problem.solve(max_its=100, tol=1.e-10)\n", " success += soln != 0\n", " \n", " return set(np.nonzero(success)[0])\n", "\n", - " XTX = X.T.dot(X)\n", - " XTXi = np.linalg.inv(XTX)\n", - " resid = y - X.dot(XTXi.dot(X.T.dot(y)))\n", - " dispersion = np.linalg.norm(resid)**2 / (n-p)\n", - " \n", + " \n", " lam = 3.5 * np.sqrt(n)\n", " selection_algorithm = functools.partial(base_algorithm, XTX, lam)\n", " if verbose:\n", @@ -146,8 +126,8 @@ " sampler,\n", " success_params=(1, 1),\n", " B=B,\n", - " fit_probability=logit_fit,\n", - " fit_args={'df':20})" + " fit_probability=gbm_fit_sk,\n", + " fit_args={'n_estimators':500})" ] }, { @@ -166,7 +146,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{18, 13, 14}\n" + "{19}\n" ] }, { @@ -200,17 +180,14 @@ "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", - "text": [ - "/Users/jonathantaylor/git-repos/selectinf/selectinf/distributions/discrete_family.py:86: RuntimeWarning: divide by zero encountered in log\n", - " self._lw = np.array([np.log(v) for v in xw[:,1]])\n" - ] + "text": [] } ], "source": [ "dfs = []\n", - "for i in range(10):\n", + "for i in range(30):\n", " df = simulate()\n", " if df is not None:\n", " dfs.append(df)" @@ -223,7 +200,7 @@ "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfoAAAHpCAYAAABqV/58AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0\ndHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzdd3xUVfrH8c9JJUGKdAQiooBrV1DX\nBjEJCBFpgmJDxMqKujYUFcQuylpwsWIBlQVlQXqREOBnQSy4qKiISm/SWwgp5/fHxJEkEzJJZube\nmfm+X6+8wpy5ufcJM5nnPvece46x1iIiIiKRKcbpAERERCR4lOhFREQimBK9iIhIBFOiFxERiWBK\n9CIiIhFMiV5ERCSCxTkdQDDUq1fPNm/e3OkwREREQuLrr7/eaq2t7+u5iEz0zZs356uvvnI6DBER\nkZAwxqwu6zlduhcREYlgSvQiIiIRTIleREQkginRi4iIRDAlehERkQgWkaPu/bF79262bNlCXl6e\n06FIJcTHx9OgQQNq1qzpdCgiIq4WlYl+9+7dbN68mSZNmpCUlIQxxumQpAKsteTk5LB+/XoAJXsR\nkcOIykv3W7ZsoUmTJiQnJyvJhyFjDMnJyTRp0oQtW7Y4HY6IiKtFZaLPy8sjKSnJ6TCkipKSktT1\nIiJSjqhM9IAq+Qig11BEpHxRm+hFRESigRK9iIhIBFOiD1PDhg2jXr16TocRNN9//z3GGBYsWOB0\nKCIiYU2JXkREJII5muiNMW8ZY7YYY74v43ljjBlpjFlpjFlmjDkj1DFKcXl5eRQUFDgdhoiI+Mnp\nCXPeAf4NjC3j+c5Ay6Kvs4FXir5LObZv387999/PlClT2LVrF2eccQbPP/88Z5/913/fv/71L8aP\nH8+KFSuoVq0aZ511Fs8//zzHHXecd5vU1FTq1atHx44dGT58OKtWrWLVqlW8+eab/Pvf/+bjjz9m\nwIABLFu2jNatWzNy5EguuOCCYrGMHj2a559/npUrV9KoUSNuvfVWBg0aVGybl19+maeeeort27eT\nlpbG7bffHtz/IAlv1sKLL8KkSbB7t9PRiJQp5wBs3gS5ub6f3/S3C2n/zfNBjcHRRG+tXWSMaX6Y\nTboBY621FlhsjKltjGlsrd0YyDjcdJeWtVXfR25uLhkZGezcuZNnn32WBg0a8Morr5CRkcEvv/xC\no0aNAFi3bh0DBw7k6KOPZvfu3bz66quce+65/PLLL9SqVcu7v08//ZRff/2V4cOHk5yc7H1u//79\nXHvttdx55500atSIRx55hJ49e7J69WqSk5MBePbZZ3nggQcYNGgQqampfP311wwZMoTk5GQGDhwI\nwJQpU7j11lu55ZZb6N69OwsXLqR///5V/4+QyPXCC3DXXU5HIVKuJKD5IY9zgD+AlKLHOzYdE/wg\nrLWOfhX9H3xfxnPTgfMPeZwFtC1vn23atLGHs3z58mKPPenVHV/+evjhh23dunV9Pjd69GgbHx9v\nV6xY4W3Ly8uzLVq0sPfcc4/Pn8nPz7f79++3RxxxhB0zZoy3vX379rZatWp206ZNpY4P2KysLG/b\n0qVLLWBnzZplrbV2165dtnr16nbYsGHFfnbIkCG2YcOGNj8/31pr7Zlnnmk7depUbJsbbrjBAjY7\nO/uw/w8lX0uJEmec4fwfq770VcGvvWDTwKaAXVXU9nnj7gH5kwC+stZ3ToyYwXjGmJuMMV8ZY776\n448/nA7HUfPmzaNNmzYcc8wx5Ofnk5+fD0D79u356quvvNstXryYDh06ULduXeLi4khOTmbv3r2s\nWLGi2P7atGlDw4YNSx0nISGB1NRU7+MTTjgB8FwpAPj888/Zt28fvXv39saRn59PWloamzdvZt26\ndeTn5/PNN9/QrVu3Yvvu2bNnQP4vJEKtXOl0BCIVsgdPX/R8YA1wIRCqTien++jLsx5odsjjpkVt\npVhrXwdeB2jbtq0NfmjutXX
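Alongside the dispersion change, the selection probability is now learned with scikit-learn gradient boosting (`gbm_fit_sk`, `n_estimators=500`) rather than the R-backed `logit_fit` with 20 df, and the number of learning simulations `B` rises from 4000 to 6000. The real fitter lives in `selectinf.learning.fitters`; the sketch below only illustrates the idea, and the wrapper `sk_prob_fit` and its `(T, Y)` interface are assumptions for exposition, not the package's API:

```python
# Hedged sketch of a gradient-boosting "fit_probability": the helper
# sk_prob_fit and its signature are illustrative assumptions; the actual
# gbm_fit_sk in selectinf.learning.fitters may differ.
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier

def sk_prob_fit(T, Y, n_estimators=500):
    """T: (B, k) perturbed statistics; Y: (B,) 0/1 selection indicators."""
    clf = GradientBoostingClassifier(n_estimators=n_estimators)
    clf.fit(np.asarray(T), np.asarray(Y))
    # return a callable giving the estimated selection probability at t
    return lambda t: clf.predict_proba(np.atleast_2d(t))[:, 1]
```

Compared with a fixed-degrees-of-freedom logistic fit, the boosted model lets the estimated selection probability vary more flexibly with the statistic, consistent with the patch also raising the number of simulated selections used to train it.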
... (remainder of the replaced pivot-plot PNG, base64, omitted) ...\n",
+      "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAfoAAAHpCAYAAABqV/58AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0\ndHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzdd3hURRfA4d+kFyB0kCC9qIiKRCwI\nhN4hNAVURJSmIPKBKCBVFCmKonQUQVEQpHcIRUEQUcSCSpEivfdA2nx/3CUku5tkk2zf8z5PHtiz\nd+8eIOTsnTtzRmmtEUIIIYR38nN1AkIIIYRwHCn0QgghhBeTQi+EEEJ4MSn0QgghhBeTQi+EEEJ4\nMSn0QgghhBcLcHUCjlCwYEFdqlQpV6chhBBCOMXPP/98TmtdyNpzXlnoS5Uqxa5du1ydhhBCCOEU\nSqkj6T0nQ/dCCCGEF5NCL4QQQngxKfRCCCGEF5NCL4QQQngxKfRCCCGEF/PKWfe2uHLlCmfOnCEh\nIcHVqYhsCAwMpHDhwuTJk8fVqQghhFvzyUJ/5coVTp8+TWRkJKGhoSilXJ2SyAKtNXFxcRw/fhxA\nir0QQmTAJ4fuz5w5Q2RkJGFhYVLkPZBSirCwMCIjIzlz5oyr0xFCCLfmk4U+ISGB0NBQV6chcig0\nNFRuvQghRCZ8stADciXvBeTfUAghMuezhV4IIYTwBVLohRBCCC8mhd5DDR8+nIIFC7o6DYf5448/\nUEqxefNmV6cihBAeTQq9EEII4cVcWuiVUp8ppc4opf5I53mllJqolDqglPpNKfWws3MUaSUkJJCU\nlOTqNIQQQtjI1Q1zPgc+Aeak83xjoLzp61FgiulXkYkLFy7w5ptvsnTpUi5fvszDDz/MhAkTePTR\nO39977//PvPmzWPfvn2EhIRQrVo1JkyYQLly5VKOiY6OpmDBgjRo0IAxY8Zw+PBhDh8+zKeffson\nn3zC+vXr6dmzJ7/99hsVK1Zk4sSJ1KhRI00uM2fOZMKECRw4cICiRYvyyiuvMGDAgDTHTJ48mdGj\nR3PhwgXq1KnDq6++6ti/ICGE650+DUOGwM8/g5deQMTdNP6Yt25af/7UvbWp9csEh+bg0kKvtf5O\nKVUqg0NaAnO01hrYoZTKq5S6S2t90p55uNMqLa1zfo5bt25Rr149Ll26xLhx4yhcuDBTpkyhXr16\n7N+/n6JFiwJw7NgxevXqRcmSJbly5QpTp07liSeeYP/+/URERKScb9u2bRw8eJAxY8YQFhaW8tyN\nGzd4/vnn6du3L0WLFmXEiBG0bt2aI0eOEBYWBsC4ceMYNGgQAwYMIDo6mp9//pkhQ4YQFhZGr169\nAFi6dCmvvPIKPXr0ICYmhi1bttClS5ec/0UIIdzXjRtQuzb89ZerM3GoUKBUqsdxwFmghOnxxVOl\nHZ+E1tqlX6a/gz/SeW4F8GSqx7FAVGbnrFq1qs7I3r170zw2yqt7fNlq2LBhukCBAlafmzlzpg4M\nDNT79u1LiSUkJOgyZcro/v37W31NYmKivnHjhs6VK5eePXt2SrxWrVo6JCREnzp1yuL9AR0bG5sS\n2717twb06tWrtdZaX758WYeHh+vhw4enee2QIUN0kSJFdGJiotZa60ceeUQ3atQozTEvvfSSBvSm\nTZsy/Hsw/7cUQniIN990/Q9cJ39dA10HdAnQh02x7XfF2OWvE9il06mJXjMZTynVTSm1Sym16+zZ\ns65Ox6U2bNhA1apVKV26NImJiSQmJgJQq1Ytdu3alXLcjh07qF+/PgUKFCAgIICwsDCuXbvGvn37\n0pyvatWqFClSxOJ9goKCiI6OTnl83333AcZIAcD27du5fv067dq1S8kjMTGROnXqcPr0aY4dO0Zi\nYiK//PILLVu2THPu1q1b2+XvQgjhhn7/HcaPd3UWTnUV4170RuAoUBu44qT3dvU9+swcB+5O9bi4\nKWZBaz0dmA4QFRVlhwFwz3Xu3Dl27NhBYGCgxXNly5YF4OjRozRo0IBq1aoxbdo0ihUrRlBQEE2b\nNuXmzbQ3k6wVeYDcuXPj53fns2JQUBBAyuvPnTsHQKVKlay+/r///iM4OJikpCQKFy6c5jnzx0II\nL5GcDN27g+kCxBdcxijy21PFugLO2o7L3Qv9MqCXUmoexiS8y9rO9+e9Uf78+YmKimLKlCkWzwUH\nBwOwZs0abty4wdKlSwkPDwcgMTGRCxcuWLwmu61m8+fPD8CKFSusflioWLEioaGh+Pv7W2xOI5vV\nCOGlZsyA7dst4/36wbPPOj8fB3j3XfhmgfH7RC7zLy8Tx96U59947n+0bvoc/wCRd0VYP4kdubTQ\nK6W+BqKBgkqpY8AwIBBAaz0VWAU0AQ4AN4AXHJGH9rLr/7p167Ju3TpKlCiR7pVxXFwcfn5+BATc\n+Rb45ptvUob57eHxxx8nNDSUEydO0LRp03SPq1KlCkuXLqVHjx4psUWLFtktDyG81uXL8MMPcO2a\nqzOxTWIivPGGZbxCBXjnHTBdiLhSXJzxV2rlmscmZ8/CWwvBKCvnMMrWnSL/8ccfp0xEdhZXz7rv\nkMnzGnjFSel4nPj4eBYuXGgRb9y4MVOnTiU6Opr+/ftTpkwZzp8/z86dOylatCh9+/alTp06JCUl\n8cILL/Diiy/y559/Mn78ePLmzWu3/PLmzcvw4cPp06cPR44coWbNmiQnJ7Nv3z42bdrE4sWLARg0\naBCtW7emZ8+etGrVii1btrBmzRq75SGEV9q5Exo2hEuXXJ1Jzk2b5hZF/vRpqFUL/vnHLmcD6gF3\n2sRMmzaNbt262ePkWeLuQ/ciA1evXqVdu3YW8U2bNrFp0yaGDh3KsGHDOH36NIULF6ZatWq0aNEC\ngMqVK/P5558zfPhwFi9ezIMPPsiCBQt4+umn7ZrjgAEDKFasGBMmTOD9998nJCSEChUqpHmfVq1a\n8fHHH/Pee+8xe/ZsoqOj+fTTT2nYsKFdcxHCa1y/Du3aeUeR79wZUk3qdaUxY+xV5JOBZtwu8kop\nPv30U154wSGD0plS2tvGrTEm46WeXW7ur7/+4t5773ViRsJR5N9S+KT+/eH9912dRc4VLAh//w0F\nCrg6E7SGu++G41ane2fHRqApEM+cOXN47rln7HViq5RSP2uto6w9J1f0QgjhSXbvhg8/dHUWOefn\nB1OmuEWRB6M5n/2KPEAdQkOXMmLEZZ57znLk1Zmk0AshhKdISjKWppm3iw0JgWbNXJNTduTPD089\nBXXrujqTFEuWWMZKloRHHrHt9VrrNCuUiheHLl0aULmynRLMASn0QgjhKSZPhp9+sowPHQoDBzo/\nHy9irdC/8Qb07Jn5a//55x+6dOnC3LlzKVWqlN1zyykp9EII95eUBCNHwvLlnrOUzBGOHrWMVapk\nrEEX2bZ/P/z5p2XcNHc5Q3v37k3p9lmnTh22bNnC3Xff
nfkLnUgKvRDC/fXubdzPFZamTQNTV0qR\nPdau5qtVg8jIjF/322+/Ua9ePW63XT99+jSHDx92u0LvNb3uhRBe6rvvpMinp1s3qF7d1Vl4PGuF\nPiYm49f88ssv1K5dO6XI58qVizVr1lhs0+0OpNALIdzXrVvG5DNhqXBheO89V2fh8U6dst6RN6NC\nv3PnTurWrZvSMjwiIoL169e7ZZEHKfRCCHc2bpyxzlqkVbw4rFoF+fK5OhOPt3y5ZRv0ihUhvfYc\n27Zto169elwyNSvKly8fsbGxPPbYYw7ONPvkHr0Qwj3t2wejRlnGH30UZs+GbG625PGCgox1X776\n57ezrAzbb968mWbNmnH9+nUAChYsyIYNG3jwwQcdmGHOSaH3UMOHD2fEiBE0aNCAtWvXpnmubdu2\nnDt3js2bN9t0rsOHD1O6dGmWL19OM09aiys8y8GD8OOPkJBg2/EzZxpD96n5+8P06cYllxA5dPUq\nbNhgGbdW6Hfv3k2TJk2Ii4sDjO27Y2Nj092G251Iofdw69at46effuIRW7s6WHHXXXexfft27rnn\nHjtmJkQqU6fatiA5M/36wQMP5Pw8QgBr1kB8fNpY0aLGjHtz999/Pw0bNmTJkiUUK1aMjRs3UtFD\nPnDKPXoPlj9/fipXrsw777yTo/MEBwfz2GOP2XXnOiFS7NkD9tiWs1QpGDYs5+cRAkhOhgkTLOMt\nWxrdec0FBgYyb948unbtypYtWzymyIMUeo+mlGLw4MEsW7aM33//3eoxJ0+epEuXLpQpU4bQ0FAq\nVKjAW2+9RXyqj7GHDx9GKcWKFSsA6Ny5s9URgkmTJhEWFsbVq1cBSE5O5r333qNcuXIEBwdToUIF\nZs+e7YA/qfBY6bVszY7JkyEsLOfnEQLjDlBWZ9sHBwczffp0ypUr57jEHEAKvYdr164d5cuXT/eq\n/ty5c+TPn58PPviANWvW8PrrrzNr1ix69+6d7jmffvppdu3axaFDh9LE58+fT5MmTcidOzcAvXv3\nZtSoUXTr1o2VK1fSqlUrunTpkvKBQQimTjXuy+dU9+7QuHHOzyMEcPIkvPmmZfyee6BePeP3X331\nFUOGDMEbdniVe/TgXrNXs/hN5efnx8CBA3nxxRcZOXIkFSpUSPN85cqVGT9+fMrj6tWrEx4eTpcu\nXfj4448JstJRq379+hQoUID58+fzpul/w/Hjx9m6dSvffPMNAAcOHGDKlCnMmjWL559/HoB69epx\n8uRJRowYIZP6BJw4Yb3/esmSUKuWbecICoInn4TnnrNvbsKn9e0Lly9bxqdNg4AAmD17Ni+88AJa\nawIDAxk6dKjzk7QjKfRe4Nlnn2XEiBGMHj2aWbNmpXlOa81HH33E9OnTOXToEDdv3kx57ujRo1aH\noAICAmjdunWaQr9gwQLCw8Np2rQpALGxsfj5+dGqVSsSExNTXlu3bl2+/vprkpKS8Pf3d8QfV3iK\nPn2Mac3mvvzSKN5CuMDq1TB/vmW8SxeoWRNmzJhB9+7dU67kFyxYQL9+/QgPD3dypvYjQ/deICAg\ngAEDBvDll19y5MiRNM99+OGH9O/fn1atWrF06VJ27tzJpEmTANIUfXPt27fn119/Zd++fYAxbN+i\nRQtCQ0MB45ZAUlISERERBAYGpnx17tyZxMRETp486aA/rfAIK1bAwoWW8a5dpcgLh7l+3fh8WaEC\n3H239a82bSxfV7AgjB1rzEPq1q1bSpF/6KGH2LRpk0cXeZAreq/RpUsXRo0axZgxY9LEFyxYQNu2\nbdPcw9+7d2+m56tVqxZFihRh/vz5dOrUiR07djAw1TBs/vz5CQgIYNu2bfhZmaJauHDhHPxphEdL\nTIRXX7WMS8tW4WBvvAGm65gsmTABZs/+gH6pdgGMiopi7dq15M+f344ZuoYUei8RHBxM//79GThw\nIFWrViUwMBCAuLg4goOD0xw7d+7cTM/n7+9Pu3btmD9/PiEhIeTNm5dGjRqlPF+nTh2SkpK4fPky\n9evXt+8fRni2778Hs4mcgPHT1At+aAr3FBcHZncubVK3Lvz333sMGnTnQuaxxx5jzZo1RERE2DFD\n15GhezAmwLnLVw50796d3Llz88MPP6TE6tevz/z585k8eTJr166lU6dOHDhwwKbzPf300/z5559M\nmDCBmJiYNBP3KlasSI8ePWjfvj1jxowhNjaWlStXMnbsWF566aUc/TmEh7PWU7R2bejQwfm5CJ+x\nYQPcuJG11wQFaSpVGpmmyNeoUYN169Z5TZEHKfReJSwsjL59+6aJDR06lA4dOvDWW2/RoUMHgoKC\nmDhxok3nq169OnfffTcnT56kffv2Fs9PmjSJIUOGMGfOHJo0aULnzp1ZuXIlNWvWtMufR3ggra0X\n+i5d3Gt1i/A61r7tMlKkCLRv/w4TJ95pwlS7dm1Wr16dsoTYWyhvWCNoLioqSu/atSvd5//66y/u\nTW9rIuFR5N/SzezeDQ8/nDYWEABnzshOa8JhkpKM1rXnzqWNf/bZnXXxqfn5QbFisG3bVho1asT1\n69dp0KABixcvJsxDmzIppX7WWkdZe07u0Qsh7MfaZVV0tBR54VA//GBZ5END4emnM26m+OSTT7Ji\nxQomT57MnDlzCAkJcWyiLiKFXghhP4sXW8Yy6ikqhB1Y+3zZoIFtHZOjo6OJjo62e07uRO7RCyHs\n4+BBsLbnQosWzs9F+Iz0poWYf75MSkpiwIAB7N+/3zmJuREp9EII+1i61DIWFWV0KRHCQX7/Hf79\nN23Mzw9Sd+FOTEykc+fOjBs3jjp16ljs4+HtpNALIezDlssqIezM2rddjRpGtzuAhIQEnnnmGb78\n8ksAjh07xvTp052Yoev57D16rTVKlvt4NG9cMeKxzpyBbdss41LohYNl9PkyPj6e9u3bszjV3JHu\n3bunu9unt/LJK/rAwEDi4uJcnYbIobi4uJQOgMLFli+H5OS0sXLl4L77XJOP8AlHjhgrOs3FxBh7\nebRu3TpNke/duzdTpkyx2rbbm/nkFX3hwoU5fvw4kZGRhIaGypW9h9FaExcXx/HjxylSpIir0/FM\nX38N48YZPynt4fp1y1hMjDTJEQ5lbVrIQw9B4cI3aNmyFevWrUuJ9+/fn7Fjx/rkz3ufLPR58uQB\n4MSJEyQkJLg4G5EdgYGBFClSJOXfUmTBypXQsaPj36dVK8e/h/Bp1obtmzS5TrNmzdm0aVNKbPDg\nwbz99ts+WeTBRws9GMVeioTwOdeuwcsvO/59ihSBRx91/PsIn3X+PHz3nXn0GqtXN2b37q0pkREj\nRjB06FCn5uZufOtGhRC+bvhwOHrU8e/z7LPg7+/49xE+a8UKo/VtaiVLhlChQmTK49GjR/t8kQcf\nvqIXwufs3g0ffujY9/Dzg+bNYcgQx76P8HnWhu1btQpg7NgvSEhI4Mknn7TY5MtXSaEXwhckJUG3\nbpaXQCEhsGM
HFC9un/cJCzOajAvhQDduwNq1lvGYGGP+zoIFC3xuZn1GpNAL4YlOnIB16+DCBduO\n/+svsLaj49Ch8OCD9s1NCAdbvx6MFdKngS+AfhQooKhe3XheinxaUuiF8DS7dxt7b9pa5NNTqRL0\n62efnIRwImPY/gRQF/gbOEezZqMJCPDNWfWZkY89QniS+HhjoltOizzA9OkQFJTz8wjhRImJsGTJ\nf0AtjCIPMJ6HH7ayoZIApNAL4VnGjYO9e3N+nu7d4Ykncn4eIZxs4cJDXLpUEzhgigQQFDSPrl0f\ncGVabk2G7oXwFAcOwNtv5/w8998Po0fn/DxCONmBAwfo2rUO8J8pEggsoGnTljIHNANS6IXwBFpD\njx5w61bauL+/0QAnwMb/yhUqwNNPQ7589s9RCAf6+++/qVu3LteunTBFgoHFQGPZOykTUuiF8ARz\n50JsrGW8b19jOF8IL/bHH39Qr149Tp8+bYqEAsuAevj7p917XliSQi+Eu7lyBXr3Nlp/3bhhxMyv\n5AFKlDA63Qnhxfbs2UO9evU4d+6cKRIOrACiAahVC/Lnd1FyHkIKvRDu5s03Yc6czI+bPBnCwx2f\njxAuFB8fT3x8PAB+frlJTl4NVE95XobtMyez7oVwJzdvwhdfZH5cu3bQtKnj8xHCxR555BFWr15N\nRERxkpPXk7rIA7Rs6Zq8PIlc0QvhTjZuNHaYy0hEhON71gvhRiIjnyA+/gDGBLw7qlc37mCJjMkV\nvRDuxNpOHandf7/R+rZYMefkI4STbd68mb2pekVoDb16QVxc2iKvFLz/vrOz80xyRS+Eu0hKgqVL\nLeNffWXciPTzg+Bgy+eF8BLr1q2jZcuW5M2bly1btlChQgUWLTLmpZp7+WV49FHn5+iJ5IpeCHfx\n449w5kzaWHCwse1raKgUeeHVVq5cSfPmzbl58yanTp2iU6dOXLqk6d3b8ti77oJ33nF+jp5KCr0Q\n7sLasH39+pArl/NzEcKJlixZQqtWrVJm15coUYK5c+cyZIji5EnL4ydONKaqCNtIoRfCHWgNixdb\nxmXtkPByCxYsoF27diQkJABQunRpvvvuO86fL8ukSZbHN2sGbdo4OUkPJ/fohXAHe/cavexT8/Mz\nhu2F8FJz586lU6dOJCcnA1C+fHk2btxIkSLFadnS+PybWlgYfPKJMRFP2E6u6IVwB9aG7atXh8KF\nnZ+LEE4wa9YsnnvuuZQif++997JlyxaKFy/ORx/Bnj2Wrxk5EkqWdHKiXkAKvRDuwFqhl2F74aWm\nTZtGly5d0KZL9sqVK7N582buuusuDh+GYcMsX/PQQ9Cnj3Pz9BZS6IVwtf/+g127LOPS8kt4Ia01\nP/74Y8rjhx56iNjYjRQqVJjkZHjllTtbPNymFEyfbvsmjSIt+WsTwtWWLbOMVa4MZcs6PxchHEwp\nxYwZM7h16xa//rqfxMS1FCmSz+J+fGq9esEjjzgvR28jhV4IV5PZ9sLH+Pv7M3XqbCpUiOPUqdwZ\nHhsZCaNGOSkxLyVD90K40sWLsHmzZbxVK6enIoQjaK1ZuXJlyv3422JjAzIt8gAffwx58jgqO98g\nhV4IV1q50mh9m1qJEsbMIyE8nNaawYMH06xZM1577bU0xT6zbR3AmKYig1s5J4VeCFdKb7a9LBQW\nHk5rTf/+/Rk9ejQAEydO5LPPPgMgMRGWL0//tblzQ8eOMHeu/FewB7lHL4SrxMXBmjWWcbmEER4u\nOTmZPn368Mknn6TEmjVrxjPPPAPA1q1w4ULa14SFwblzxrYOwr6k0AvhKrGxcP162li+fFCjhmvy\nEcIOkpOT6dGjBzNmzEiJtW7dmq+//pqgoCDA+kBWo0ZS5B1Fhu6FcBVrP+2aN5fFwsJjJSUl8eKL\nL6Yp8k8//TTz5s1LKfJaS38oZ5NCL4QrJCVZXz8vP+2Eh0pMTKRTp058/vnnKbHnnnuOL7/8ksDA\nwJTYnj1w5Eja1/r7Q9OmTkrUB0mhF8IVtm+Hs2fTxkJCoEED1+QjRA4kJCTQsWNHvvrqq5RYly5d\nmDVrFgFmI1TWruZr1YL8+R2dpe+SQi+EK1j7adegAYSHOz8XIXLo8OHDbNiwIeXx7Xv0/v7+FsdK\nfyjnk0IvhLPJ3vPCy5QvX57169cTERHBq6++yuTJk/Hzsywv//4Lv/1m+XrZ1sGxZNaPEM5y65Zx\nb37vXuMnXmqy97zwcFWrVuXXX3+lZMmSqHQWvy9dau11Ro8o4Tguv6JXSjVSSv2jlDqglHrTyvMl\nlFKblFK7lVK/KaWauCJPIbLt0CF44gljoXB4uPXdOWrUgIIFnZ+bENlw7do1/vrrL4t4qVKl0i3y\nILPtXcWlhV4p5Q9MAhoD9wEdlFL3mR32FvCN1roK0B6Y7NwshciBuDjj3vv27ZCcnP5x8tNOeIgr\nV67QqFEjatasyR9//GHz6w4eNBrlmJNvfcdz9RV9NeCA1vpfrXU8MA8wv1ujgdtbGkQAJ5yYnxA5\nM2oUHDiQ+XFyk1J4gEuXLtGgQQO2bdvGuXPnqFevHhcvXsz0dVrDyy9bftYtWxYqVXJQsiKFq+/R\nRwL/pXp8DHjU7JjhwDqlVG8gHKjnnNSEyKE//4SxYzM/rm5dKF3a8fkIkQPnz5+nQYMG/PLLLymx\ngQMHki9fvkxfO28erFtnGX/2Well7wyuvqK3RQfgc611caAJ8IVSyiJvpVQ3pdQupdSus+brk4Vw\ntuRk6N7d2L3DXGio8RURYUzAmz3b+fkJkQVnz56lTp06aYr85MmT6dOnT6avvXgRXnvNMl6iBPTv\nb88sRXpcXeiPA3enelzcFEvtReAbAK31diAEsJi1pLWerrWO0lpHFSpUyEHpCmGjTz+Fbdss40OH\nwo0bxtelS0Z3vMhI5+cnhI1OnTpFdHQ0v5nWxSmlmDlzJj179rTp9W+8AWfOWMYnTYJcueyZqUiP\nq4fufwLKK6VKYxT49kBHs2OOAnWBz5VS92IUerlkF+7r9GkYMMAyXr48DBzo/HyEsNGRI/DNN3DC\nNBPq2rXjfPttHS5e3AeAUn40aPA5f/zxHH37Zn6+mzchVdv7FG3aQLNmdkxcZMilhV5rnaiU6gWs\nBfyBz7TWfyqlRgK7tNbLgH7ADKVUX4yJeZ211tp1WQuRib59jat1c1OnGm1uhXBD//xjrPK8c+fz\nKFAHOGh67I/WX7J2bXvWrs3+++TODRMn5ihVkUWuvqJHa70KWGUWG5rq93uB6s7OS4hsWbsWvv7a\nMv7881CnjvPzEcIGycnQpYv59gszuVPkAzAWRbXJ8XuNHg3FiuX4NCILXH2PXgjvceMGWLtvWaAA\njB/v/HyEsNHMmfDDD+bR4UAXIAhYhD2KfLVq0KNHjk8jskgKvRD2MmqU0QXP3Pjx0vVOuK1Tp4wJ\nc5b8gOnADiDn7ZmLFjUWmFjZ50Y4mMuH7oXwCr//DuPGWcajo41heyHc
1J0pJUcwFkEZ139DhkC+\nfP5AlRy/R6FC0LCh8atwPin0QuRUemvmg4KMCXjSEUTY0dWrxv5I9rB1q9HMBnYD9YFWwDSef96P\nkSPt8x7C9aTQC5FTM2YYvezNDRoEFSs6Px/hlQ4fhg4dYMcOe5/5J6ABcAmYSUhIPsaPt6Gjo/AY\nco9eiJw4edL6Dc4KFeBNi80Yhci2Xr0cUeS3Y3QVv70cNC8DBrSTKSVeRgq9EDnRty9cvmwZnzYN\ngoOdn4/wSnFxsGaNvc/6HcaV/BXT4wJUrbqR4cOtbKMsPJoUeiGya/VqmD/fMt65szEJTwg72b0b\nkpLsecZYjN3Br5keFyIkZBNz51aRKSVeSO7RC5EdN24Y+26aK1hQ1swLu/vpJ8tYcHD2esXHx6/l\n6tUY4CYAShWlatVYJky4T6aUeCkp9EJkx4gRxuwoc++/bzTIEcKOdu60jI0Ykd769/QtX76ctm3b\nAvEAREZGsnHjRipUqJDzJIXbkqF7IbLqt9+Mgm6uTh147jnn5yO8nrUr+keyeCs9Li6Onj17Eh9v\nFPmSJUvy3XffSZH3AVLohciK22vmzW+YBgfDlCmyZl7Y3cWLsH9/2phSULVq1s4TGhrKqlWryJ8/\nP2XKlGHLli2UKVPGfokKtyVD90JkxbRp1tc4DR5sLKkTws527bKMVawIERFZP9cDDzxAbGwshQoV\nIjIyMufJCY8ghV4IW504YTbIkZMAACAASURBVH1t/D33WN9/Xgg7sDZsX62aba+9cOEC+fPnTxN7\n6KGH7JCV8CQydC+ErV57Da5csYzLmnnhQNYm4tlyf37q1KmUL1+e3bt32z8p4VGk0Athi5UrYcEC\ny/iLL0LNms7PR/iM7EzEmzhxIj179uTChQvUr1+fvXv3OiY54RGk0AuRmevX4ZVXLOOFCsFY6Qku\nHOf4ceOOUWqBgfDgg+m/Zty4cfTp0yflcdmyZbnrrrsclKHwBFLohcjM8OFw5IhlfMIEMLv/KYQ9\nWbuaf+ABCAmxfvyoUaMYkGq+yBNPPMH69evJly+fgzIUnkAKvRAZOX4cPvzQMl6/PnTs6Px8hE+x\ndn/e2kQ8rTVDhw5lyJAhKbGaNWuydu1a8uTJ48AMhSeQWfdCZOTbby33mQ8JkTXzwilsuT+vtWbg\nwIGMGTMmJVa3bl2WLl1KeHi4gzMUnkAKvRAZWbLEMvbaa1C2rPNzET4lOTnzQq+15n//+x8fphp1\natSoEYsWLSI0NNQJWQpPIEP3QqTn/Hn47jvLeIcOzs9F+JwDByx3QA4Ph3vvvfN48eLFaYp8ixYt\nWLJkiRR5kYYUeiHSs3KlZavb0qWhcmXX5CN8irX781Wrgr//ncetWrXiFdOKkDZt2rBgwQKCpaeD\nMCND90KkZ/Fiy1hMjNybF05hS0c8pRQTJ06kSpUqPP/88wQEyI90YUm+K4Sw5sYNWLvWMh4T4/xc\nhE+ydkX/8MOJJCaSpqD7+fnx4osvOjEz4Wlk6F4Ia9avh7i4tLGCBeGJJ1yTj/ApW7da2zspgTlz\n2tOlSxeSzG8pCZEBuaIXwhprs+2bNwcZGhUOFh9v7ISc1i1CQp5izZplAAQGBjJjxgz8/ORaTWRO\nfmoJYS4xEZYvt4zLsL1wgvHjIW1r+jigDTdvrk6J5MmTByVzRYSN5OOgEOa2bjWW1qUWFmZ0wxPC\ngQ4cgJEjU0duAC2AO0X+jTfe4IMPPpBCL2wmV/RCmLM2bN+oEcjaZGFnyclw6NCd5ou9esGtW7ef\nvQY0A7akHD906FCGDx8uRV5kiRR6IVLT2nqhl2F7YWdbtkC7dnD2rLVnLwNNgB9SIqNGjWLw4MFO\nyk54Eyn0QqQWG2u5U52/PzRt6pp8hFc6cQJatIArV6w9exFoCNxZSD9u3Dj69+/vpOyEt5FCL8Rt\nN2/Cyy9bxmvVku1ohV316ZNekQd4jtRF/qOPPuLVV191RlrCS8lkPCFuGz0a9u+3jL/wgvNzEV5r\nxQpYuDCjI8YDhQGYOnWqFHmRY3JFLwTAX38Zhd5c9eqy77ywm2vXwNSaPo2wMChe/Pbv7+Hhh2N5\n8sndvPDCc85NUHglKfRCJCcbHUoSEtLGAwNh2jSQpiTCToYPh6NHzaPJTJ/uxzPPpI7db/oSIuek\n0Avx+efw/feW8QEDoFIlp6cjPNOFC/DJJ/D778biDXNaw9Kl5tEj5M4dQ5kyU4DHnJCl8EVKW/uO\n9HBRUVF6165drk5DeIIzZ+Cee+DixbTxsmWNn9iydl7Y4Pp1ePxx41vGdv8CtYGjREREEBsbS9Wq\nVR2ToPB6SqmftdZR1p6TMUnh2/r1syzyAFOmSJEXNhs+PKtFfh9QEzDG8ePi4jh9+rT9ExMCGboX\nvmzDBvjyS8v4M89Iu1ths19/hQkTsvKKvUBd4BQAISEhLFmyhIYNGzogOyGk0AtfFRcHPXpYxvPl\ngw8+cH4+wiMlJRnzOG3fNfY3oB5gtMMLCwtj+fLl1KlTx0EZCiGFXviqd96Bgwct4+PGQeHCzs9H\neKSpU2HnTst4z55gXrv//fcX3n67PteuXQAgV65crFq1iho1ajghU+HLZDKe8D1//glVqlgup6tR\nAzZvluV0wiYnThjzOK9eTRu/915jOD8o6E5s586dNGzYkEuXLgHGNrNr1qzh8ccfd2LGwptlNBlP\nruiFb5E188JOXn3VssgDTJ+etsgfOXKEevXqcdV0cN68eVm/fj1RUVZ/Jgthd/JTTfiW1ath2zbL\n+JtvGpdiQthg+XL49lvLeNeu8OSTaWMlSpSge/fuABQoUIBNmzZJkRdOJVf0wrd8841lrFw5GDTI\n+bkIj3TtmrFvvLnCheG99yzjSinGjh1LaGgoTz31FPffLx3vhHNJoRe+IzHRuBQzN2YMhIQ4Px/h\nkYYNs9bG1lhil94mh0opRo4c6djEhEiHDN0L3/H995bNccLDoXFj1+QjPM7u3fDhh5bxBg2gQwfj\n98uXL+fZZ58lMTHRuckJkQ65ohe+Y8kSy1ijRtIBT9gkKQm6dTPmc6YWEgKTJ4NSsGjRIp5++mkS\nExPRWjNnzhz8/f1dk7AQJnJFL3yD1tYLfUyM83MRHmnSJLC2anfIEGNrhHnz5vHUU0+lXMn/+OOP\nnD9/3slZCmFJCr3wDbt3W95Y9feHpk1dk4/wKMeOweDBlvFKlaB/f5gzZw7PPPMMSaYWeRUrVmTL\nli0UluZLwg1IoRe+wdrVfHS00fJWiEy8+qox297ctGnwxRef0rlzZ5JNY/r33XcfmzdvJjIy0slZ\nCmGdFHrhG6wV+latnJ+H8DhLl8LixZbx7t1hz57JvPTSS9zuMPrAAw+wefNmihYt6uQshUifFHrh\n/Q4etL6HaIsWzs9FeJSrV62vmS9SBEqU+JBXXnklJfbwww+zceNGChUq5MQMhcicFHrh/axdzUdF\nwd13Oz8X4VGGDjXuz5tr2nQ
Kgwf3TXn86KOPEhsbS4ECBZyYnRC2keV1wvNoDQcOwJkzth0/f75l\nTGbbCyv+++/OnM0TJ2DiRMtjGjaEgQMbsHZtJMePH6d69eqsWrWKPHnyODdZIWwkhV54lkuXoHlz\n2Lo1Z+eRQi9SSUiATp1g3ryMjwsNhSlToHTpssTGxjJs2DBmzpxJrly5nJOoENkghV54lpdfznmR\nL18e7rvPPvkIr/Duu5kXeYDhw6F0aeP3FStWZJ4tLxLCxbJV6JVS9wE1gRJAQSAOOAP8Cnyntbay\neaMQObR2LXz9dc7PExNjtDETAvjnH6PQp08DAylTpgl9+9Z0UlZC2I/NhV4pVRzoBnQB7rodNjtM\nA0lKqQ3AFGCFvr3uRIicuHEDevbM+XmCgox1UUJgTPfo0QPi49M7IhnoBUzh1KlJ7Nq1jscff9x5\nCQphB5kWeqVUfmA40B0IBA4DXwE/AaeAC0AoUAC4B3gciAYaAv8opfpprVfbP3XhU95+Gw4dsow/\n/LDtO88VLw59+xr9SoUAZs+GzZst4/feC3nzJnHwYHfOnPkUgBs3rjFz5kwp9MLj2HJFfwAIBmYC\ns7XWOzN7gVIqD9AeYwRghVKqr9bayvxVIWzw++8wfrxlvHZtiI2VYXiRLefOGe1rzZUuDTt2JNKr\nVxe2b/8iJd6xY0emTZvmxAyFsA9bCv0XwLta69O2nlRrfQWYDkxXSsUAstm3sI3WMGcOLFxodCsB\no+GN+ZafQUEwdaoUeZFt/fuDtT1nPv44ge7dO6WZaNe5c2dmzpwpO9EJj5Rpodda98nJG2itrXQr\nESIdM2cae4FmZvBgqFDB8fkIr7RxozFsb+6pp+L57LMOLFq0KCXWrVs3pkyZgp+f9BcTninL37lK\nqRKmofmMjsmtlCqR/bSEz/rgg8yPqVgR3njD8bkIr3TzpjEBz1xExC0uXWqbpsj36tWLqVOnSpEX\nHi07372HgMyu8l81HSeE7f7+2/jKzLRpEBzs+HyEVxo9GvbvN48mUrx4DOvWLU+J9OvXj4kTJ6Lk\n9pDwcNkp9ArLZXVC5Jy1nvSpKWVc8deq5Zx8hNf5+2+j0JurXj2AVq0eSXk8aNAgxo0bJ0VeeAVH\ndcYrClx30LmFt7JW6Hv3htatjSJfuTLkz+/8vIRXSE42WigkJKSNBwYag0T33TeC+PhbhIWFMXTo\nUCnywmvYVOiVUp3MQg9ZiQH4Y3TLexawsi+oEOk4cQJ+/NEy3quXTLoT2ZKcDL/9ZiyjA9i+Hb77\nzvK4AQOgUiUAxXvvvScFXngdW6/oP8foeofp15amL3O3/4fcAEbkKDPhW5Yts4zde68UeZEtZ89C\ngwbw66/pHXEBGEuZMiMZPDgoJSpFXngjWwv9C6ZfFfAZsARYauW4JOA8sF1rfcmWEyulGgEfYYwG\nzNRav2flmKcwuvNpYI/WuqONeQtPYW3YXnaYE9mgNXTpklGRPwfUB36lWLH9BATMw2j6KYR3sqnQ\na61TVpwqpZ4Hlmit5+T0zZVS/sAkjP91x4CflFLLtNZ7Ux1THhgIVNdaX1RKFc7p+wo3c/mysbDZ\nnBR6kQ2LF8OKFek9exqoB/wBwLZti9myZQv16tVzUnZCOF+WJ+NprWvb8f2rAQe01v8CKKXmYdwS\n2JvqmK7AJK31RdP7n7Hj+wt3sHq15QypYsUgKso1+QiPdfmyMX/TuhNAXcBYwqmU4rPPPpMiL7xe\ntmfdK6XCgNZAFSAvcBn4BVistbZ1xn0k8F+qx8eAR82OqWB6v20Yw/vDtdZrspu3cEPpDdtLkxKR\nRW+9ZczrNPfII//xxx91iIs7AICfnx9z5szhmWeecXKGQjhfdvejbwLMBvKTdk29BiYopV7QWqc7\neJZFAUB5jB3xigPfKaUqm88BUEp1w9hEhxIlpCmfx7h1C1atsozLsL3Iop07YdIky3jt2oc5fLgO\ncXFGD6+AgAC++uor2rVr5+QMhXCN7LTAfRhYhHEVPxdjf/rGpl/nmuILlVJVbTjdceDuVI+Lm2Kp\nHQOWaa0TtNaHgH0YhT8NrfV0rXWU1jqqUKFCWfxTCZfZuPHO5jW3RURIUxyRJYmJxhYJWqeNh4Qc\n5J9/anHItMVxYGAgCxculCIvfEp2xkYHY1y519Bad9Jaf661Xmv6tRPwpOn5QTac6yegvFKqtFIq\nCGNrW/N1VkswruZRShXEGMr/Nxt5C3e0YIFlrGlTY3c6IWw0bRrs2WMe3UdQUE1OnDgKQHBwMEuW\nLKFlS2srg4XwXtkp9DWABVrrHdae1Fr/CCw0HZchrXUi0AtYC/wFfKO1/lMpNVIp1cJ02FrgvFJq\nL7AJeF1rbWVzSeFxdu82tqQ1J8P2IotmzLCM3XtvCPnzGx8YQ0JCWLZsGU2aNHFyZkK4Xnbu0UeQ\ndgKdNUeBDHe4u01rvQpYZRYbmur3Gvif6Ut4i6QkY6w1KSltPFcuaNTINTkJj3TokLWrefj88xIU\nKrSRJk2aMHnyZGrXtueCISE8R3YK/QmMZXEZiQJOZuPcwldMngy7dlnG33wTcud2fj7CY1lbtPHw\nw1CtGkBpfv/9dwICHLWthxDuLztD96uAOkqpN00Nb1IopfyUUv0wOlJYmUotBHDsGAwebBmvVAle\nf935+QiPdqfQ7wQ2AGnv/kiRF74uO/8D3gZigHeA7kqp7zGu3otiTMQrBZwCRtkpR+HpLl821j7F\nxRmPp061nGkPxowqmYQnsuDsWdi6FWAbxuKfRGAVMTHRrkxLCLeSnc54p5RS1YFpGK1rS5odsh7o\nobWWoXsBmzdD8+Zw7VrGx3XrBtWrOyUl4T1WrIDk5M1AM27vjB0Q8AIVKvwDyIdGISCbDXO01oeB\nhkqpSIzOeBEYnfF2a63N18ELX3X+PLRrl3mRL1wY3rPYy0iITE2fvgFoAZhGiyjCM88sJzhYirwQ\nt+Xo5pWpqEthF9YNGHBnM/CMfPgh5Mvn+HyEV/n221Xs2NEauGWK3AVs5KWX7nFhVkK4n+x0xvtG\nKdVYKSWNyEX6tmyBzz7L/Lg2baB9e8fnI7zK0qVLad8+hjtF/m7gOwoVuofHH3dhYkK4oexc0bcF\n2gBnlFJfArO11n/YNy3h0W7dgu7dLeO5c0N0tPH7wEDjnnzv3qCU5bFCpGPhwoV06NCBxMREU6QU\nRi+tUrRsCf7+6b9WCF+UnUL/GNAZeBroB/xPKbUbY5Obr7XWNozVCq82Zgz884/1eM+ezs9HeI2v\nvvqKTp06kZTSaKksRpE3tsyQpopCWMrOrPudwE6l1GsYs2CeBxoCHwHjlVIrgTnAClOLW+HNrlyB\nQYPghx/u7Clvrcg/9pj1q3whbJCUZHxOnDr1SKoiXxHYCBQDIDwc6tZ1VYZCuK9sT8bTWsdj9LRf\nqJQqBDyLUfRjgJbAeaCwPZIUbiopCRo3Nop8RgICYPp02V9eZNv//gcTJwIMxLgvvxCI
BYqkHNO4\nMYSEuCQ9IdyaXX7yaq3Paq0nYCy164/RtaKAPc4t3NikSZkXeYB+/aByZcfnI7zSDz/cLvK3DQN+\nJHWRBxm2FyI9din0SqmKSql3gSPAOCAQOGCPcws3lV4bW3OlS8PQoZkfJ4QV8+d/S9euN82iCghP\nE8mTx9jdWAhhKduFXimVVynVUym1A9gLvImxY92nGHvVV7RTjsId9e6deSOckiVh+XIIC3NOTsKr\nvPfee7Rv35a9e9sC8ekeFx4On34KefM6LzchPEmW79ErpZoDnTB6TgYBGmMnidnAIq21+cdv4W2W\nLLG+ZVjnzsbNVIDQUChbVpbOiSzTWvP2228zbNgwU2QlxtYaIwCIijIKu1LGtI9y5SA42FXZCuH+\nsjMZb6np130YxX2OtL31IVevGlfz5ooUgQkT5LJK5IjWmrfeeot33303VbQ2MAAwCvv06fDAAy5J\nTwiPlJ1CPw2jSc4OeycjPMDQocb9eXMffSRFXuSI1poBAwYwfvz4VNEGwGLAuP3z2mtQpYorshPC\ncymttatzsLuoqCi9a9cuV6fhfc6dg2LF7qyXv61RI1i1SobpRbZprXnttdeYmGZ6fVOMZXTGmrkS\nJeDPPyFXLldkKIR7U0r9rLWOsvZcjja1ET5mxQrLIh8aCpMnS5EX2ZacnMzLL7/MtGnTUkVbAfNI\nvdXspElS5IXIjkwLvVJqI8aEu+e11sdMj22htdbSp8qbWJuA162bsYROiGxISkqia9euzJo1K1X0\nKeBLjFW6htatoVkzZ2cnhHew5Yo+GqPQh6V6bAvvuyfgy27cgHXrLONt2jg/F+E1Ll68yPfff5/y\nOCLiWS5fnkXqH025c5s3zBFCZEWmhV5r7ZfRY+Ej1q2DuLi0sYIF4YknXJOP8AoFCxZk48aN1KpV\ni3z5ovnllxlA2u3n3n0XIiNdk58Q3kDu0QvbWBu2b9FC9gQVNjl3Dl5/HbZvt5zmAXeTnLyDPXsK\nYt7Dq1o12fBQiJySQi8yl5hodLgzJ83FhQ3i46FBA9i9G+AmsBt43Owoy/2v/P1h2jT5LClETuWk\nBe4zSqlYpdQFpVSi6dcNSqln7JmgcANbt8KFC2ljYWFQr55r8hEe5f33bxf5GxgbW0YDazN9Xd++\n8NBDDk1NCJ+Q5UKvlApUSi3F2HO+NpAbOGv6tQ4wRym1VCkVmMFphCdZvNgy1qiRsbROiAwcPAgj\nRwJcx+iavQ6jb30McDDd15UsCcOHOyFBIXxAdq7oBwLNMfaJrA2EaK3vwuhqUQfYifE/+g17JSlc\nSGvr9+dl2F5kQmvj/vrNm1eBRsCmVM8OAspafV25ckb/pfBwq08LIbIoO/foO2FsQRuttU7ZUkpr\nnQRsVkpFA38AnYFRdshRuNKvv8LRo2lj/v6yqFlk6uuvYf36S0Bj4E7H7Mcff48vvrB+HRASYjRf\nlP5LQthPdgp9ceDj1EU+Na31LdPQ/is5yky4htZGcd+zx/j9Wiv3UqOjIV8+p6cmPMeFC9CnzwWM\nXvU/p8Tz5v2Adev6Soc7IZwoO4X+BKlbVlkXaDpOeBKtjRlQH32U8XEybC8y8eqrZzl3rj6wJ1X0\nE7744hUp8kI4WXbu0X8FtFVK5bH2pFIqL9AWmJuTxIQLLFqUeZEHaNnS8bkIj7VkySnmzq3NnSKv\ngOm0bfuK3PERwgWyU+hHAruAnUqpjkqp4qaZ+MVNS+t2YEzIe9ueiQoHu3zZ+j7z5qpWhbvvdnw+\nwiPFx8PLL68B/jRF/IBZ5M7d1abPkEII+8vO0P3tPqgK+MLK8wooD9xUaWfUaK21NOhxV4MHw8mT\nmR/31luOz0V4rLFj4eTJzsAZjJn1XwAdePddY5KdEML5slN4v0c2rPEuP/5obDVr7sEHoUoV4/d5\n8kCrVsZEPCGs2L8fRqWssxkAtADu4dFHpY2tEK6U5UKvtY52QB7CVRISjK1mtdlnt/BwWLYMSpRw\nTV7CYxw+fJhChQrTs2cYt26lfuYeaWMrhBuQneh8zaZN8OSTxjjq7a/ffrM8buRIKfIiXXv2QOPG\nULjw35QtW518+VoSG3vT4rj//c8YGBJCuI7cM/clx49D06aW282aq1IFXn3VOTkJj3P8ONSuDRcv\n/gHUA06TnHwCeBZYmHJcyZIwbJiLkhRCpMj0il4p1V8pFZLdN1BKVVFKNc7u64Udffll5kXezw+m\nT4cA+QworOvTBy5e3IPRAfu0KRoO9Epz3JQp0sZWCHdgy9D9O8BBpdQbSimb5s0qQ0Ol1GKMpXgy\neOcOrG1OY65XL4iKcnwuwiMtXw7ffvszRpE/Z4rmxtiNLjrluKeeMob2hRCuZ8tlW2XgA2A0MEop\n9QOwFaOAnwQuYmxoUwC4B3gMqAsUBc5jfMyfZvfMRdacOGHMrk9PYCC0bw/jxjkvJ+FRrl2Dl17a\ngbFBzWVTNAKjyD8KGANCTZvCp5+6JkchhKVMC73Weh/QTCn1BEb/+jZADawvsbu9cP4fYAwwS2t9\n1U65ipxYtswyds89EBtr/D5vXmOPeSHS8dJLWzlzpglw+790fmAdM2ZUpUkTI5I7t/ElhHAfNt+I\n1Vr/APyglOoB1ASeBEpgXMnHYXTI+A3YrLX+M90TCdewttVs69bSxURYlZAAW7bAP/8Yj3/7bTPz\n5zfD2FceoCCwgQYNHuTFF2W3OSHcWXbW0V8FVpq+hCe4fBk2brSMy+Y0wopbt4xvjTVrbkeSgde5\nU+SLALGEhFRi8mQp8kK4O1lH7wtWrTIu0VKLjDT61gthZuzY1EUejB8Ty4AKQDFgC1CJoUOhbFkX\nJCiEyBKbruiVUp2AX7XWVjqrCLdnbdi+ZUtj5pQQqezbB++8Y+2Zu4CNGHfpylGpEvTr59TUhBDZ\nZOtP+s+BNOO8SqnnlVJWxoOFW7l1y7iiNyfD9sKM1tCjB6Y2ttY2OIoEypE7N3z+OQQFOTU9IUQ2\n5aQrSimglp3yEI6ycaOxLiq1iAjZnEZY+OILo0MyfAl0Bb4BmlOvHlSoYBxTpIixRv6ee1yWphAi\ni6T9mbez1iSnWTNj3bwQJufOGX3pYRbwIsbq2bYUKbKapUvryMpLITyY3KT1ZklJsHSpZVyG7YWZ\nAQPg/PlpQBfutMioyIQJ90uRF8LDSaH3Zps2wZkzaWPBwdCokWvyEW5p82aYNetjoEeq6EPExGyk\nQ4fCLspKCGEvWSn01jrhCXcVH2/sPmKufn3Ilcv5+Qi3dOsWtGv3PpB6t8JHyJNnI1OmFHRVWkII\nO8rKPfrhSqnh5kGlVFI6x2uttcwBcJXx42HvXst4+/bOz0W4rSZN3uXcucGpIo8Dqxk7NoKiRV2V\nlRDCnrJSiLPa/0r6ZbnKgQPw9tuW8agoKfQCAK01vXuPYOPGEamiNYEVPPFEbrp2dVVmQgh7s6nQ\na63lXr6n0Bpefhlu3kwb9/c39pn393dNXsK
t7Nz5E5MmpS7ydYBlBASEM22a9FISwpvI0Lqnu3wZ\nVq+G//4zHh89CuvXWx732mtQpYpzcxNu4+JFo63tsWPG40OHqgETMe7NNwIWAaG8/jrcf7/L0hRC\nOIAUek92+rTR+ObvvzM+rkQJGD7cGRkJN3TyJNSqBfv3mz/TGygONAGCKVMG3nrL6ekJIRwsy4Ve\nKfUg0BGoBhTCmI1/FvgR+Epr/btdMxTp69078yIPMGmSzLT3YS+/DPv3J2P0qQ83e7ZVyu+mTEHW\nzAvhhWwu9Eopf+BjoBvGRDvzyXa1gNeVUpOBPlprWY7nSCtXwoIFmR/Xtq3RCU/4pCVLYMmSJIxu\ndweBNVgWe+jYERo0cHJyQginyMoV/XiMjhrxGE2wNwPHMQp+MYzZPG2BV4CbwAB7JipSuX4dXnkl\n8+PKlIGJEx2fj3BLV69Cr16JwPPAV6Zoc2AlEJpyXPnyMGGC8/MTQjiHrdvUlsO4oXcEaKS1/sfK\nYZ8ppUZhXDL0VUpN1Vr/a79URYrhw+HIEct4p05Q2NTJrFQpaNfuzmPhcwYNSuD48Y7AwlTRMnTq\nFJTybVG2rDHoU1B64wjhtWy9on8O48q9czpFHgCt9d9KqeeBTcCzwMicpyjS+PVX65df9eoZe4cq\naV8g4IcfbvHJJ08Dqfc66Enjxp/w+ed+8m0ihA+xdbXsE8BfWustmR1oOmYv8GROEhNWJCVBt27G\nr6kFBxszqeSntwCuXbtJ48atSVvk+xASMonJk6XIC+FrbC3092DMqrfVj6bXCHuaPx9++skyPmQI\nlCvn/HyE27lx4wbVqjXnypVVqaIDgAmMHKkoVcpFiQkhXMbWQp8XOJPpUXecBvJlPR2RoSVLLGP3\n3Qevv+78XITbuXbtGk2bNuWvvzakig4B3uPBBxWvveaqzIQQrmTrPfpwjEW4troFyIpce7N2NT9u\nHAQFOT8X4Xb69evH5s2bU0XeBowOOJ98AoGBrshKCOFq0tHaU5w9C4cPp435+Rktz4QARo0aRWRk\nJdOjsdwu8qVLQ/XqLktLCOFiWVlHH6OUKmXjsdJU3d6sXc1XqgThls1PhG8qVKgQ998fy/Hjq4HO\nKfGYGJmnKYQvy0qhf8j0ZSvpjGdP1gr9I484Pw/hNhITEwkIuPNf+MYN+O67IqQu8gCtWiGE8GG2\nFvoXHJqFyNzOnZYxKfQ+69SpUzRs2JC33nqLdu3aAbBuHcSZzaQpWBCeeMIFCQoh3Iat+9HPdnQi\nIgNaW7+ir1bN+bkIu62I4AAAIABJREFUlzt+/Dh16tRh3759dOzYkcDAQGJiYqwuymjRAvz9nZ+j\nEMJ9uHwynlKqkVLqH6XUAaXUmxkc10YppZVSUc7Mzy0cOWJMxkstOBgqV3ZNPsJljh49Sq1atdi3\nbx8AWmtu3rxJYiIsX255fEyMkxMUQrgdmwu9UuplpdRApVS6i3SUUkGmY3raeE5/YBLQGLgP6KCU\nus/KcbmBPmStaY/3sHY1X6WKrJfyMYcOHaJWrVocPHgQgICAAObPn0/79u3ZuhUuXEh7fFiY0RlZ\nCOHbbCr0SqknMLaoDdZaJ6R3nNY6HggCPlFKPWrDqasBB7TW/5peOw9oaeW4t4ExGLvi+R6ZiOfz\n9u/fT82aNTlsWmIZFBTEokWLaNOmDQCLF1u+plEjCA21jAshfIutV/TPA9cwtqrNzHjgKtDFhmMj\ngf9SPT5miqVQSj0M3K21Xmlbql7I2kQ8uT/vM/766y9q1arFsWPHAAgODmbp0qU0b94cMKZwWLs/\nL8P2QgiwfdZ9DSBWa30tswO11teVUrGm1+SIUsoP+ADz9ULWj+0GdAMoUaJETt/afSQlwc8/W8bl\nit4n/PHHH9StW5czZ4wO1KGhoSxfvpy6deumHPPrr3D0aNrX+ftD06bOzFQI4a5svaIvAezPwnkP\nmF6TmePA3akeFzfFbssN3A9sVkodBh4DllmbkKe1nq61jtJaRxUqVCgLqbq5v/+Ga2afryIioHx5\n1+QjnOby5ctpinx4eDirV69OU+QBvvjC8rW1akH+/M7IUgjh7mwt9P5krQGOtvHcPwHllVKllVJB\nQHtgWcpJtL6stS6otS6ltS4F7ABaaK13ZSEXz2bt/nxUlNH+Vni1iIgIRo0aBUDu3LlZt24dtcxa\nHv/+O3z8seVrpUmOEOI2W4fuzwJls3DessC5zA7SWicqpXoBazE+THymtf5TKTUS2KW1XpbxGXyA\nrJ/3aV27dgXgwQcfpJrZv3tyMnTvDomJaV8TFASmOXpCCGFzof8JqK+UitBaX87oQKVUBFAf2JDR\ncbdprVcBq8xiQ9M5NtqmbL2JdMTzKVprlFlj+tvF3tyMGbB9u2V80CC46y5HZCeE8ES2jv9+DeTB\nWPOemU8w7q1/nd2khMmtW7Bnj2VcCr1Xio2NpW7duly5ciXTY0+dgjfesIxXqABvptt2Sgjhi2y9\nov8W+AGjoc3dGOvavzOtfcd0f70mxr6YNYBtWutvHZCvb9mzBxLM2hbcdRdERlo/XnistWvXEhMT\nw82bN2ncuDFr164lV65cKc/v2WN0vrs9L3PbNrhsZWxt2jSjaaIQQtxma697rZRqg3EvvYbp10Sl\n1HnTIQVM51LAHqCtA3L1Pek1ypE9R73KihUraNOmDfHx8QAcOXKEM2fOpBT69euhSRPLe/Hm/t/e\nnUZHVWV/H//uJAQEZR5UEBDaWVEgotAqEGSQVnFARUENoiIOrUC3aDtrO/CgIi5BJlGwxRZUEFAU\nBYR2FqEFh8Y/CMoggsyIDEnO8+IWoZKqJFWpKVX5fdbKSmrfUzebS2DnnHvuOX37QocOMU5WRJJO\nyFO3nXO/Am2B+/AWuakEHO77qOSL3Qu0c85tjH6qFcz+/d5N2KI0ES+lTJs2jUsuuaSgyDdu3JgF\nCxbQrFmzgjYPPFB6ka9bF4YNi2WmIpKswtmPHufcH8CjwKNm1gg4MOXnF+fc2mgnV6GNGBH8/vxZ\nZ8U/F4mJ1157jd69e5OXlwdAs2bNmDdvHk2aNClos3598Al3RT31FNSpE6tMRSSZhVXo/fkKu4p7\nLKxe7XXjimrZEs6OeMFBKQdefvllcnJyyM/PB+DYY49l7ty5NGrUqFC7GSE8YHr11d6HiEgwIRV6\nMzsHWO2c+7nUxl77FsBpzrlJkSRXITkHt9wCu3cXjqelwdixWignBUyYMIHrr78e57w1qE488UQ+\n+OADjgjyTFywzWq6d/d+30tLg1NPhc6dNW1DRIoXao9+PvAQ8PCBgJkNAe50zgUbMLwYuB9QoQ9m\n3z7v0blgZs2Cd94JjN96q7ciniS1119/nX79+hW8PuWUU/jggw+oX79+QNtt22DevMBzPPignrAU\nkdCFWuiD9ReqADWjmEvq27IFcnJg9uzSZ1f5a9gQHnkkZmlJ/HTq1ImWLVuyZMkSWrVqxZw5c6hT\nzM
31YD8mDRtC69ZxSFREUobGgePpnnu8h6HDKfLgLWZevXpscpK4qlWrFnPmzCEnJ4e5c+cWW+Sh\n+K1ndfdGRMJR5sl4Eibn4M03w39fjx7aoSTF1K1blxdffLHENnv3Br+Doz3mRSRc6hvEy5o1sDHM\n5QWqVw++NZkkBeccDzzwAJMmhT9VZe7c4LsTF9m8TkSkVOrRx0uwzWnS0+GQQwLjZt506uHD4aij\nYp+bRJ1zjrvvvpuhQ4eSlpZGZmYmvXr1Cvn9wYbtzz8fKlWKYpIiUiGE06MPZz96KSrYcrY33ww7\ndwZ+7NgB//mPZtknKeccgwYNYujQoQDk5+czefLkgsfpSpOXB2+9FRjXsL2IlEU4PfoHzezBokEz\ny4teOilM281WCPn5+dx2222MGjWqIHbhhRcyZcqUgO1ni/P554F3eSpXhm7dopmpiFQU4RT6cJfk\n0AjAAXl58NVXgXEV+pSSn59P//79GT9+fEHs0ksvZfLkyWRmZoZ8nmCL5HTuDH6b2YmIhCzU3es0\naS8Sy5d7Q/L+qlf3Ng+XlJCXl8d1111XaOLdlVdeyaRJk8jICP336bVrva1mi9KwvYiUlQp4PAS7\nP5+VpQeiU0Rubi5XX311oSJ/7bXX8vLLL4dV5AFuvz3wd8L0dLjggmhkKiIVkSpNPAQr9NpuNiU4\n5+jTpw+vvvpqQez6669nwoQJpKenh3WuGTOCL7XQrx8EWSFXRCQkKvTxoIl4KcvMOP/88wsm2t18\n882MGTOGtDBHa3bt8rYzKKp+fXjiiWhkKiIVlZ6jj7W9e4PvK68efcro06cP+/fv55tvvuHJJ58M\neXa9v/vv99ZUKuqZZ6BWrSgkKSIVlgp9rC1d6u1W5+/ww73dSSRl9O3bt8zvXbwYRowIjHfpAmGs\nsSMiEpSG7mMt2P3500/XBuJJateuXdxyyy1s2bIlKufLy4P+/SE/v3C8ShUYNUo/JiISORX6WAt2\nf17D9klpx44ddOvWjVGjRtG1a1e2b98e8Tmfew4WLQqMP/AANG8e8elFRFToY664Hr0kla1bt9K5\nc2c+/vhjABYtWsSsWbMiOueaNXDvvYHxk06CwYMjOrWISAHdo4+lnTvh++8D4yr0SWXz5s107tyZ\nJUuWFMRGjBhB7969IzrvX/8auEMdwNix2rxGRKJHhT6WvvrK24feX/PmULt2YvKRsG3cuJFzzz2X\nZcuWFcRGjx5N//79Izrv9OnBd6i76SZo1y6iU4uIFKJCH0satk9qv/zyC506deJ736iMmTF+/Hiu\nu+66Qu1yc2HiRG86Rl6IWzy9/XZgrEEDePzxSLMWESlMhT6WNBEvaa1bt47s7Gx++OEHANLS0pg4\ncSJ9+vQp1M45uOoqmDo18u85YgTUrBn5eURE/KnQx5J69ElpzZo1dOjQgR9//BGA9PR0XnnlFa64\n4oqAtq+8Ep0if955cPnlkZ9HRKQozbqPlY0b4aefCsfS06Fly8TkIyGrUaMG9erVA6BSpUpMnTo1\naJHfvBkGDoz8+x1yCIwcqWfmRSQ2VOhjJVhv/qSToFq1+OciYalevTrvvvsu7dq148033+Tiiy8O\n2u7OO+G33yL7Xunp8MILcPTRkZ1HRKQ4GrqPFW1kk9Rq1qzJRx99VOy69QsWwIQJgfGuXaFnz9C+\nR+XKcNZZKvIiElsq9LGirWmTxrJly/jqq6/IyckpiOXmwtq1wYt8fr73GFxRNWrAiy/CEUfEKFER\nkTJQoY8F5zQRL0ksWbKEzp07s3nzZgBycnJ44QUYNAh27AjvXE88oSIvIuWPCn0srF4dePO2ShU4\n+eSEpCPBffHFF3Tt2pVt27YBMGjQIOrV68H114e/L+yZZ8KNN0Y7QxGRyKnQx0Kw3nzLllrXtBz5\n5JNP6NatGzt37gS8e/KzZs2hX7/wi3xGhrdsbZqmtopIOaT/mmJBC+WUawsXLqRLly4FRb5OnTrM\nnz+fuXOz+N//wj/fXXfBKadEOUkRkShRjz4WdH++3Jo7dy4XXHABf/zxBwD169dn7ty5ZGaezKOP\nBravXh1qFdPJr1XLW+TmrrtimLCISIRU6KMtL8/bzKYoFfqEe/fdd7n44ovZs2cPAEcccQTz5s3j\nuOOOp1Mn2Lu3cPv0dPjPf6BFiwQkKyISJSr00fb99/D774VjNWvCn/6UmHwqoO++g/Hj4ddfD8bW\nrp3JRx/1JD9/HwBVqzaidet5PPLIMWzfDvPnB55n8GAVeRFJfir00RZs2D4rSzO14mTlSu9y+0bm\n/ewFDmwt15Tdu+cxa1bxK9U0bQr33x+bHEVE4knVJ9o0ES+hnnkmWJEH6AlMAo4FFgAlL0c3apRW\nKxaR1KBCH22aiJcwzsH06SW1uApYCjQu8TxXXOHtJicikgpU6KNpzx74+uvAuAp9XCxeDGvXHng1\nBdgUpFXlEs9x6qnw3HNRTkxEJIF0jz6avv7aWyTd3xFHQMOGicmngjnYmx8J3Aq0oGXLeQweXCek\n9x95JLRt6y1iKCKSKlToo0n35xPKK/TDgUG+yFKqVLmT3r1fSFxSIiIJpqH7aNKOdQmzYgV8881Q\nDhZ5gDMZN+6pRKUkIlIuqNBHkwp9wtxyyyOA/xJ1Z9Gq1XucdFLNRKUkIlIuqNBHS36+160sqnXr\n+OdSgTjnuO+++5gzx/+h947AbHr2rJ6otEREyg3do4+WLVsCJ+IddljxC6VLxJxzDBkyhGHDhvlF\nOwPTgapcdFGCEhMRKUdU6KPFf73VAxo0iH8eFYRzjoEDBzJixAi/aHfgDaAKxx4Lxx+foORERMoR\nDd1Hiwp9XO3du5clS5b4RXoAbwLes3EXXQRmichMRKR8UY8+WjZuDIyp0JfJW2/BG2+Ab7v4YlTh\nsMNmUbt2V7Zta0R+/itApYKjGrYXEfGo0EeLevRRMXky9O4dauvDgPeAQ/D/UW7QAM44I/q5iYgk\nIw3dR4sKfVQ88URxR/YDs4PED6Po76s9emizQBGRA/TfYbSo0Eds5UpYtizYkX1AL7zJdqNLPc/l\nl0c3LxGRZKZCHy0q9BF7661g0b14W8y+6Xs9APik2HNccw1kZ0c9NRGRpKV79NESrNDXrx//PJJY\n4Bazf9Cw4cWsW/deQeSCCwaRk9M26Iz6Zs2gRQvNthcR8adCHy3q0Udk40b4+GP/yO/AhaxbN68g\ncvfdd/Poo49iquQiIiHT0H00OKdCH6GZM71VhD07gfOAg0X+wQcfVJEXESkD9eijYft22LevcOyQ\nQ+DQQxOTTxI6OGy/Ha/If1pw7LHHHuPuu+9OQFYiIslPhT4aiuvNq/cZkl274P33AbYCXYGDuwA+\n+eSTDB48OEGZiYgkPxX6aNCwfUTeew/27gVYDBxc1vaZZ57l9ttvS1RaIiIpQffoo0HL30bk4LB9\nJ+DfQCXOOmuMiryISBSoRx8N6tGX2f79MGuWf+RS4P+4664
mCcpIRCS1qNBHg56hL5M1a9bw6acZ\nbNt2RKH4oYc2oVOnBCUlIpJiVOijQT36sK1evZrs7Gy2basCfAgc/MXovPOgSpVEZSYiklpU6KNB\nhT4sK1euJDs7m59//tkX6Q58wYEpI9piVkQkejQZLxpU6EO2fPlyzjnnHL8iXxl4mAM/ilWqQPfu\nicpORCT1JLzQm1k3M1tuZivM7K4gxweZ2XdmttTM5ppZ+ZulpUIfku+++4727duzfv16X6QKMAOv\nR+8ZNAhq1kxEdiIiqSmhhd7M0oGReEuhnQhcaWYnFmm2BMhyzrUAXgf+X3yzDIEKfamWLl1Khw4d\n+LXgWlUF3gG6FLRp3hzuvTcR2YmIpK5E9+jbACuccz865/bhPUTdw7+Bc26+c2637+VnQKM451iy\nXbtg9+7CscxMdUv9LF68mI4dO7Jp0yYAMjIOBd4DOhZqN3q0t3KwiIhET6ILfUNgjd/rtb5YcfoB\ns2OaUbiKe7ROy98C8Pnnn5Odnc2WLVsAqFatBrm57wNnFWrXpw+ce24CEhQRSXFJM+vezPoAWUD7\nYo7fCNwI0Lhx4/glpmH7Eg0bNozt27cDUKtWLapVe5/ff29dqE2tWvDUU4nITkQk9SW6R78OOMrv\ndSNfrBAzOxe4B7jQObc32Imcc2Odc1nOuax69erFJNmggi1/q8VyCkyaNIkOHTpQt25dLr10PmvX\ntg5oM2yYLpmISKwkukf/JXCMmR2NV+B7AVf5NzCzlsAYoJtzLkhVTTD16EtUtWpVZs6cyYcfruOS\nS44LOH722dC3bwISExGpIBLao3fO5QK34s3M+h6Y4pz71sweNrMLfc2GAYcCU83sv2Y2I0HpBqdC\nX8iPP/4YEKta9VCGDj2O/fsLxytVgjFjIC3R40oiIiks4f/FOufecc4d65xr7px71Be73zk3w/f1\nuc65Bs6503wfF5Z8xjhToS/w1ltvccIJJzB8+PBC8RdegI8+Cmx/111wwglxSk5EpIJKeKFPeir0\nAEydOpWePXuyb98+Bg0axEsvvQR4l+fOOwPbH3MM/OMf8c1RRKQiUqGPlAo9kydPplevXuTm5gLQ\nvHlzsrOzAW+lu23bAt8zerQ2rhERiQcV+khV8EI/ceJE+vTpQ35+PgDHHXccCxcupHHjxrz3Hkye\nHPiea64B3+8BIiISYyr0karAhX7cuHH07dsX5xwAJ510EgsWLODII49k924YMCDwPXXq6Jl5EZF4\nUqGPxJ49sGNH4VhamlfNUtzIkSO58cYbC4r8qaeeyvz582ng+yXnn/+EVasC3zdsGNStG89MRUQq\nNhX6SATrzderl/LPiz399NPceuutBa+zsrKYN28eBxYqWrbMK+hFdegAOTnxyVFERDypXZFirQIO\n269evZp/+E2XP/PMM/nggw+oXbs2APn50L8/+OblFcjM9CbgaQsAEZH4UqGPRLDlb1O80Ddt2pQ3\n33yTSpUqcfbZZzNnzhxq1KhRcHzcOPj008D3/eMfcFzgwngiIhJjiV4CN7lVwB49QPfu3Xn//ffJ\nysqiWrVqBfFffoEhQwLbH3ustziOiIjEn3r0kagAhd45V7DFrL/27dsXKvIAAweCb6O6QsaMgcqV\nY5WhiIiURIU+Eile6J1z3HHHHbRp04b169eX2Hb2bHjttcB4To43CU9ERBJDhT4SKVzo8/PzGTBg\nAM8++ywrV64kOzub3377LWjb33+Hm28OjNetC08+GeNERUSkRLpHH4kULfR5eXnccMMNvPjiiwWx\nU089tdCkO38PPwyrVwfGn3qqQiwpICJSrqnQRyIFC/2qVbl07tyXlSv/VRCrXbsPK1a8SNu2wX9c\n/vvfwFh2Nlx9dayyFBGRUKnQRyJYoa9fP/55RMmmTfs5+eQ+7N49xS/aly1bxrFlS3rI56lcGZ5/\nXs/Mi4iUB7pHX1a7dkGQ2ej4VodLNvv27eOMM64oUuT7A+OB0Is8wD33eI/UiYhI4qnQl9XixYGx\npk2hUqW4pxKpPXv20LHjJaxaNc0vehvwPOH+iBx/fPD950VEJDFU6Mvqyy8DY23axD+PKHjiiWF8\n8snbfpG/ASOA8MbemzaF6dP1zLyISHmie/Rl9cUXgbHTT49/HlGQkfF34GPgPeAfwD8B44EH4Pzz\nQztHtWreErcpvp+PiEjSUaEvqyTt0TsHy5fD9997r//4Ax57rAowDZgKXA0YrVvDffdBeni350VE\npJxRoS+LTZsCN1tPS4NWrRKTT4jy8+GGG2DChD1AlSJHDwGuAbw/ypgxKvIiIqlAA61lsWhRYOzE\nE+HQQ+OfSxjGjYMJE7YAZwGPFdvur3+F1q3jlpaIiMSQevRlEWzYvpzfn9+wAf7+901AZ+Br4Csg\nE2/i3UGNGnkr3YmISGpQoS+LJJyId9NNG9i581zgW1/EgJqF2mRkwPjxcNhh8c5ORERiRYU+XM4l\n3US8V15Zz1tvZQPLfZE0YAItWlxLs2ZepH596NMHzj47QUmKiEhMqNCH6+efYePGwrHMTDjllMTk\nU4offlhDTk42sMIXSQdepk6dK5k719thTkREUpcKfbiC9eZbtvSKfTmzatUq2rTJJjd3tS+SAbwK\n9OTpp1XkRUQqAs26D1eS3J9fsWIF7dq1Z/v21b5IJeB1oCcdO2pnORGRikI9+nAlwf353Nxczjvv\nPDZsWOOLVMZbEOc8MjNh9GjtLCciUlGoRx+OvDz46qvAeDnr0WdkZNC9+/N4Bf4QYBZwHqCd5URE\nKhr16MOxfDns3Fk4Vr16uauc69fDSy+dC0zHWwGvA+DtLDdkSAITExGRuFOhD0ewYfusrHKxk0t+\nfj5pvjzuuAN27ADoVqjNmDHaWU5EpKJJfIVKJuVwIt6+ffD8859x9NEtGTHiJx5/HKZODWx33XVw\nzjnxz09ERBJLPfpwlLOJeNu3w5///B++/bY7sIs77sgGFgCNCrWrVw+GDUtEhiIikmjq0Ydq3z74\n+uvAeAJ79FddNY9vv+0G7PJFdgBbA9o9/TTUrh3PzEREpLxQoQ/VunVesfdXt663C0wCPP30HN55\n5y/Abl+kAfAhUHiFvk6doHfv+OYmIiLlhwp9qNatC4w1aZKQB9KnT3+bv/3tAmCPL3Ik3pD9SYXa\n1avnTcDTM/MiIhWX7tGHKlihb9gw7mlMmzaNyy67Auf2+yKNgXlAczp39oo7QNOm3gS85s3jnqKI\niJQjKvShWr8+MHbkkXFNYcqUKVx11VXk5eX5IkcD84EmtGkDs2dDenpcUxIRkXJOhT5UCe7Rf/jh\nh1x55ZXk5+f7Isfg9eQbkZ7uDdGryIuISFG6Rx+qBBf6du3acdpp5/tenYD/Y3QDB8Jpp8UtFRER\nSSIq9KEKNnQfx0K/c2cmP/00Bbgdb3b9EYA3H/DBB+OWhoiIJBkN3YcqWI8+jvfohwyBzZsrA88U\nio8cCdWqxS0NER
FJMurRh8K5uA/dP/XUU9x3330ALFwIL7wQ2Oayy+Avf4lZCiIikgLUow/F1q2w\nZ0/h2CGHQM2aMfl2jz32GPfccw8AaWmZTJlyX0Cb6tXhmWcCwiIiIoWo0IeiuEfryrASza+/woIF\nB3aXK8w5x8yZDzFz5kMFsbFjP2DDhiFAZqG2TzwR96f7REQkCanQhyJKw/affQZduwYv8uCAe4DH\n/WLZbNgwg6JF/owzoH//sL+9iIhUQCr0oYhCof/9d+jVq6Qi/zfgab9YV2AacEihlhkZMHYspGl2\nhYiIhECFPhRReLTuoYfgp5+CHcnHe2TuOb/Y+cBUoEpA68GDoUWLsL61iIhUYCr0oYjw0bqvv/a2\nig2UDwwAxvrFLgFepehwPUCrVnD//SF/WxERERX6kEQwdJ+XBzfe6H32V7kyNGt2O99/f7DIN2vW\ni/btJ5GWVqlQWzM4+WS49lqoWjXs7EVEpAJToQ9FBIV+9Gj44ovA+L33Qvv2l9Ot2wR2797N1Vdf\nzYQJE8jI0F+JiIhEj6pKKEK8R791KwwaBHPnwt69XmzLlsC3nnAC3HknZGaezcyZM3njjTd49tln\nSdeuNCIiEmXmnEt0DlGXlZXlFi1aFJ2T7d/vjbMXvU579nhxP1ddBa++WvopFy6Es8+OTnoiIiJm\n9pVzLivYMT2kVZoNGwKLfN26AUV+yxaYMqWkE+0BruOyy1aoyIuISNyo0JcmxGH7t98OnHB30G7g\nQuBFPvkkm9WrV0cvPxERkRKo0JcmxEfrpk0r7gS7gL8A7/tOt4ZpxTcWERGJKhX60oQw4373bnj3\n3cBmr766gzZtuuHtH+95+OGHGThwYHRzFBERKYZm3ZcmhEL/wQfwxx+Fm9SqtY3hw7vxxRefF8SG\nDh3KnXfeGYssRUREglKhL00I9+inTy/aYDNpaV344ovFBZHhw4dzxx13RD8/ERGREqjQl6aUe/S5\nuTBjhv/BjUBnNm9eWhAZNWoUAwYMiFmKIiIixVGhL00pQ/cffwybNx94tRnoCHwHgJkxbtw4+vXr\nF+ssRUREgtJkvNKUMnRfeNi+BnASAGlpaUycOFFFXkREEko9+pLs3Ol9+KtUCerUAbx1dAoX+gzg\nFU4/HQYNuoRevXrFK1MREZGgVOhLUtz9+TRvIGTpUii69k16eiXeeec16ta12OcnIiJSCg3dl6SU\n+/Pjx/8f8ChwcIncc85BRV5ERMoN9ehLUsL9+WXLvuf557OBDcDveAXfuOiiOOYnIiJSCvXoS1LM\n0P2yZcto1649eXkbfMFngFUA9OgRt+xERERKpUJfkiCFfgnQvn1Hdu3a5ItUA2YDzWjXDpo0iWN+\nIiIipdDQfUmKDN1/CXQZN45tu3f7ItXxinw7zODJJ+Ocn4iISCkS3qM3s25mttzMVpjZXUGOVzaz\n13zHPzezpnFLzq9H/ylwLvgV+Zp4O9K1A+Cmm6Bt27hlJiIiEpKEFnozSwdGAucBJwJXmtmJRZr1\nA7Y65/4EDAeGxiO3bdtgx/+8Qr8Q6ALsKDhaG5gLtAHg8MPh8cfjkZWIiEh4Et2jbwOscM796Jzb\nB/wbKDqdrQcw0ff160AnM4v582tbfsun6vZfmIf3W8iugiN18badbVUQefZZqFEj1hmJiIiEL9GF\nviGwxu/1Wl8saBvnXC6wHagT68TSN28kgzwOBdJ9sQYYsAA4paBd9+7Qs2essxERESmbRBf6qDGz\nG81skZkt2rRpU+lvKEX6Bm/Yvg3wLnA88ALN8O4weKpWhZEjIfbjCyIiImWT6EK/DjjK73UjXyxo\nGzPLwNs5ZnMsKE82AAAIu0lEQVSRNjjnxjrnspxzWfXq1Ys4sYxfD6bRDvgGqESzgpiZV+SbNo34\nW4mIiMRMoh+v+xI4xsyOxivovYCrirSZAVyLN/G9JzDPOeeIscrtWvPqBZOpvnMdNXato/rOdeQ3\nOI0hbb2efJcucOaZsc5CREQkMgkt9M65XDO7FXgP71b4BOfct2b2MLDIOTcDeAF42cxWAFvwfhmI\nuVonN+TKGVcWirUAusXjm4uIiERJonv0OOfeAd4pErvf7+s9wGXxzktERCQVJPoevYiIiMSQCr2I\niEgKU6EXERFJYSr0IiIiKUyFXkREJIWp0IuIiKQwFXoREZEUpkIvIiKSwlToRUREUpgKvYiISApT\noRcREUlhKvQiIiIpTIVeREQkhanQi4iIpDAVehERkRRmzrlE5xB1ZrYJ+CmKp6wL/BbF81VUuo6R\n0zWMnK5h5HQNIxfta9jEOVcv2IGULPTRZmaLnHNZic4j2ek6Rk7XMHK6hpHTNYxcPK+hhu5FRERS\nmAq9iIhIClOhD83YRCeQInQdI6drGDldw8jpGkYubtdQ9+hFRERSmHr0IiIiKUyF3o+ZdTOz5Wa2\nwszuCnK8spm95jv+uZk1jX+W5VsI13CQmX1nZkvNbK6ZNUlEnuVZadfQr92lZubMTLOfgwjlOprZ\n5b6fx2/NbHK8cyzvQvj33NjM5pvZEt+/6e6JyLO8MrMJZrbRzL4p5riZ2bO+67vUzFrFJBHnnD68\n2xfpwEqgGZAJfA2cWKTNzcBo39e9gNcSnXd5+gjxGnYEqvq+HqBrGP419LU7DFgIfAZkJTrv8vYR\n4s/iMcASoJbvdf1E512ePkK8hmOBAb6vTwRWJzrv8vQBnAO0Ar4p5nh3YDZgwJnA57HIQz36g9oA\nK5xzPzrn9gH/BnoUadMDmOj7+nWgk5lZHHMs70q9hs65+c653b6XnwGN4pxjeRfKzyHAI8BQYE88\nk0sioVzHG4CRzrmtAM65jXHOsbwL5Ro6oLrv6xrA+jjmV+455xYCW0po0gOY5DyfATXN7Iho56FC\nf1BDYI3f67W+WNA2zrlcYDtQJy7ZJYdQrqG/fni/zcpBpV5D3/DeUc65t+OZWJIJ5WfxWOBYM/vY\nzD4zs25xyy45hHINHwT6mNla4B3gtvikljLC/T+zTDKifUKRUJhZHyALaJ/oXJKJmaUBTwM5CU4l\nFWTgDd93wBtZWmhmpzjntiU0q+RyJfCSc+4pM2sLvGxmJzvn8hOdmBykHv1B64Cj/F438sWCtjGz\nDLyhqs1xyS45hHINMbNzgXuAC51ze+OUW7Io7RoeBpwMfGhmq/Hu683QhLwAofwsrgVmOOf2O+dW\nAT/gFX7xhHIN+wFTAJxznwJV8NZwl9CE9H9mpFToD/oSOMbMjjazTLzJdjOKtJkBXOv7uicwz/lm\nVAgQwjU0s5bAGLwir3uigUq8hs657c65us65ps65pnjzHC50zi1KTLrlVij/nqfj9eYxs7p4Q/k/\nxjPJci6Ua/gz0AnAzE7AK/Sb4pplcpsBXOObfX8msN0590u0v4mG7n2cc7lmdivwHt5s0wnOuW/N\n7GFgkXNuBvAC3tDUCrwJFr0Sl3H5E+I1HAYcCkz1zWP82Tl3YcKSLmdCvIZ
SihCv43tAFzP7DsgD\n/u6c0widT4jXcDAwzswG4k3My1Hn5yAzexXvl8m6vnkMDwCVAJxzo/HmNXQHVgC7gb4xyUN/JyIi\nIqlLQ/ciIiIpTIVeREQkhanQi4iIpDAVehERkRSmQi8iIpLCVOhFpERmluPbJS8n0bmISPhU6EWk\nXDKzD81Mz/+KREgL5ohIaabhrcAX9RW7RCT2VOhFpETOue14OzWKSBLS0L1IBWNmTX333F8ys+PN\nbLqZbTGz383sIzPrUqR9oXv0ZlbFzLaZ2Ubf5k7BvsfzvvecXyTeycze9X2/vWb2g5k9YWY1iuaH\nb2dD33kOfHwY5cshkvJU6EUqrqOBT4HaeBsNTQVaA7PN7Iri3uSc2wO8BtQDzit63MwqA1cAvwLv\n+sX7A+8Df8bbUGY43p4RQ4BPzKymr+k24CHgJ9/rh/w+XirTn1SkAtNa9yIVjJk1BVb5Xj7pnPu7\n37EsvOK/C2jinNvh68m/CPR1zr3ka9cW+AR4wznXs8j5L8PbuvRp59xgX6wJ3jawe4E2zrn/+bUf\nBQwAxjnnbvSLfwi0d85ZtP7sIhWRevQiFdd24GH/gG+721eAmsDFxb3Rt/f4D8AFZla7yOEDWzlP\n9Iv1ATKB5/yLvM89wE7gat9ogIhEkQq9SMW12Dm3M0j8Q9/nlqW8fyJe8S7YrtnMGgBdgSXOuaV+\nbVv5Ps8rehLn3FZgCd5e5seHlLmIhEyFXqTi+rWY+Abf5xrFHD9gEpDPwR48QG+8p3kmFml74FzF\nPaJ3IF6zmOMiUkYq9CIVV4Ni4of7Ppf4SJ1zbi1eD72NmR3oiV8L7AcmF2l+4FyHE9wRoXxPEQmf\nCr1IxdXKzA4LEu/g+7wkhHO85Pt8rZmdBrQAZjvnNhVpd+BcHYrE8c22Pw3YA3zvdyjPdzw9hDxE\npBgq9CIVVw3gfv+Ab9Z9b7ye9bQQzvEmsANvsl2OL/ZSkHb/wuvp32Zmfypy7BGgOvAv59xev/hm\n3+fGIeQhIsXQyngiFddC4HozOwP4GG/4/Aq8DkB/59yO0k7gnPvDzKYC/YCb8Yrz20HarTazO4CR\nwGIzmwJswlsUpy3wP7zn6f3NBS4D3jSzd4A/gJ+ccy+X5Q8rUlGpRy9Sca0C2gFbgZuAy4HFQHfn\n3GthnOcl3+dKwKvOuX3BGjnnRuHNyP8MuBQYBNQHhgFtnXNbirxlPPA43sjDnXg9/35h5CUiaMEc\nkQrHb8Gcic65nIQmIyIxpx69iIhIClOhFxERSWEq9CIiIilM9+hFRERSmHr0IiIiKUyFXkREJIWp\n0IuIiKQwFXoREZEUpkIvIiKSwlToRUREUtj/B29qUHmfUqIQAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] @@ -237,15 +214,6 @@ "results = pd.concat(dfs)\n", "pivot_plot(results, fig=fig);" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] } ], "metadata": { From f01f3c0cbcbd4c6a8ff351c45a94be04a9c8a310 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 25 Sep 2019 08:52:05 -0700 Subject: [PATCH 004/187] BF: fixing seed in some tests that can fail randomly --- selectinf/algorithms/tests/test_lasso.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/selectinf/algorithms/tests/test_lasso.py b/selectinf/algorithms/tests/test_lasso.py index d5ab6a38c..172535b10 100644 --- a/selectinf/algorithms/tests/test_lasso.py +++ b/selectinf/algorithms/tests/test_lasso.py @@ -30,6 +30,7 @@ except ImportError: statsmodels_available = False +@set_seed_iftrue(True) def test_gaussian(n=100, p=20): y = np.random.standard_normal(n) @@ -64,6 +65,7 @@ def test_gaussian(n=100, p=20): np.dot(L.constraints.linear_part, L.onestep_estimator), L.constraints.offset) +@set_seed_iftrue(True) def test_sqrt_lasso(n=100, p=20): y = np.random.standard_normal(n) @@ -91,7 +93,7 @@ def test_sqrt_lasso(n=100, p=20): np.dot(L.constraints.linear_part, L.onestep_estimator), L.constraints.offset) - +@set_seed_iftrue(True) def test_logistic(): for Y, T in [(np.random.binomial(1,0.5,size=(10,)), @@ -118,6 +120,7 @@ def test_logistic(): return L, C, P +@set_seed_iftrue(True) def test_poisson(): X = np.random.standard_normal((10,5)) @@ -139,6 +142,7 @@ def test_poisson(): return L, C, P +@set_seed_iftrue(True) @dec.skipif(not statsmodels_available, "needs statsmodels") def test_coxph(): From a1879b2774cf9fc000ef615bf7712872cf0cb5a5 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 25 Sep 2019 09:56:27 -0700 Subject: [PATCH 005/187] added Lee and ROSI examples to docs --- doc/source/algorithms/LASSO.Rmd | 111 +++++++++++++++++ doc/source/algorithms/LASSO.ipynb | 194 ++++++++++++++++++++++++++++++ doc/source/algorithms/ROSI.Rmd | 110 +++++++++++++++++ doc/source/algorithms/ROSI.ipynb | 186 ++++++++++++++++++++++++++++ doc/source/algorithms/index.rst | 2 + selectinf/algorithms/api.py | 1 + 6 files changed, 604 insertions(+) create mode 100644 doc/source/algorithms/LASSO.Rmd create mode 100644 doc/source/algorithms/LASSO.ipynb create mode 100644 doc/source/algorithms/ROSI.Rmd create mode 100644 doc/source/algorithms/ROSI.ipynb diff --git a/doc/source/algorithms/LASSO.Rmd b/doc/source/algorithms/LASSO.Rmd new file mode 100644 index 000000000..770d31eda --- /dev/null +++ b/doc/source/algorithms/LASSO.Rmd @@ -0,0 +1,111 @@ +--- +jupyter: + jupytext: + cell_metadata_filter: all,-slideshow + formats: ipynb,Rmd + text_representation: + extension: .Rmd + format_name: rmarkdown + format_version: '1.1' + jupytext_version: 1.1.1 + kernelspec: + display_name: Python 3 + language: python + name: python3 +--- + +# Conditioning on signs and active set + +One of the first works in this line of conditional inference +is [Lee et al.](projecteuclid.org/euclid.aos/1460381681) which +considers the LASSO (squared-error loss) and conditions +on the active set and their signs. 
+ + +```{python collapsed=TRUE} +import numpy as np, pandas as pd +import matplotlib.pyplot as plt +import statsmodels.api as sm +# %matplotlib inline + +from selectinf.tests.instance import gaussian_instance # to generate the data +from selectinf.algorithms.api import lasso + +``` + +We will know generate some data from an OLS regression model and fit the LASSO +with a fixed value of $\lambda$. In the simulation world, we know the +true parameters, hence we can then return +pivots for each variable selected by the LASSO. These pivots should look +(marginally) like a draw from `np.random.sample`. This is the plot below. + +```{python} +np.random.seed(0) # for replicability + +def simulate(n=500, + p=100, + s=5, + signal=(5, 10), + sigma=1): + + # description of statistical problem + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0., + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + sigma_hat = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) / np.sqrt(n - p) + L = lasso.gaussian(X, y, 2 * np.sqrt(n), sigma=sigma_hat) + soln = L.fit() + active_vars = soln != 0 + + if active_vars[truth != 0].sum() == s: # ensure we have screened for ease of interpretation + projected_truth = np.linalg.pinv(X[:, active_vars]).dot(X.dot(truth)) + S = L.summary(truth=projected_truth) + S0 = L.summary() + + pivot = S['pval'] # these should be pivotal + pvalue = S0['pval'] + return pd.DataFrame({'pivot':pivot, + 'pvalue':pvalue}) +``` + +Let's take a look at what we get as a return value: + +```{python} +while True: + df = simulate() + if df is not None: + break +df.columns +``` + +```{python collapsed=TRUE} +dfs = [] +for i in range(200): + df = simulate() + if df is not None: + dfs.append(df) +``` + +```{python} +results = pd.concat(dfs) +import statsmodels.api as sm +thresh = 0.001 # POSSIBLE BUG? several very small pivots -- fine for pvalues +grid = np.linspace(0, 1, 101) +fig = plt.figure(figsize=(8, 8)) +plt.plot(grid, sm.distributions.ECDF(results['pivot'][results['pivot'] > thresh])(grid), 'b-', linewidth=3, label='Pivot') +plt.plot(grid, sm.distributions.ECDF(results['pvalue'])(grid), 'r-', linewidth=3, label='P-value') +plt.plot([0, 1], [0, 1], 'k--') +plt.legend(fontsize=15); +``` + +```{python collapsed=TRUE} + +``` diff --git a/doc/source/algorithms/LASSO.ipynb b/doc/source/algorithms/LASSO.ipynb new file mode 100644 index 000000000..7e505805f --- /dev/null +++ b/doc/source/algorithms/LASSO.ipynb @@ -0,0 +1,194 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Conditioning on signs and active set\n", + "\n", + "One of the first works in this line of conditional inference\n", + "is [Lee et al.](projecteuclid.org/euclid.aos/1460381681) which\n", + "considers the LASSO (squared-error loss) and conditions\n", + "on the active set and their signs.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import numpy as np, pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import statsmodels.api as sm\n", + "%matplotlib inline\n", + "\n", + "from selectinf.tests.instance import gaussian_instance # to generate the data\n", + "from selectinf.algorithms.api import lasso\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will know generate some data from an OLS regression model and fit the LASSO\n", + "with a fixed value of $\\lambda$. 
In the simulation world, we know the\n", + "true parameters, hence we can then return\n", + "pivots for each variable selected by the LASSO. These pivots should look\n", + "(marginally) like a draw from `np.random.sample`. This is the plot below." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(0) # for replicability\n", + "\n", + "def simulate(n=500, \n", + " p=100, \n", + " s=5, \n", + " signal=(5, 10), \n", + " sigma=1): \n", + "\n", + " # description of statistical problem\n", + "\n", + " X, y, truth = gaussian_instance(n=n,\n", + " p=p, \n", + " s=s,\n", + " equicorrelated=False,\n", + " rho=0., \n", + " sigma=sigma,\n", + " signal=signal,\n", + " random_signs=True,\n", + " scale=False)[:3]\n", + "\n", + " sigma_hat = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) / np.sqrt(n - p)\n", + " L = lasso.gaussian(X, y, 2 * np.sqrt(n), sigma=sigma_hat)\n", + " soln = L.fit()\n", + " active_vars = soln != 0\n", + " \n", + " if active_vars[truth != 0].sum() == s: # ensure we have screened for ease of interpretation\n", + " projected_truth = np.linalg.pinv(X[:, active_vars]).dot(X.dot(truth))\n", + " S = L.summary(truth=projected_truth)\n", + " S0 = L.summary()\n", + "\n", + " pivot = S['pval'] # these should be pivotal\n", + " pvalue = S0['pval']\n", + " return pd.DataFrame({'pivot':pivot,\n", + " 'pvalue':pvalue})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's take a look at what we get as a return value:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['pivot', 'pvalue'], dtype='object')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "while True:\n", + " df = simulate()\n", + " if df is not None:\n", + " break\n", + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "dfs = []\n", + "for i in range(200):\n", + " df = simulate()\n", + " if df is not None:\n", + " dfs.append(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAeMAAAHSCAYAAADfUaMwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0\ndHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzdd1iV5RvA8e8roDgQFy4UR6GZKxO3\npgnuPSFzZcNylWlqpblN0zRnmubItMy9N6KVObCs3CaOHD8V3IoDeH5/PB4PR0hR4bznHO7PdXHF\n+7xvnBtF7vOs+zGUUgghhBDCPGnMDkAIIYRI7SQZCyGEECaTZCyEEEKYTJKxEEIIYTJJxkIIIYTJ\nJBkLIYQQJnM364Vz5MihChYsaNbLCyGEEHa1Z8+eSKWUT2L3TEvGBQsWJDw83KyXF0IIIezKMIyT\n/3VPhqmFEEIIk0kyFkIIIUwmyVgIIYQwmSRjIYQQwmSSjIUQQgiTmbaaOimuXbvGhQsXuHfvntmh\niEfw8PAgZ86cZM6c2exQhBDCKTlsMr527Rrnz5/H19eX9OnTYxiG2SGJRCiliI6O5syZMwCSkIUQ\n4ik47DD1hQsX8PX1JUOGDJKIHZhhGGTIkAFfX18uXLhgdjhCCOGUHDYZ37t3j/Tp05sdhkii9OnT\ny3SCEEI8JYdNxoD0iJ2I/F0JIcTTc+hkLIQQQqQGkoyFEEIIk0kyTkGDBg3CMIwHH3nz5qVFixYc\nO3YMgI4dOxIQEJDsr3vkyBEGDRrElStXkv1rCyGESH4Ou7XJVXh7e7Nu3ToAIiIiGDBgAIGBgezf\nv58BAwYQHR2d7K955MgRBg8eTMeOHcmSJUuyf30hhBDJ67HJ2DCMmUBD4IJSqkQi9w1gPFAfuAV0\nVEr9ntyBOit3d3cqVqwIQMWKFfHz86NatWqsWbOGVq1amRydEEIIR5CUYerZQN1H3K8H+N//eAf4\n+tnDcl1ly5YF4MSJEzbD1MePH8cwDFavXm3zfGxsLLlz56Z///4P2kJDQ6lQoQKenp7kypWLLl26\ncOPGDQDCwsJo1KgRAIUKFcIwDAoWLGiH70wIIVzH1atwcuf/ICbGLq/32GSslNoGXHrEI02A75S2\nA8hiGEae5ArQ1Zw4cQKA3Llz27QXKlSI8uXL89NPP9m0b926lfPnzxMSEgLA/v37qVu3Ljly5GDx\n4sUMHjyY+fPn07JlSwBefvllxowZA8CSJUv47bffWLp0aQp/V0II4SKU4u7mn9njH0Leivk59tVK\nu7xscswZ+wL/xrs+fb/tXDJ8bRuOsJVVqSf/f2Luv7OKiIigS5cueHl5ERQUxObNm22eCwkJYfDg\nwdy5c4d06dIBsGDBAooXL06JEnqGYOjQoRQoUIAVK1bg5uYGQLZs2QgODua3336jUqVKFC1aFIAy\nZcpIr1gIIZLi+nX4/nvUlCmwbx+LgcLAyX5TyNi2GQ/1n5KdXVdTG4bxjmEY4YZhhF+8eNGeL22a\nqKgoPDw88PDwoGjRokRERLBgwQLy5Ek4eNC6dWuuXbv2YMFXTEwMS5YsITg4+MEzu3btolmzZg8S\nMUCLFi1wd3fnl19+SflvSAghXM2KFVC4MHTpgrFvHz2BKcAWwD//HXJnuZ3iISRHMj4D5I93ne9+\nWwJKqW+UUgFKqQAfH59keGnH5+3tze7duwkPD+f06dOcOHGCevXqJfqsr68vVatWZcGCBQBs3ryZ\nyMjIB0PUAOfOnSNXrlw2/5+bmxvZs2fn0qVHzSYIIYSwcfs2dO8OTZpAZOSD5q6kpwOvcqHNn+SL\n2AaenikeSnIMU68AuhmG8SNQAbiqlEr2IWp4uiFis7m7uz/RXuLg4GD69etHdHQ0CxYsoEyZMvj7\n+z+4nydPngQHMsTGxhIVFUW2bNmSLW4hhHBpBw5ASAj8/TcAN4HPycwVhjCXjtRt7c1Hc+03PfrY\nnrFhGD8AvwFFDcM4bRjGm4ZhvGsYxrv3H1kDRAD/ANOBLikWbSrQqlUroqOjWbp0KUuXLrXpFQNU\nqFCBpUuXEhsb+6BtyZIlxMTEULVqVQDSpk0LwO3bKT+0IoQQTuXCBejZE15++UEivg6UJzvDucFk\nyhNQ05vvvoM0dpzIfWzPWCn12mPuK6BrskWUyuXMmZMaNWrQu3dvrly5QuvWrW3u9+/fnzJlytC0\naVPee+89Tp8+Td++falTpw6VKlUCeLCAa9q0aYSEhJAhQwZKlixp9+9FCCEcxuXLMGYMjB8PN28+\naL6UNh0lYvJyLu4UMJ+XXqrE0qVwfw2t3Ug5TAcUEhLCuXPnqFixYoLV0MWLF2ft2rVcuHCB5s2b\n079/f1577TUWLVr04JkCBQowZswYlixZQpUqVR7sOxZCiFTnwAE9L1ygAIwYYZOIL5Ypy3PG85yL\nOw0sJE+eYFavhsyZ7R+moUyaiA0ICFDh4eH/ef/gwYMUK1bMjhGJZyV/Z0IIhxAXB0uXwqRJEBaW\n8H6pUqghQwmakI3Q0EbAXDw8GrJ1K9wfYEwRhmHsUUoluohIalMLIYRwDUrpbUr9+8O+fQnvv/AC\nDBzInSZNmDApPaGhAMeBLEyenLKJ+HEkGQshhHBucXGwcSN89hns2mV7z81Nb13q0gVq1uTsuXOU\n9y/L2bMfAW8AWXj3XXj7bTMCt5JkLIQQwjkdOQJz58L338P9UsMPZMyo54q7doV8+QD4999/KVeu\nJufP/w94HoAqVfSaLrNJMhZCCOE8bt2CH3+EadMS9oJBL4Pu0gX69YOcOR80Hz9+nHLlahIVdQnY\nAFSiWDFYvBju7wY1lSRjIYQQju/oUZg6FWbN0tuUHpYlC7RtC337PugJW1y9epWyZatz+fINYDMQ\nQKlSemQ7Xr42lSRjIYQQjmvHDhg1CpYtS3jPwwMaNIB27fR/E9kcrBRMnuzN5csfAjWAl3j5Zdiw\nAbJnT+ngk06SsRBCCMeiFKxbp5Pw1q0J7xcuDO+9Bx07Qo4c//ll/vprP0OH3mLRonLABwBUqKC/\ndJYsKRP605JkLIQQwnEcOaLnfB86YhaA+vWhWzeoU+extSp37fqTatWCuHs3J/AX4EbNmnr7sRlF\nPR5HkrEQQgjz3bkDI0fqKll371rb3d3h9dfho4+gePEkfamwsHBq1apNTExGYBngRnAwzJlj/zKX\nSSXlMFPQoEGDMAzjwUfevHlp0aIFx44ds8vrG4bBpEmT7PJaQgjx1DZuhNKlYdAgayJOk0ZvSzp2\nDGbPTnIiXrlyB4GBgcTEeAPbAH/efx/mz3fcRAzSM05x3t7erFu3DoCIiAgGDBhAYGAg+/fvJ2PG\njCZHJ4QQJjpyBHr3hpUrbdvLldNbl8qUeaIvd/gwvPbaZOLifIBQwI9Ro3Sn2l5HIT4tScYpzN3d\nnYoVKwJQsWJF/Pz8qFatGmvWrKFVq1YmRyeEECa4eBE+
/xwmToSYGGu7l5cepn7vPV056wn89pui\nUSODmzdnAFdwd8/Ft99C+/bJG3pKkWFqOytbtiwAJx6uFnNfoUKF+OijjxK0t2rV6sF5xTdv3qRb\nt24ULVqUDBkyUKhQIbp27cq1a9ce+doFCxakd+/eNm2zZ8/GMAxu3LjxoO3SpUu888475MqVC09P\nTypXrszOnTuf5NsUQghbSsFvv+ltSPnywbhxtom4Y0fdte3W7YkS8Z078M47G6hatSJRUZFAOjJk\nyMXKlc6TiEGSsd1ZknDu3LkTvd+6dWsWLlxo03bjxg1Wr15NSEgIALdu3SI2Npbhw4ezdu1ahg4d\nSmhoaLL0tO/cuUNQUBCbNm1i9OjRLFu2DB8fH4KCgvjf//73zF9fCJHKHDsGY8dC2bJQubIuXRl/\ngVbVqhAerot55MmT5C+rFCxYAH5+q5k+vRFxcXcARY4csGUL1K2b/N9KSpJhajuIuf/uLyIigi5d\nuuDl5UVQUFCiz4aEhPDFF1+wY8eOB8PbK1eu5O7duw+SrY+PD19//bXN1y9UqBBVq1bl1KlT+Pn5\nPXWs33//Pfv27WP//v34+/sDEBQURNGiRfnyyy8ZPXr0U39tIUQq8fff8NNPulBHYqcngd7w27s3\ntGjxxBO6+/fDm2/Czp3LgNZAKWADRYtmY8UKKFLkWb8B+3OuZOwIM/BPeP5zVFQUHh4eD679/PxY\nsGABuXPnfpCkQa98dnNzo0yZMhQpUoQFCxY8SMYLFiygevXq5MqV68Hzc+fOZezYsRw9epSb8Q7L\nPnLkyDMl402bNlG2bFkKFSpkE1/16tV51PnTQohU7tw5vWR57lz488/En/H0hDZt9D7i+1N2T+q7\n7+DddyE6ehXQCggga9a1DB6chc6dHaPO9NNwrmTshLy9vdm0aROGYZA7d27y5s2LYRiEhYXx6quv\nPniuevXqhN0/BDs4OJiZM2cyduxYrl+/zrp165g4ceKDZ5cuXUr79u157733GDFiBNmyZePcuXM0\na9aM27dvP1O8kZGR7Nixw+YNhMVzzz33TF9bCOGCLOUqV6zQRxk+LF06qF1bH2PYrBlky/ZULxMd\nDT16wIwZlpaXSZOmDd27T2TQoMwOV1HrSUkyTmHu7u4EBAQkaC9btiy7d+9+cO3l5fXg8+DgYIYO\nHcovv/zC8ePHiYuLo3nz5g/uL1y4kAoVKjBlypQHbVsTKxn3EE9PT+7Gn6sBLj9UcD1btmwEBATY\nDINbpHPkTXpCCPtRCtav10U6Evvd4+mpk2/r1joRZ8r0TC8XEaFHs/fuBdgCVOOFF/KyaNGcpG4/\ndnjOlYyfcIjYkXl5eSWapAGKFy9OiRIlWLBgAcePHycoKIjs8SqaR0dHJ0iM8+bNe+xr5suXj4MH\nD9q0bdiwweY6MDCQDRs24OfnR05HOc5ECGGumzchLAx279bHFu7eDZGRCZ+rXl2vlm7ZEry9k+Wl\nt27ViTgqCuAboDNlyoxm27bez5rjHYpzJeNUJDg4mPHjx3P16lWmT59uc69WrVp07dqV4cOHU6FC\nBdasWcPmxOq4PqRZs2Z0796dESNGUK5cORYvXsz+/fttnmnfvj1Tp06lRo0a9O7dm8KFCxMVFcWu\nXbvInTs3PXv2TNbvUwjh4JYtg86d4cKFxO+7u+t54I8+ghIlkvWlp0/X08t6+cokoDslStTn11+7\nkT59sr6U+ZRSpnyULVtWPcqBAwceed8ZDBw4UGXPnv2p/t+jR48qQKVLl05duXLF5l5MTIzq1auX\n8vHxUV5eXqp58+Zqx44dClArV6588BygJk6c+OD67t27qmfPnipXrlwqS5YsqkePHmratGkKUNev\nX3/w3JUrV1SPHj1Uvnz5lIeHh/L19VXNmjVTv/zyyyNjdoW/MyHEfZcuKdW2rVJ6TDLhR5YsSvXo\nodTJk8n+0vfuKfX++/Ff7ksFqFdeaaJu376d7K9nL0C4+o+caCiThn4DAgLUo1bnHjx4kGLFitkx\nIvGs5O9MCBcQG6t7wz16wNmz1vY8efQccLly+uP55x97ctLTUErX//juO0vLGdKkKUq9evVZunRe\nootLnYVhGHuUUonOT8owtRBCCF2icuZMmDoVHq4Q2LYtTJgAWbOmeBhffRU/EUPz5r706fMbZcsW\nw93ddVOW635nQgghHu/MGfjss4SVsQBy5tQHNjRtapdQQkP11DMooD8VKuRh4cJupElT0i6vbyYp\nhymEEKnRrVswdKguVzVzpm0izp4d+vTRpa7slIhPnNCj4LGxCugNjKBUqf0YhuvsonkU6RkLIURq\ncvo0rFkDw4bBv//a3qtQQS9fbt1a7xW2k1u3dD2QqCgF9AAm0alTd6ZNG4/hCJUX7UCSsRBCuLK4\nOFi7Flatgs2b4ejRhM+ULq1PUYpXFdBerl2D4GDYu1cB7wHTeO21XsyYMTrVJGJw8GSslEpVfxnO\nzKxV+UKI/3D3rp4HHj0aDh1K/JmcOWH4cHjjjSc+Pzg5RERAo0Zw4ACAARSlbt2PmTdveKr73e+w\nydjDw4Po6GgyZMhgdigiCaKjo516y4EQLuPWLb0ieuxYvTjrYZ6eUKUK1Kmji3lkzmz/GIlfWSsG\nOAK8SP/+PRk61JRwTOewyThnzpycOXMGX19f0qdPn+reJTkLpRTR0dGcOXPG5lQpIYSd3bmjT1EY\nNgwePns8c2Z4+21o2BAqVrTrfPDDYmL0Lqm+fSEm5h7wOrCOiRMP061b0s8zdjUOm4wz33+3dvbs\nWe7du2dyNOJRPDw8yJUr14O/MyGEHZ09CytXwuefw8mTtvdy54aePXUPOJlqRT+LPXvgnXfg998B\n7gDBwHJ69PgyVSdicOBkDDohyy94IYSI5+ZN2LABNm3SG3MTmw/Omxf699dzwSb2gi2uX4cBA2Di\nRMspi7eBFsAaBg+eyGefdTM3QAfg0MlYCCEE+pCGlSth+XLYuBH+69zyHDng44/hvfdwhJMUlNKV\nNbt3t52+dnefQGzsWiZNmkaXLu+YF6ADkWQshBCOSCnd850yRSfh2NjEn0ubVi/IatBAjwHHOxvd\nTKdOQbdu+j1EfLVrw/jxPTlzpiyBgYHmBOeAJBkLIYQjuX4dZs3SSfjw4cSfKVFCL8YKCoLKlR2i\nF2yhlA69b189om7h43ONYsXeZ86ckeTOnYsXXpBEHJ8kYyGEcAQXLuhlxpMnw5UrCe9Xrqz3AjVp\nAs89Z//4kuDaNXjrLVi40La9Y8cr/P13XbZv38OePS1p0KCBOQE6MEnGQghhpn//hZEjdX3oh+eC\nvbygQwc9B/zii+bEl0R//QUtW9oW+CpeHMaMuUT//rX566+/WLRokSTi/yDJWAghzHD+PIwYoQt0\nPHxa0vPPw4cfQrt2kCmTOfE9gXnzdI84/nuJLl3g448v0rBhLQ4dOsSyZcuoX7++eUE6OEnGQghh\nTxcu6EN7x4/
X1bLiK1tWT7Y2b25KecqnsXKlfs9gqYibMSNMnw6vvQYXLijc3d1ZuXIltWrVMjdQ\nByfJWAghUppSsH27Xtm0cCE8XMioQgUYMgRq1QInqja4d69OupZE/OKLsHgxZM16nnv3spEzZ052\n7dpFmjRyWu/jyJ+QEEKklNhYmD8fXnoJqlbVn8dPxKVL667lb7/pPT9OlIjPntWHPFhWTBcqBGFh\nkCHDKapUqULnzp0BJBEnkfSMhRAiuVmqXQwYAPv3J7xfuTJ88IFeHe2EyermTWjcWB+NDLr09apV\ncOPGcWrWrMnly5cfJGORNJKMhRAiOYWG6nnf8HDb9gwZ4PXX9cqml14yJ7ZkEBcH7dvrOtOgp7YX\nLQIPj6O88kpNbt26xebNmylbtqy5gToZScZCCJEc/vkHevfW1bLiy5RJ94I//BCyZjUntmQ0bBgs\nWWK9njQJataMpWTJJty+fZvQ0FBKly5tXoBOSpKxEEI8i6tXdYYaP952PjhdOujaFfr1Ax8f8+JL\nRitWwMCB1uv334d33wVwY+bMmXh5eVG8eHGzwnNqzjdZIYQQjiA2FqZNA39/GDPGNhF36KB7yl9+\n6TKJ+NAhaNvWel2zJrRtu5dJkyYBULFiRUnEz0B6xkII8aRCQ/XQ899/27ZXrqz3EJcrZ05cKeTq\nVWjaVJfNBihQAD7+OJzatWuTKVMm2rdvL8fdPiPpGQshRFIopffu1K0LgYG2idjPD378EX75xeUS\nsWXBluXMivTpYdCg32jRIhBvb2+2bdsmiTgZSM9YCCEeJS5OL8oaNQp27rS9lzGjnhPu1cuhTk5K\nTkOG6Llii969t9G9ewNy585NaGgo+fPnNy84FyLJWAgh/su+fdCxo3Ufj0WaNLoG5IgRkDevKaHZ\nw7JlMHiw9bpXL8if/zD58uVj8+bN5HXh793eDGWpY2ZnAQEBKvzhfXhCCOEIYmJg9GgYNMj2EId0\n6XRy7t1bH+bgwg4ehPLl4cYNfV2jxjU2bsyMuzvcvn0bT09PcwN0QoZh7FFKBSR2T+aMhRAivkOH\ndOnKTz6xJuJ06XQhjxMn9ClLLp6ILQu2LIk4V65V7N1bkPDwHQCSiFOADFMLIQTo8/9GjoTPP7ft\nDZcrB3PmQLFi5sVmRzExegvTkSP6Om3apURFBfPSS6UpUqSIucG5MEnGQggRGgrvvWfNQAAeHnrC\n9KOPwD11/KqMi4M339R1prWfiIlpQ/ny5Vi3bh3e3t5mhufSUsdPmBBCPOzyZVi9Gn76SZ+cFF+F\nCvpQ3pIlzYnNBErpstnffWdp+RXDeI0qVaqwevVqvLy8zAzP5UkyFkKkHrdvw/ffww8/wNatuopW\nfJkz62Hqzp31CQiphFK6dPa0ada2Tp0q4u8/gu7du5ExY0bzgkslJBkLIVzf1avw9de6Otb584k/\n06qVri+dJ499YzOZUvDpp/qPRptLs2av8s03+XBz62tmaKmKJGMhhOu6eVP3dCdOhGvXEt4vX14v\nG27aNNUs0IpPKb1ofORIS8sE4H3y5OmGm9tEEyNLfSQZCyFc06pV+tSkU6ds2319dV3p117Tn6dS\nSukiHuPGWVpGA31o0qQZ48Z9aWJkqZMkYyGEazlzBnr0sD10F6BoUb1X+PXXIW1ac2JzEHFx0K2b\nHrnXhgP9adUqmHnz5uLh4WFidKmTJGMhhGu4ckUfZfjVV3p42iJ7dn2UYbt2uoxlKhcdrQcMZs16\n0IK39080bNiO2bNn4p5KtnE5GvlTF0I4txs3YMIEXb7yyhXbe506wRdf6IQs2LhRb6c+dgxAATG0\naZOe8eO3kjWrF26paAW5o5FkLIRwTseP6704M2ZAVJTtvZIl9aKt6tXNic3BnD+vty7Nn29pUUAv\n8uf/h2+/XYynZxYToxMgyVgI4UzOn4ft2+Hbb2HNGr0KKT5/f101KzhYhqTv++03aNgQLl2ytMSR\nNm0P7t6dTNOmPUiXTtKAI5C/BSGEY4qNhb17YcsW2LEDdu9OuDLaokABGDAAOnRINaUrk2LrVmjQ\nIP4UehzPPdeZY8dm0Lt3b7744gsMwzAzRHGf/NQKIRzHtWt6LHX9eggLSzgH/LC6dXUNx/r1U1XF\nrKTYtAkaN9YLtgB8fKBKlZ4sWzaDTz/9lKFDh0oidiCSjIUQ5jt/Xi/CmjxZV8v6L56eUKaMngt+\n802XP8rwaa1ZA82bw507+jpPHti8GW7dak+lSr706dPH3ABFApKMhRDmiYjQ25FmzrRmjvhy54bA\nQHjlFV0tq3hxfZqSSJRSek1bjx5w755u8/W9R9++yylWrCVQlrJly5oao0hckpKxYRh1gfGAGzBD\nKTXyoft+wBwgy/1n+iml1iRzrEIIV7F3L4wapU9Miouzvefvr4ee69SBF14AGUpNkhs39PkW1hXT\nUKDAHfz9W9OjxwoqVdpNQECAeQGKR3psMjYMww2YDNQCTgO7DcNYoZQ6EO+x/sBPSqmvDcN4EVgD\nFEyBeIUQzuzQIV2Kcv36hPfKloV+/aBZM5n/fUIHDkDLlnDwoLWtZMlocuRowaZNa5k8ebIkYgeX\nlLX/5YF/lFIRSqm7wI9Ak4eeUUDm+597A2eTL0QhhEtYuBDKlUuYiGvV0quNdu/WGUUS8RP5/nv9\nxxo/EXfseAsfn8aEha1j+vTpdOnSxbwARZIkZZjaF/g33vVpoMJDzwwCNhiG0R3ICAQlS3RCCOd3\n757u8Y4da21Lk0Yn3j59dI9YPLHbt/UgQ/wziNOn1/WmfXzCaNIkjFmzZtGhQwfzghRJllwLuF4D\nZiulvjQMoxIw1zCMEkopm8kgwzDeAd4B8PPzS6aXFkI4rDNn9OlIP/9sbXv+eVi0CEqXNi8uJxcR\noY9f/v13a1vRovDTT4pSpQygPocPH6Zw4cKmxSieTFKGqc8A+eNd57vfFt+bwE8ASqnfAE8gx8Nf\nSCn1jVIqQCkV4OPj83QRCyEcX2wsTJqkzwiOn4gbN9bD0ZKIn9ry5fDyy7aJODgYNmy4TPfuNdi4\ncSOAJGInk5RkvBvwNwyjkGEYaYEQYMVDz5wCAgEMwyiGTsYXkzNQIYST+OMPqFQJuneH69d1W5o0\nMGIELF0KWaQO8tO4dw9694amTa1bsT089HueSZOiaNo0kN9++41oS5UP4VQeO0ytlIoxDKMbsB69\nbWmmUmq/YRhDgHCl1AqgFzDdMIye6MVcHZV6uGisEMKl3bgBn30G48fbblcqWhS++UbvFRZP5cwZ\n3fv99VdrW4ECek1cgQIXqFkziCNHjrB8+XLq1atnXqDiqSVpzvj+nuE1D7V9Fu/zA0CV5A1NCOE0\nli/Xp9WfPm1tS5cOPvkE+vbVn4un8vPPuppWZKS1rWFDmDMHDOMyVarU4MSJE6xatYqgIFk766yk\nApcQ4ulFROiz+ZYvt22vWVMv6y1SxJy4XMTSpXr9m6U4mWW0/6OP9Odx
cd4EBQXRokULqstxkU5N\nkrEQ4sn9VwUtHx+9hen116Vy1jOaOhW6drX+8ebMqf+4q1eHkydPopSiYMGCTJgwwdxARbKQZCyE\nSLpff4Vhw2DduoT33npLJ+hs2ewflwtRCgYNgiFDrG3PP69rpRQuDBEREbz66qtkz56dPXv2yMlL\nLkKSsRDi8fbsgf79E0/CtWrBwIFQRZaNPKuYGF2We/p0a1u5crBqle4ZHzlyhJo1axIdHc3SpUsl\nEbsQScZCiMQppfcEjxoFS5bY3jMMXXVCKmglm+hoCAmBFfE2jtapo+ujZMoEBw4cIDAwkNjYWLZs\n2UKpUqXMC1YkO0nGQghbJ07ogsdz58KRI7b3DAPatoUBA/TpSiJZXLoEjRrB9u3Wtnbt4NtvrSdG\n9unTB6UUYWFhvPjii+YEKlKMJGMhhHbypC52vGxZ4vdbtNATmZIIktXp07oHfCDeOXgffQQjR+oV\n0xZz584lMjISf3kT5JKSUoFLCOHKYmLgyy91kn04EWfKBB066DnjRYskESezq1cTJuKxY+GLL3Qi\n3rVrF8HBwdy+fZusWbNKInZh0jMWIjXbvl2vGPrzT9v2evX0OGmTJpAhgzmxubiYGGjd2pqIPTx0\nIY/XXtPX27dvp27duuTIkYOoqCh8fX3NC1akOEnGQqRGf/6p531XrrRtL1FCn8lXubI5caUSSkGP\nHrBhg7Vt1ixrIt62bRv162Owzb8AACAASURBVNcnb968hIaGSiJOBWSYWojUQildrCMkBF56yTYR\np0+vJyl//10SsR1MmKALlFl89pmukwIQGhpK3bp18fPzY+vWreTLl8+cIIVdSc9YCFcWE6MLdSxf\nrueDjx+3vW8YOjkPHw6FCpkTYyqzcCH07Gm9DgnRRT4scuTIQYUKFViwYAE5c+a0e3zCHJKMhXBF\nN27AjBl6YVb8wxvia9IEhg6FkiXtG1sqde8efPopjB5tbatYUQ9PGwYcPHiQF154gVKlShEaGioF\nPVIZGaYWwpVERupqWH5+uvv1cCL28tJdsZ07dU9ZErFdnD2rz86In4ife07/FXh6wuLFiylVqhQz\nZswAkEScCknPWAhXcPs2fPWVHm6+ccP2no8PtGype8I1ashxhna2eTO0aQMXLljb6teH776D7Nnh\nxx9/pG3btlSoUIHWrVubF6gwlSRjIZyZUrB4sa4SceKE7b3ChXV7hw56gZawq7g4fabGoEH6rwn0\n3uFhw/QRz2nS6EIeHTt2pGrVqqxatQovLy9TYxbmkWQshDO6d0+PcX71lW0NRYBixfTy3JYtwV3+\niZvh4kVdNTT+1qVcueDHH/XgBOhjEDt16kSNGjVYsWIFGTNmNCVW4RjkX6oQzuTMGX2kzzffwLlz\ntveyZ9flKt95R5KwiXbu1JVDz5yxttWoAT/8ALlzW9sKFCjAmjVrqFq1Kull5CLVk3+xQjiDa9f0\nfPBXX8Hdu7b33N2hWzfdG86a1Zz4BAChofrAh1u3rG2ffAKDB1vfH02cOJFChQrRsGFDatWqZU6g\nwuHIamohHFlsrO4J+/vrgsXxE3GePHrl9IkTMG6cJGKTrV8PDRpYE3G2bLB6tX4PZUnEo0ePpkeP\nHvzwww/mBSockvSMhXAkMTG6WPHu3fpjy5aExxiWLw+9e0PTptbz9YSpVq7UU/SW90q+vnoVddGi\n1meGDRvGgAEDCAkJYc6cOeYEKhyWJGMhHMGNG7pG4pdf6sNtE5M/P4wapfcJyz5Uh7FkCQQH6/dR\nAAUK6OHqwoX1tVKKgQMHMnToUNq1a8esWbNwc3MzL2DhkCQZC2Gm6GiYOhU+/1wvwU1MhgzQrx/0\n6iUnKDmYvXv1HmJLIn7uOd0jLlDA9rlLly7x5ptvMm3aNEnEIlGSjIUww40bei74yy9tl92CXnJb\nqRKUK6c/ypeHzJnNiVP8p6tXoVUruHNHXxcponvElgOWlFJcuHCBXLlyMWHCBADSpJFlOiJxkoyF\nsKfISJg4UX9cvmx7z89Pr4ju0EG2Jjk4peCtt+Cff/R1pkywYoU1EcfFxdGtWzdWrFjBH3/8gY+P\nj3nBCqcg/+KFsIebN/Vq6DFjbPe9gO4Jf/opvP22lKp0EhMnwqJF1usZM6yLtWJjY+ncuTPffvst\nffr0IUeOHOYEKZyKJGMhUlJcHMybp+d8z561vSflKp3Szp16MbtFly56ARdATEwMnTp1Yu7cuQwY\nMIDBgwfLoQ8iSSQZC5FSdu+Grl31f+MrVUpXgmjRQoajncxff0GzZroaKUDZsjB2rPX+yJEjmTt3\nLkOHDqV///7mBCmckvwmECK5Xb0K/fvD5MnWEwJAD0d//jm0b69PCRBOJSxMH3x17Zq+9vaGhQtt\nZxZ69OhBgQIFaNeunSkxCuclvxGESC5K6YnEYsVg0iRrIk6XTveEjxyBjh0lETuhRYugTh1rIs6c\nGZYvh0KF4M6dO3z22WfcvHmTzJkzSyIWT0V6xkI8q5s3Yf58mDJFbzyNr25d3UO2VIAQTmfqVD0v\nbHlvlTs3rFsHpUtDdHQ0zZo1Y/369ZQtW5YmTZqYG6xwWpKMhXhaZ8/qilhz5uih6fhy54bx4/VG\nVFnA47RWrrRNxEWK6BrUBQvCzZs3ady4MVu2bGHGjBmSiMUzkWQsxNNYuRLeeAOiomzb06fXG1CH\nDIEsWcyJTSSLw4f1mcSWRFyunD74wccHrl+/ToMGDfj111+ZM2eODE2LZybJWIgncfu23o40aZJt\nu7+/7kJ16CCnJ7mAa9f0ORyWOWI/P2siBjh//jwRERHMnz+fYMu+JiGegSRjIZLqjz/0Aqy//rK2\n5cunJxXr1ZOFWS4iLg7atYNDh/R1+vSwbJlOxDdv3iRDhgw8//zzHDlyhAxSK1wkE/ntIcTjHDqk\nqzq8/LJtIm7WDP78Ux9iK4nYZQwdqktbWsyYAWXKQGRkJFWrVn2wf1gSsUhO8htEiP9y7JjuCRcv\nDj/9ZG339ISvv4bFi/UJ8sJlzJ0LgwZZr3v10qcynT9/nldffZVDhw5RrVo10+ITrkuGqYV42B9/\n6FXSCxfqMcv4mjaFESP0XmLhUtatg06drNeBgTByJJw9e5bAwEBOnjzJqlWrCAwMNC9I4bIkGQth\nsX27XgW9fn3Ce7Vrw7BhekmtcDm7dunqpJZziUuW1IU+lLpHUFAQp0+fZt26dbzyyivmBipcliRj\nIU6e1Ac5/Phjwnu1a+sTleSXsMs6ckRP+1sO0/Lz071kvTPNgyFDhpA3b14qV65sZpjCxUkyFqnX\njRt6OHrMGL1lySJNGl2so08fvWhLuKw9e/Q6vMhIfZ09ux4YiY4+xooV+2ncuDEtW7Y0N0iRKkgy\nFqnP4cN6Adbs2QkrZ7VuDcOHw/PPmxKasA+l9I60Dz6Au3d1W/r0sGoVGMZhXnmlJnFxcQQGBpIx\nY0ZzgxWpgiRjkTpcuaKrNsy
aBZs3J7xftix89RVUrWr/2IRd3bgBnTvrcuIWmTPrBfOZMx+gevWa\nKKXYtGmTJGJhN5KMhes6exaWLtUVG8LCrKtz4nv+eT0nLMcapgoHDkDLlnDwoLWtdGm9WOvWrb+o\nUSMINzc3tmzZQjFZMS/sSJKxcC03bsCSJXrD6ObNtucJW6RJA40aQdeuev+KJOFU4fvvdY/YslAL\ndBnxCRP0EPWgQUtImzYtoaGhFClSxLxARapkqMR+WdlBQECACg8PN+W1hQs6dw4GDoR582x/28ZX\nrpw+Hb5dO71kVqQKt2/D++/DN99Y29Kn18sGOnSAmJgY3N3dUUoRGRmJj6UAtRDJzDCMPUqpgMTu\nSc9YOLfoaBg3ThfiuHnT9p5hQI0aelyycWNdR1qkKpcu6d1pe/ZY24oW1fVcSpaEX3/9lTfeeIOV\nK1dStGhRScTCNJKMhXOKjtYTfQMG6H3C8RUvrnu/bdpA/vzmxCdMd/cuNG9um4iDg2H6dPDygrCw\nMBo2bIivry+ZMmUyL1AhkGQsnElcHGzdqueDFy2C69dt75coAWPHQlCQ7hWLVEspePdd/eNiMWEC\ndOumfzQ2bdpE48aNKVSoEJs2bSJPnjzmBSsEkoyFs1i3Dnr0gKNHE97LkUMftfPWW+AuP9JC13KZ\nNct6/fnn0L27/nz79u00bNiQIkWKsGnTJnLmzGlOkELEI8tIhWM7dw5CQvR5wQ8nYn9/nYSPHtXd\nIEnEAn2Y1scfW6/feAP69rVely5dmk6dOrFlyxZJxMJhyG8v4Zhu3YJvv4X+/eHaNWu7tze0bavn\nhMuXl+FoYeP33/WPhkX16rrSlmHAxo0bqVixIl5eXkyZMsW8IIVIhPSMhWM5ehQ+/BB8ffWwdPxE\n3K6druo/aRJUqCCJWNi4cEGfcBkdra/9/fWW87Rp4YcffqBevXoMGDDA3CCF+A/SMxaOYe9e+OQT\nWLs24T1/f70pVM6RFf/h3j1dVvzff/W1t7euM50tG8yZM4dOnTpRrVo1hg0bZm6gQvwH6RkLc50/\nD2+/rU9HejgRFy6sT1T66y9JxOKReve2rpw2DF13ukgRmDFjBm+88QY1a9ZkzZo1soVJOCzpGQtz\nXLwIM2boZa7xtygZhj5ctksXqFNHSlWKx5o9W29bshg6FOrXhxs3bjB48GDq1q3LkiVL8PT0NC1G\nIR5HkrGwn9u3YcUKvU943bqEBzfUr697wlKgXyRBbCxMm6aXGFg0b65nO5RSZMqUiZ9//pk8efKQ\nLl068wIVIgkkGYuUd/48jB+v532vXEl4v1gxXayjbl37xyac0p9/6kMfdu60tr34ou4ljxo1krNn\nzzJ+/HgKFixoVohCPBEZAxQp59gxeO89KFBAD0c/nIgrV9ZD1X/+KYlYJMmlS/DRR/r46fiJuEgR\nWL5cMW7cED7++GMiIyOJjY01L1AhnpD0jEXyu3gRPvtMH5MTF2d7r1AhfVRO27bw3HPmxCeczvnz\nevBkyhR9SqZF2rS6wEffvophw/ozYsQIOnTowLfffoubm5t5AQvxhCQZi+Rz967eAzxkCFy9anuv\nXDno108fYSi/JEUSXb+u39dNm2bdP2xRo4Yu6FG0KHzyyad8/vnnvP3220ydOpU0svBPOBlJxuLZ\n3bih95KMGZOwZGXNmvDpp/Dqq1KkQzyREyegUSPYt8+2vXhxfVhX69bWH6lKlSrx/vvvM3bsWEnE\nwikZSilTXjggIECFh4eb8toimRw6pBdlzZ5tWykL9CTel1/qbUqShMUT+uUXaNYMIiOtbWXL6uqo\njRvrHW9xcXHs3r2bChUqmBeoEE/AMIw9SqmAxO7JW0jxZKKidAKuXFmvgp4wwTYRZ8kC48bB339D\nw4aSiMUTmzVLD6hYEnHatDBzJuzerctdpkkDsbGxvPXWW1SuXJm//vrL3ICFSAYyTC0eTykIDdXz\nwatX69qDD/P314U6OnbUCVmIJ6SUXm4waJC1LWdOWLpUv/eziImJoWPHjsybN4+BAwdSsmRJu8cq\nRHKTZCz+W2ys/k04ahQkNqXg7q57v1266HKVMlcnnpJSelX0qFHWtlKldI2YAgWsbffu3eP1119n\n4cKFDB8+nE8++cT+wQqRAiQZi4Tu3IHvvoPRoxMuyAJ9dGG7dhAcDD4+9o9PuBSloGdPXRfGok4d\nWLQIHi4lvWjRIhYuXMiYMWPo1auXfQMVIgVJMhZW167pPSTjxsG5c7b3PD31Ke3vv6/3kgiRDOLi\noGtXvUXJolEjWLgQEqtgGRISgp+fH1WqVLFfkELYgYwrCr0neOBA8PODPn1sE7G3ty72e+KErrgg\niVgkkytXoEUL20TcsqXuEcdPxNHR0bRp04Z9+/ZhGIYkYuGSpGecmt28CRMnwhdfwOXLtvfy5NEV\n+N95BzJnNic+4bJ+/x1atYKICGtbmzYwZ45eimBx8+ZNGjVqRFhYGA0aNKBEiRL2D1YIO0hSz9gw\njLqGYRw2DOMfwzD6/cczrQ3DOGAYxn7DMOYnb5giWd25o7ckPfecXjUTPxHrQ2Dh+HF9SKwkYpGM\nlNJVUitXtk3EH3yglynET8TXr1+nXr16bN26le+++47XX3/d/gELYSeP7RkbhuEGTAZqAaeB3YZh\nrFBKHYj3jD/wMVBFKXXZMIycKRWweAb37umux5Ah8O+/tvcKF4bBg+G116RcpUgxQ4fqGRELLy+9\nh7hlS9vnrl69Sr169di1axfz588nODjYvoEKYWdJGaYuD/yjlIoAMAzjR6AJcCDeM28Dk5VSlwGU\nUheSO1DxDJSCxYt1L/iff2zv+frq4r9vvAEeHubEJ1KF8HD9PtCiVCm9UKtIkYTPpk2blqxZs7Jw\n4UKaNWtmvyCFMElSkrEvEL8bdRp4uP5cEQDDMH4F3IBBSql1yRKheDbHj+vlqmvX2rb7+OiFWe++\nq1dKC5GC7t6FTp301nWAatVg/XpIn972ucjISNzc3MiaNSurVq3CkApuIpVIrgVc7oA/UAPIB2wz\nDKOkUsrmAFvDMN4B3gHw8/NLppcWibp3T9eGHjLE9ribLFn0iunu3RNu4hQihXz+ua6QCjoBz5qV\nMBGfP3+ewMBAcuTIwZYtWyQRi1QlKcn4DJA/3nW++23xnQZ2KqXuAccNwziCTs674z+klPoG+Ab0\nQRFPG7R4jIgIPQn3xx/WNsPQlbKGDZNylcKu/v4bhg+3Xo8YkfAo67NnzxIYGMipU6eYMGGCJGKR\n6iRlNfVuwN8wjEKGYaQFQoAVDz2zDN0rxjCMHOhh6wiE/a1erY+3iZ+IS5eGHTt0bWlJxMKOYmL0\n8LSlnHmlSnpQJr5///2X6tWrc/r0adatW0fNmjXtH6gQJntsMlZKxQDdgPXAQeAnpdR+wzCGGIbR\n+P5j64EowzAOAFuAj5RSUSkVtEhEbKxeiNWwoa6mAPq4m9Gj9cqZ8uXNjU+kSiNHWsua
p00L336b\ncLF+hw4duHDhAhs2bKBatWr2D1IIByDnGTu7K1d0yaLp02HXLmt7/vx6BXW5cubFJlK1efOgbVvr\n9YgRekH/wyIiIrh06RIBAYke8yqEy5DzjF1RWJguYZQ7N7z9tm0iDgrSJY4kEQuTbNigT9O0qFZN\n15CxOHToEP369SMuLo7ChQtLIhapniRjZ3P7tj6s4dVXdY/4zh3rPTc36N8f1q2DHDnMi1GkauHh\n0Ly5ni8GKFECli+3bmPft28fNWrUYNasWZw58/BaUCFSJ6lN7UwOHYKQEPjzT9v2MmX0kYavvaZ7\nykKY5J9/oH59XfYc9GzJ2rWQNau+/vPPPwkKCsLDw4PQ0FDy58//319MiFREkrEzuH1br3zp0wdu\n3bK2N2qkN3AWL25ebELc9++/eobk4kV9nTWrLuyRL5++3rNnD7Vq1SJjxoyEhobi7+9vXrBCOBhJ\nxo4sIkKfLzdzJkTFW5yeLp0u6NGli94/LITJzp2DmjXh5El9nT49rFoFxYpZn4mKiiJXrlysWbOG\nQoUKmROoEA5KkrEjioyEzp1h6VJdVzq+YsXgxx91YV8hHMDFi7pHbCl77uEBS5bok5lAl7jMkSMH\ntWvX5u+//8bdXX7tCPEwWcDlaPbv13uClyyxTcQFClg3bUoiFg7i8mWoXRsO3D82xs0NfvoJ6tbV\n12FhYRQuXJjFixcDSCIW4j/IvwxHsnq1XoR1/bq1rW5dfdBDvXpytKFwKDdv6sVae/fqa8OA77+H\npk319caNG2nSpAmFChWiSpUq5gUqhBOQZOwI4uL0HHDfvtbecMaMMH8+NG786P9XCBPcuwfBwbrK\nqsXMmXqxP8CaNWto3rw5L7zwAhs3bsTHx8ecQIVwEpKMzfbzz9CzJ+zZY20rUABWrJDhaOGQlNJL\nGlavtrZNmGAt8nH06FGaNm1KqVKl2LBhA9myZTMlTiGcicwZm+XECWjdGl55xTYRV6miq2lJIhYO\n6tNP9RGIFh9/bHv4g7+/P19//TWbNm2SRCxEEkkyNsOGDfDii7BwobXN01NXz9q8GXLmNC82IR5h\n/Hi9td3ijTesxyP++OOP/P777wC8+eabZJETwoRIMknG9rZzJzRrBtHR1raQEDh8GIYO1XuIhXBA\n48bBBx9Yrxs0gG++0Qu3Zs+eTZs2bRgxYoR5AQrhxGTO2J4OHNDLTy1VtPz84IcfrBsyhXBQI0fa\nnrhUqZLewuTuDt988w2dO3emVq1afPfdd+YFKYQTk56xvZw6BXXqwKVL+jpHDj1cLYlYODClYNAg\n20Rctao+iyRDBpg0aRKdO3emQYMGrFixggwZMpgWqxDOTJKxPURE6MoIp0/r60yZdPX8okXNjUuI\nR4iJgY8+gsGDrW01a+pEnDkzxMXFsX79epo2bcqSJUvw9PQ0L1ghnJwMU6eku3f1/uEhQ/RhDwBp\n08KyZSDntwoH9r//6fozYWHWtrp1dWG49OkhOjqa9OnTs3DhQtzc3PCwnI8ohHgq0jNOKb/8Ai+/\nDJ98Yk3Ebm4wbx4EBpobmxCPEBamT+WMn4ibNNHvIT09FYMGDaJy5cpcvXoVT09PScRCJANJxsnt\n5k296bJaNV1n2uKll2D7dmjZ0rzYhHiMceP0e8X//U9fG4Yepl68GNKmVXz66acMHjyYl156iUyZ\nMpkbrBAuRIapk9Mvv+gyRMeOWdsyZtRblrp310tPhXBQS5bAhx9ar318dEXWoCBQStG7d2/Gjh1L\n586dmTJlCmnSyHt5IZKL/GtKDrdvQ69euppW/ETcsCEcPKjLXUoiFg7s2DFdwMOiUiX44w+diAFG\njBjB2LFj6d69O19//bUkYiGSmWSIZ/W//+ljanbutLZ5e+tSRe3b63E+IRzY7dvQqhVcu6avCxbU\ndaezZrU+0759e9zc3Ojbty+G/EwLkezk7e2z+P13KFfONhHXqQP79kGHDpKIhVP44APdCwa92H/h\nQp2IY2NjmTFjBrGxseTPn59+/fpJIhYihUgyflqLF+vqB5a9w2nS6NUva9dCvnzmxiZEEs2bB9Om\nWa/HjdO77mJiYmjfvj1vv/02q1atMi9AIVIJGaZ+UrGxennp0KHWNm9vXRuwdm3z4hLiCf39tz4K\n0SI4GN57D+7du8frr7/OwoULGTFiBE2aNDEvSCFSCUnGTyIqCtq00WUsLZ5/HlauhBdeMC8uIZ7Q\npUt6qcPNm/q6SBGYPh3u3r1DSEgIy5Yt48svv+TD+MurhRApRoapkyo8XBfxiJ+IAwP1fLEkYuFE\nYmN1da2ICH2dMaPe1uTlBQcOHGDDhg1MnDhRErEQdiQ946SYMQO6dtXlLS0+/lgPVbu5mReXEE/h\n009t31N+9x288EIs4EaZMmX4559/yJMnj2nxCZEaSc/4UaKj4c034e23rYk4c2ZdF3DECEnEwuks\nWACjRlmvP/0Uate+Qa1atZh2fyWXJGIh7E+S8X85fhyqVIGZM61tJUvCnj26UK8QTuavv6BTJ+t1\n/frQq9c16taty9atW/Hy8jIvOCFSOUnGiVm+HMqWtW6+BGjbFnbs0Au2hHAyUVF6wdatW/ra3x+m\nTLlCvXq12blzJz/++CNt2rQxN0ghUjFJxvFdvqyrZjVtqj8H8PCAyZP1xJocnC6cUEyMXrB1/Li+\nzpQJfvrpDi1aBPH777+zaNEiWrVqZW6QQqRykowt1q6FEiVg7lxrm68vbNsGXbpINS3htD7+GDZu\ntF7PnQsvvZSO1157jWXLlsk+YiEcgKymBvjsM9siHgCvvw4TJkC2bObEJEQy+OEHGDPGev3hh//D\n1/dfoBy9evUyLS4hhC3pGX//vW0i9vHRmy6//14SsXBqq1fbLtgKCjrDqlXVadasGbdv3zYvMCFE\nAqm7Z/z773rbkkWtWrpYr4+PeTEJkQxmzdI/2rGx+rpw4VMcO1aTyMgLrF27Fk9PT3MDFELYSL09\n44sXoVkzfX4cQLFisGiRJGLh1JSCzz/XPWJLIvb1Pc6dO69w6VIkGzdupEqVKuYGKYRIIHUm43v3\noHVrOHVKX3t760IemTObG5cQzyAmBnr0gE8+sbaVLg1BQWOIjr7O5s2bqVChgnkBCiH+U+obplYK\nevaEsDB9bRh6aLpIEVPDEuJZnDsHISF68b/Fq6/C0qWQPv04Tp78AH9/f/MCFEI8UurrGY8YofcN\nWwwbBg0amBePEM8oNBReesk2Edepsw+lArl79yJp06aVRCyEg0tdyXjaNOjf33rdurXehCmEE4qN\nheHD9brDCxd0W5o00KXLXsLDa3DkyCEuW4rXCCEcWupJxosW6ZPTLYKCdFUtKeYhnNDx43oYun9/\niIvTbTlzwqRJ4fzwQ00yZMjA1q1bKSLTL0I4hdSRjDdv1kU8lNLXAQF6L3G6dObGJcQTUkqfXVKq\nFPz8s7W9WjWYPTucfv0C8fb2Ztu2bTwvddS
FcBqun4z379dbmCxHIBYtCmvW6JPUhXAiV69C8+b6\nVM8bN3SbmxsMHKjnjcuUyUe1atXYtm0bBQsWNDVWIcSTce3V1BcvQqNGcP26vvb11aeqy15i4WTO\nnYN69eDPP61tRYroOtNp0+4FSpA7d25WrVplWoxCiKfnuj3jO3d0N8JyVE3GjLo+oJ+fuXEJ8YQO\nH4ZKlWwTcdeu+oTPy5fXU6lSJQYOHGhegEKIZ+aayVgpePdd+OUXfW0YMH++roAghBPZuROqVIGT\nJ/W1m5sudTlpEoSGrqJx48a88MIL9OzZ09xAhRDPxDWT8dixMHu29XrkSGjc2LRwhHhSt27pLfE1\na0JUlG7LkAFWrICOHWHp0qU0b96cUqVKsXnzZnLkyGFqvEKIZ2MoywpjOwsICFDh4eHJ/4Xv3NFz\nwpZ54o4d9fJT2cIknEBcnD4w7NNP4fRpa3v27HqWpUIFuHr1KoUKFaJo0aKsW7cOb29v8wIWQiSZ\nYRh7lFIBid1zvQVcly5ZE3GWLDB1qiRi4RT++QeCg/VhYvEVLw6LF+uNAADe3t5s2LCBokWL4iW7\nAoRwCa43TH3njvXzzJllL7FwCgcPwiuv2CbiXLl00bi9e3UinjlzJuPHjwcgICBAErEQLsS1k7Ek\nYuEE/voLqlfX25cAPD1hwAA4ehTeeQfc3WHq1Km8+eabrF27lljL2YhCCJfhesPUkoyFE9mzB2rX\n1rMrAJky6bnhV16xPjNhwgTef/99GjRowKJFi3BzczMnWCFEipGesRAmCQ2FwEBrIs6cWdekiZ+I\nx4wZw/vvv0+zZs1YsmQJnp6e5gQrhEhRkoyFsLO4OH1yZ61ausQlQNasuoR6pUq2z6ZLl47g4GAW\nLFhA2rRp7R+sEMIuJBkLYUeRkfr47AEDrKct5cqle8kB9zc8KKU4ceIEAN27d+eHH37Aw8PDnICF\nEHYhyVgIO/n7b3j5ZVi3ztpWrZpeQf3SS/paKcXHH39MiRIlOHz4MACGbM0TwuVJMhbCDs6c0Qc9\n/Puvta1vX90jzptXXyul6NWrF6NGjaJdu3b4+/ubE6wQwu5kNbUQKezmTV2N9cwZfZ05s66y1aiR\n9Zm4uDh69OjB5MmT6dGjB1999ZX0iIVIRVy7ZywLXoTJ4uKgbVtrMQ83N11NK34iBpg9ezaTJ0+m\nd+/ekoiFSIWkZyxECurXD5Yts15PmQJBQQmfa9++PRkyZCA4OFgSsRCpkGv3jCUZCxNNmQKjR1uv\ne/XSFbUsYmJi6NOndvq6CAAAIABJREFUD+fOncPd3Z2QkBBJxEKkUq6XjO/etX4uyViYZPx46NrV\net24MYwaZb2+e/cuISEhjB49mtWrV9s/QCGEQ3G9ZCw9Y2GyL76ADz6wXpcvD/Pm6fligDt37tCq\nVSsWL17M2LFjeeutt8wJVAjhMGTOWIhkohQMHQoDB1rbqlSBNWt0zWmA6OhoWrRowdq1a5k8eTJd\nunQxJ1ghhEORZCxEMnk4EdeoAStXWhMxwK1btzh9+jTTp0+XHrEQ4gFJxkIkg6lTbRNx7dqwdClk\nyKCvb9y4Qdq0acmePTvh4eFSZ1oIYUPmjIV4RkuWQPzR5jp1YPlyayK+evUqderUoW3btiilJBEL\nIRKQZCzEM9i6Fdq00fPFAOXKwaJFYDnp8PLly9SuXZtdu3bRunVr2bokhEiUDFML8ZT+/huaNLH+\nyPn7w+rV1jniqKgoatWqxb59+1i0aBFNmjQxL1ghhENLUs/YMIy6hmEcNgzjH8Mw+j3iuRaGYSjD\nMAKSL8QnJMlY2MG5c1C/vvU84ty5Yf168PHR10opmjdvzoEDB1i+fLkkYiHEIz22Z2wYhhswGagF\nnAZ2G4axQil14KHnvID3gZ0pEWiSSTIWKezWLV3E4/Rpfe3lBWvXQqFC1mcMw+CLL77g+vXrBCVW\n/1IIIeJJSs+4PPCPUipCKXUX+BFI7G3+UGAUcDsZ43tykoxFCoqLg3btIDxcX7u56Tliy3nEZ86c\n4ZtvvgGgQoUKkoiFEEmSlGTsC8Q7hZXT99seMAzjZSC/Usr8un6SjEUK+uQTvXraYuJEvY0J4NSp\nU1SvXp3evXtz7tw5cwIUQjilZ15NbRhGGmAs0CsJz75jGEa4YRjhFy9efNaXTpwkY5FCZs60rS/9\nwQfw3nv684iICF555RUiIyPZuHEjefLkMSdIIYRTSkoyPgPkj3ed736bhRdQAggzDOMEUBFYkdgi\nLqXUN0qpAKVUgI9lpUtyk/OMRQpYs8b2xKWGDWHMGP350aNHqV69OtevXyc0NJQKFSqYE6QQwmkl\nZWvTbsDfMIxC6CQcArSx3FRKXQVyWK4NwwgDeiulwpM31CSSnrFIZjt3QqtWEBurr0uXhvnzrQc/\n7Nixg7t377JlyxZKlSplXqBCCKf12J6xUioG6AasBw4CPyml9huGMcQwjMYpHeATk2QsktGhQ9Cg\ngV5BDVCwoF457eWlT18CaNeuHUeOHJFELIR4akmaM1ZKrVFKFVFKPaeUGn6/7TOl1IpEnq1hWq8Y\nJBmLZHPmjC5tGRWlr3Pk0HuJ8+SBP/74A39/f7Zu3QqAt7e3iZEKIZyd65XDvHvX+rkkY/GUTp6E\nwEA4dUpfZ8igq2sVKQK7du2iZs2aGIZBvnz5zA1UCOESXC8ZS89YPKO//4bKleHwYX3t7g6LF0P5\n8rB9+3aCgoLImjUr27Zt47nnnjM3WCGES5BkLEQ827ZBtWpw9qy+TptWL9aqWxcOHjxI7dq1yZ07\nN9u2baNAgQLmBiuEcBmulYyVkmQsntrSpbqAh6XetKXMZatW+rpo0aL07NmTrVu3yvC0ECJZuVYy\njomxnmXn5mbdeyLEYyxfrpOu5b1c7ty6l1yzJmzatIlTp06RJk0ahg4dKgU9hBDJzrWSsfSKxVNY\ntw5at7buI/b3h+3bdb3plStX0qBBAz788ENzgxRCuDRJxiJVCwuDZs2si/Cffx62btUnMC1evJjm\nzZtTunRppk+fbmqcQgjXJslYpFrbt+uylrfvnzNWoABs3qz3Ef+/vTuPr6q4/z/+GpIgjShQpFSj\ngCKKiFb5IlpFCGGLrGUVEQShBQSqgKJWK+6CVbC2BVwgZRWQRYhRQHYtLaA1lB+bCIgsKiBQZEnM\nNr8/JulNIMtF7n7fz8eDR8+cnHPz6THkzcyZM2fWrFncfffdNGrUiGXLllGlSpXgFisiEU1hLFFp\n7Vo3Q/rUKde+7DIXxDVqQG5uLq+99hp33HEHS5Ys0YIeIuJ33qxNHT4UxuKF1atdj7ggiH/xCxfE\ntWtDXl4eMTExLF68mAoVKnDhhRcGtVYRiQ7qGUtU+egjuOsuTxBXr+6CuG5dmDBhAh06dODHH3+k\natWqCmIRCRiFsUSN1FRo395zj/iyy9xkrfr14c9//jNDhgyhXLnI+ishIuEhsn7zKIylGEeOQL9+\n0L
GjZ9Z0jRruOeJrr4U//elPDB8+nC5dujBv3jwu0M+OiARY5IZx+fLBq0NCgrUwdaobgv773z37\nr7rKBXHt2jB27Fgee+wxevTowezZsymvnxsRCYLIDWP1bqLav/8NiYnQty98/71nf5cubiZ1wbLS\nzZo1Y8iQIcyYMYPY2Miazygi4UNhLBFl717o3RsaNnS93wI1asD778O8eVC9umXZsmUANGjQgL/9\n7W/EaOlUEQkihbFEhNxcGDXKvW94xgzP/thYePhh2LrVPc5krWX48OG0atWKJUuWBK9gEZFCImtc\nrmB2DiiMo0hmJtx7LyxYUHT/b34DY8a4SVrgniEeOnQoEydOZNiwYbRu3TrwxYqIFCOywlg946jz\n3/+6WdKFh6RvuQXGjnXvJS6Ql5fHgAEDmDx5Mo8++ihjxozBGBP4gkVEiqFhaglbBw64wC0cxMOG\nwbp1RYMY4J///CcpKSk89dRTCmIRCTnqGUtY+vRTNzN63z7PvldecfeHi8vZxo0b8/nnn3PTTTcF\nrkgRES+pZyxhxVoYPx7uuMMTxLGxMG0aPPJI0SDOysqiZ8+e/5uopSAWkVClMJawceIE9OwJQ4dC\ndrbbV6mSe2Spd++ix/7444907dqVWbNm8eWXXwa+WBGRc6BhagkL+/ZBq1awfbtn3803w9y5biWt\nwjIyMujcuTNLlixhwoQJPPDAA4EtVkTkHKlnLCHvu++gefOiQTxgAPzzn2cHcWZmJu3bt2fp0qVM\nmjRJQSwiYUE9Ywlp338PLVpAwUhzXBxMnnz2sHSB8uXLc80119CnTx96l3SQiEiIURhLyPrvf93Q\n9JYtrh0TA3PmQKdOZx97/Phxjh07Rq1atZgwYUJgCxUROU8KYwlJJ07AXXdBerprGwPTpxcfxMeO\nHaN169YcO3aMLVu26M1LIhJ2FMYSck6fdutIr1vn2Td5Mtxzz9nHfv/997Rq1YotW7Ywb948BbGI\nhKXIDWP9Ug5LmZluTenCq2qNHw/333/2sYcOHaJFixZ8+eWXLFq0iOTk5MAVKiLiQ5EbxuoZh52s\nLOjWDfLfbgjAq6/C4MHFHz9y5Eh27txJWloazZs3D0yRIiJ+oEebJCTk5ECvXpCW5tn3/PNuecuS\nvP7666xatUpBLCJhT2EsIWHkSLeAR4E//AGefPLs4/bs2UO/fv3IyMigcuXK3HrrrYErUkTETxTG\nEnTTp8Of/+xpDxsGL7549gsfdu3aRdOmTXnvvffYvXt3YIsUEfGjyArjrCzPtsI4LHz+uVtNq0Cn\nTu5dxGcG8RdffEGTJk04deoUK1eu5Prrrw9soSIifqQJXBI0hw65mdOZma5drx5MnQrlzvgn4tat\nW0lKSsJay6pVq7jhhhsCX6yIiB9FVs9YYRw2fvgB7r7b8xrESpVg4UK46KKzj83Ly6N69eqsXr1a\nQSwiEUk9YwmozZthwgR3n/jkSbfPGHjnHahTp+ix+/fvJyEhgfr165Oenk65M7vMIiIRIrJ+uymM\nQ9aXX0JSEtxwA0yc6AlicI8wtWlT9Pj169dTv359xo0bB6AgFpGIpp6x+N2GDdC2rXsDU2H16sEj\nj0DfvkX3r127lrvuuotq1arRtWvXgNUpIhIskRPG1iqMQ9DixdC1q1tvGiA21s2YHjIEmjQ5e9b0\n6tWradeuHQkJCaxcuZKEhITAFy0iEmCRE8bZ2Z7t2Nizp+RKwE2bBv37u9W1AKpWhQ8+gJLW6Thy\n5AgdOnSgZs2arFixgl/+8peBK1ZEJIgiJ7HUKw4Z1sILL0CfPp4grlkT1q4tOYgBqlatysyZM1m9\nerWCWESiSuT0jBXGISEjw/WGZ83y7LvxRjdcfdllxZ+zaNEiADp27Ej79u0DUKWISGhRz1h85ptv\noGnTokGclOReh1hSEM+bN4+uXbsyduxYrLWBKVREJMQojMUnNm2CW26BTz/17HvgAViyxC3oUZxZ\ns2bRo0cPbr31VtLS0jBnzuYSEYkSkRnG5csHr44otHs3tGrlesYAMTEwfrxb3CMurvhzpk6dSq9e\nvWjcuDFLlizh4osvDlzBIiIhRveM5bwcOgStW8PBg65dqRLMmwctWpR+3saNG0lKSmLRokXEx8f7\nv1ARkRCmMJaf7MQJt3LWzp2uXaECpKVB48Yln3P8+HEqVarEuHHjyMrK4gL9txIRidBhav2C97us\nLOjSBf79b9cuVw5mzy49iF977TXq1avH3r17McYoiEVE8imM5Zzk5MDMmXDTTbBsmWf/G29Ax44l\nn/fyyy8zYsQIbr/9di699FL/FyoiEkYiZ5g6K8uzrTD2uexst6LW6NGwa1fRrz37LPzudyWf+/zz\nzzNq1Cjuuecepk2bRmxs5PzYiYj4QuT8VlTP2G/27oXu3WH9+qL7L7oInnwSHn205HMnT57MqFGj\n6NOnD5MnTyYmJsa/xYqIhCGFsZRq8WLo1QuOHvXsq1IFhg2D3//ebZeme/fuHD16lIcfflivQRQR\nKUHk/HZUGPtUTo7r9bZp4wni2Fg3JP311zBqVMlBbK3lr3/9KydPnuSiiy5i5MiRCmIRkVKoZyxn\nOXYM7r676ASthASYMwfuuKP0c/Py8hgyZAhvvPEGcXFxDBo0yL/FiohEAIWxFLFjB7Rv7/63QMuW\nbgZ1tWqln5ubm8uAAQNISUnh8ccfZ+DAgf4tVkQkQkTO2KHC+LwtX+5ecVg4iEeNcveNywrinJwc\n+vbtS0pKCk8//TQvvfSS1poWEfGSesYCuLWkH3oIcnNd+2c/g6lToVs3787/9ttvWb58OS+++CJP\nPPGE/woVEYlACuMol53tQnjiRM++hARYtAj+7/+8OT+b2NhYrrjiCrZs2cLPf/5z/xUrIhKhNEwd\nxY4eheTkokF8yy2wYYN3QZyZmUnnzp0ZOXIkgIJYROQnUhhHqe3b3f3hlSs9+3r0gDVr4LLLyj4/\nIyODjh07kpaWRp06dfxXqIhIFIjMMNb7jEu1ZIkL4oK3LQE8/zy88467V1yWU6dO0bZtW5YtW0ZK\nSopmTYuInCfdM44i1sJf/gIjRkBentsXH+/WnO7SxdvPsHTs2JE1a9Ywbdo0evXq5b+CRUSihMI4\nSmRlwdCh8Pbbnn2XXw6pqXDzzd5/jjGGwYMHM2DAALp37+77QkVEopDCOAocPw6dOsGqVZ59t94K\nCxfCL3/p3WccPXqUDRs2kJycTOfOnf1TqIhIlFIYR7hvvoG77oJNmzz77r0XJk2CChW8+4zvv/+e\nli1b8uWXX/LVV19RrawVQERE5JxE5gQuhTEAX3wBt99eNIhfeAGmT/c+iA8ePEhiYiLbt29nwYIF\nCmIRET+InJ5xVpZnW2HMhg3ujUtHjrh2TAxMngx9+nj/Gd988w3Nmzdn7969fPDBByQlJfmnWBGR\nKBc5Yaye8f/s2OFe7vDDD64dHw/z5rnh6nMxa9Ys9u/fz5I
lS7jzzjt9X6iIiABeDlMbY5KNMV8Y\nY3YaYx4v5usjjDFbjTGbjDErjDE1fV9qGRTGAGRkuPWkC4K4alW3sMe5BLG1FoARI0awadMmBbGI\niJ+VGcbGmBhgPHAXUA+4xxhT74zD0oGG1tobgXnAn3xdaJkUxgA8+KDnHvEFF8BHH7mZ097auXMn\njRo1Ytu2bRhjuPLKK/1TqIiI/I83PeNGwE5r7W5rbRYwG+hY+ABr7Spr7en85jrgct+W6QWFMdOm\nuVnSBV5/HRo08P787du306RJE/bs2cOPha+niIj4lTdhnADsK9Ten7+vJP2BxedT1E8S5WG8eTMM\nGuRp33svDBhwLudvJjExkdzcXFatWsVNN93k+yJFRKRYPp3AZYzpBTQEmpbw9QHAAIAaNWr48ltH\ndRh//bVbzjIjw7Wvuw7eeAOM8e787du306xZM+Li4li5ciV169b1X7EiInIWb3rGB4ArCrUvz99X\nhDGmBfAk0MFaW+wYp7X2LWttQ2ttQ58/rxqlYbx2rXvt4Y4drl0wc7piRe8/o0aNGrRu3Zo1a9Yo\niEVEgsCbMP4UqGOMudIYUx7oAaQWPsAYczPwJi6ID/m+TC9EYRhPnQpJSXD4sGvHxbn7xvXOnF5X\ngvT0dI4fP058fDwzZszQqxBFRIKkzDC21uYAQ4GlwDbgXWvtFmPMc8aYDvmHvQJUBOYaYzYaY1JL\n+Dj/sLbooh8R/gpFa+Hxx6FvX8//7WrV3NrT3r596R//+AdNmjRhyJAhfqtTRES849U9Y2vth8CH\nZ+wbVWi7hY/rOjeFgzguDspFziqfxXnpJXj5ZU/7hhvc25dq1fLu/NWrV9O2bVuuuOIKXi78QSIi\nEhSRkVqFh6gjvFf87rvwxz962u3bu/vG3gbxsmXLaNOmDbVq1WL16tUkJJQ2MV5ERAIhMpbDjJL7\nxevXF11bulkzN1nL239/ZGdnM3jwYOrUqcPy5cv10gcRkRChMA4TX38NHTtCZqZrX3MNzJ9/bgMB\ncXFxLFmyhMqVK1O1alX/FCoiIucs8oapIzCMv/sO2rWDgwdd++c/hw8+gCpVvDt/7ty5PPjgg1hr\nqV27toJYRCTEKIxDXHo6NGrkVtgCNz/tvffg6qu9O3/mzJn06NGD9PR0MgpWBRERkZCiMA5hCxZA\n48awL38x0nLlICUFmjTx7vwpU6bQu3dvmjZtyuLFi4mPj/dfsSIi8pMpjEOQtfDii+6Z4dP5r9+4\n+GI3NN2rl3efMWnSJO6//35atGhBWloaFc9lSS4REQmoyAjjws8ZR0AYT5hQ9PGl2rVh3TpITvb+\nMy699FI6depEamqqesQiIiEuMsI4gnrGn34Kw4d72omJ7pGm667z7vxt27YB0LZtWxYsWECFChV8\nX6SIiPiUwjiEHDsG3btDdrZr33wzLF4M3k5+Hj16NPXr1+fjjz/2X5EiIuJzCuMQYa1b0GPPHteu\nVAnmzgVvOrbWWp599lmeeOIJ7rnnHm6//Xa/1ioiIr6lRT9CxKuvwvvve9p//7u7V1wWay1PPvkk\no0ePpm/fvkyaNImYmBj/FSoiIj6nnnEImD8f/vAHT3vECOjUybtzly1bxujRoxk4cCCTJ09WEIuI\nhCH1jINswgQYOtQNUwP8+tcwZoz357ds2ZL333+ftm3bYozxT5EiIuJX6hkHibXu8aUhQzxBfO21\n7j5xXFzp5+bl5TFy5Ej+85//YIyhXbt2CmIRkTCmnnEQ5OTAwIFuNa0Ct94KaWlwySWln5ubm8tv\nf/tbpkyZQpUqVfjVr37l32JFRMTv1DMOMGvhoYeKBnGbNrBiRdlBnJOTw3333ceUKVN45pln+EPh\nG80iIhK2Iq9nfC7vFAyCv/7V3Scu0KcPvP122UPT2dnZ3HvvvcydO5eXXnpJQSwiEkEiL4xDuGf8\nwQdFV9e6+27XQy7nxfhEXl4eJ06cYOzYsYwYMcJ/RYqISMApjANk0ybo0QPy8lz7ttvcs8RlBXFm\nZiYZGRlUqVKFtLQ0PbokIhKBdM84AL79Ftq1g5MnXbtmTVi4EH72s9LPO336NB06dCA5OZmcnBwF\nsYhIhFLP2M8OH4YWLTzvJL7oIjdrunr10s87efIk7du3Z82aNaSkpBAbGxn/qURE5GyR8Rs+RMP4\n2DFo1Qq2bnXtmBh4912oX7/083744QfatGnDv/71L2bMmEHPnj39X6yIiASNwthPfvjBvX9440bX\nLlcOZs707p3EAwcOZP369cyePZtu3br5t1AREQm6yLhnnJXl2Q6BMD51yt0j3rDBsy8lxc2e9saY\nMWNYuHChglhEJEpERhiHUM84Nxd69oRPPvHsmzjRPU9cmsOHD/Pcc8+Rl5dHzZo1adu2rX8LFRGR\nkKEw9rHHHoPUVE977FgYNKj0c7777jsSExMZPXo0WwtuMIuISNTQPWMfeustF74FHnnEvQ6xNAcO\nHCApKYn9+/fz4YcfUr+s2V0iIhJxFMY+snw5DB7saf/mN2W/CnHv3r0kJSVx6NAhli5dSuPGjf1b\npIiIhCSFsQ9s2wZdu7r7xQANGsCMGe5RptLs3r2bU6dO8dFHH3Hbbbf5v1AREQlJCuPzdPgwtG0L\nx4+79mWXuXvGF15Y8jknT56kYsWKJCYmsmvXLuLj4wNTrIiIhCRN4DoPmZnQqRN89ZVrx8e71bUS\nEko+Z9u2bdStW5fp06fnn6MgFhGJdgrjn8ha6N8f1q51bWNg1iy4+eaSz9m8eTOJiYnk5OTQoEGD\ngNQpIiKhL/LCOEDvM37+eXjnHU/71VehQ4eSj9+4cSOJiYnExsayZs0arr/+ev8XKSIiYSHywjgA\nPeN334Wnn/a0Bwwo+p7iMx08eJCkpCTi4+NZs2YN1157rd9rFBGR8BH+YZyXB9nZnrafe8a7d8Nv\nf+tpt2gBf/ubG6YuSfXq1Xnuuef4+OOPufrqq/1an4iIhJ/wn01deF3quDj3RgY/yc52S12eOOHa\ntWvD3Lnu2xbnk08+oUKFCtxyyy0MHTrUb3WJiEh4C/+ecQCHqJ99Ftavd9uxsW7CVuXKxR+7YsUK\nkpOTGTZsGNZav9YlIiLhTWHspdWr4aWXPO0XXoBbbin+2KVLl9KuXTuuuuoqFixYgCltDFtERKKe\nwtgLR45Ar17ucSaA5s1h5Mjij01LS6NDhw7UrVuXVatWUb16db/UJCIikUNhXAZrYeBAOHDAtatW\nhWnTSr41PXXqVG688UZWrFjBJZdc4vN6REQk8kTWBC4/hPHMmTB/vqedkuKWvDxTTk4OsbGxzJgx\ng8zMTCpVquTzWkREJDKFf8/4uutcIJ84AevW+fSj9+2DwpOgBw4sfmGPGTNm0KhRI44cOcIFF1yg\nIBYRkXMS/mFsjHu2qGJF8GEI5uVBv36eF0BcdZVbZetMKSkp3HfffVSuXJkKFSr47PuLiEj0CP8w\n9pOJE907isHl/b
RpLu8Le/PNN+nfvz8tW7YkLS2NC0t7VZOIiEgJFMbF2LGj6GzpRx+FO+4oesy0\nadMYNGgQbdu2ZdGiRXr7koiI/GQK4zNs3gytW0NGhmvfcINb7ONMzZs358EHH2TBggUanhYRkfOi\nMC4kLQ1+/WvYs8e14+Jg+vSik7QXLlxIbm4uCQkJvP7665QP0FuiREQkcimMcc8Sv/KKmyl98qTb\nV7EivPce/OpXBcdYnnnmGTp16kRKSkrwihURkYgT/s8Z+8Ajj8C4cZ52rVqQmuqGqMEF8RNPPMGY\nMWO4//776devX1DqFBGRyBT1PePFi4sG8Z13woYNRYP44YcfZsyYMQwaNIhJkyYRExMTnGJFRCQi\nRXUYHzkC/ft72u3auceZqlXz7Nu5cydvvvkmDz74IBMmTKCcH1/RKCIi0Smqh6mHDIFvv3Xb1au7\npS4L5mNZazHGUKdOHTZu3MjVV1+tty+JiIhfRG03b/ZsmDPH0377bU+PODc3l379+jFhwgQA6tSp\noyAWERG/icowPnAABg/2tPv3h/bt3XZOTg69e/dmypQpHDlyJDgFiohIVIm6YeqsLOjdG44dc+1a\ntTwTuLKysujZsyfz589nzJgxPPbYY0GrU0REokdUhXFeHtx/P6xa5drGwJQpcPHFkJeXR7du3UhN\nTWXcuHEMHz48qLWKiEj0iKowHjkS3nnH0372WWja1G2XK1eOpk2b0qpVK4YMGRKcAkVEJCpFTRi/\n+mrR54kHDoQ//hFOnz7Njh07uOmmmxgxYkTwChQRkagVFRO4Zswo+hamTp1g/Hg4deokbdq0oVmz\nZhwruIksIiISYBHfM969GwYM8LTvvNMNVZ869QNt2rRh3bp1TJ8+nSpVqgSvSBERiWoRHcbWwgMP\neF6HeN11bs3pjIxjJCcn8/nnnzNnzhy6dOkS3EJFRCSqRfQw9ezZ8NFHbrtg5nTlyjB27FjS09OZ\nP3++glhERILOWGuD8o0bNmxoP/vsM799/tGjrid86JBr//738Je/uO3s7GzS09Np1KiR376/iIhI\nYcaYf1trGxb3tYjtGT/2mCeIExJg6NBv6dy5MwcPHiQuLk5BLCIiISMi7xl/8glMmuRpP/30ftq1\nS+Kbb75h165dVK9ePXjFiYiInCHiwjgryz1DXKBly68ZMyaJw4cPs3TpUm6//fbgFSciIlKMiAvj\n11+Hbdvcdnz8V2zdmsipUz+wfPlyDU2LiEhIiqh7xvv3uyUuCzz6aDy1al3BihUrFMQiIhKyIiqM\nH34YTp0C+Irrr8/miSeq88knn9CgQYNglyYiIlIir8LYGJNsjPnCGLPTGPN4MV+/wBgzJ//r640x\ntXxdaFmWL4d33wX4f8Ct1KkzjLg4MMYEuhQREZFzUmYYG2NigPHAXUA94B5jTL0zDusPHLPWXg28\nBrzs60JLk5UFQ4cCpAPNiI8vz8svPxTIEkRERH4yb3rGjYCd1trd1tosYDbQ8YxjOgJT87fnAc1N\nALukr70GX3zxKZCEMReyYsUarrnmmkB9exERkfPiTRgnAPsKtffn7yv2GGttDnAcqOqLAsuybx88\n++yPQGegCk899TG33VY7EN9aRETEJwL6aJMxZgAwAKBGjRo++cwFCyAj4wJgLnXrXs5TT13uk88V\nEREJFG96xgeAKwq1L8/fV+wxxphYoBJw5MwPsta+Za1taK1tWK1atZ9W8RkeegiWLYO6dW/jrbcu\nJzbinpwWEZFI5010fQrUMcZciQvdHkDPM45JBfoA/wK6AittAN9A0aIFbN4MMTGB+o4iIiK+U2YY\nW2tzjDFDgaWEOmlCAAAEy0lEQVRADJBird1ijHkO+MxamwpMBqYbY3YCR3GBHVAKYhERCVdeDepa\naz8EPjxj36hC25lAN9+WJiIiEh0iagUuERGRcKQwFhERCTKFsYiISJApjEVERIJMYSwiIhJkCmMR\nEZEgUxiLiIgEmcJYREQkyBTGIiIiQaYwFhERCTKFsYiISJApjEVERIJMYSwiIhJkCmMREZEgUxiL\niIgEmbHWBucbG3MY+NqHH3kJ8L0PPy9a6TqeP13D86dreP50Dc+fr69hTWttteK+ELQw9jVjzGfW\n2obBriPc6TqeP13D86dreP50Dc9fIK+hhqlFRESCTGEsIiISZJEUxm8Fu4AIoet4/nQNz5+u4fnT\nNTx/AbuGEXPPWEREJFxFUs9YREQkLIVdGBtjko0xXxhjdhpjHi/m6xcYY+bkf329MaZW4KsMbV5c\nwxHGmK3GmE3GmBXGmJrBqDOUlXUNCx3XxRhjjTGa1VoMb66jMaZ7/s/jFmPMO4GuMdR58fe5hjFm\nlTEmPf/vdJtg1BmqjDEpxphDxpjNJXzdGGP+kn99NxljGvilEGtt2PwBYoBdwFVAeeA/QL0zjhkM\nvJG/3QOYE+y6Q+mPl9ewGRCfv/2AruG5X8P84y4CPgbWAQ2DXXeo/fHyZ7EOkA5UyW//Ith1h9If\nL6/hW8AD+dv1gD3BrjuU/gBNgAbA5hK+3gZYDBjgNmC9P+oIt55xI2CntXa3tTYLmA10POOYjsDU\n/O15QHNjjAlgjaGuzGtorV1lrT2d31wHXB7gGkOdNz+HAM8DLwOZgSwujHhzHX8HjLfWHgOw1h4K\ncI2hzptraIGL87crAd8EsL6QZ639GDhayiEdgWnWWQdUNsZc6us6wi2ME4B9hdr78/cVe4y1Ngc4\nDlQNSHXhwZtrWFh/3L8KxaPMa5g/lHWFtfaDQBYWZrz5WbwGuMYYs9YYs84Ykxyw6sKDN9fwGaCX\nMWY/8CHw+8CUFjHO9XfmTxLr6w+UyGGM6QU0BJoGu5ZwYowpB4wD+ga5lEgQixuqTsSN0HxsjLnB\nWvvfoFYVXu4Bplhrxxpjfg1MN8bUt9bmBbsw8Qi3nvEB4IpC7cvz9xV7jDEmFjcscyQg1YUHb64h\nxpgWwJNAB2vtjwGqLVyUdQ0vAuoDq40xe3D3mVI1iess3vws7gdSrbXZ1tqvgB24cBbHm2vYH3gX\nwFr7L6ACbs1l8Y5XvzPPV7iF8adAHWPMlcaY8rgJWqlnHJMK9Mnf7gqstPl34QXw4hoaY24G3sQF\nse7Rna3Ua2itPW6tvcRaW8taWwt3372Dtfaz4JQbsrz5+7wQ1yvGGHMJbth6dyCLDHHeXMO9QHMA\nY8x1uDA+HNAqw1sqcF/+rOrbgOPW2m99/U3CapjaWptjjBkKLMXNIkyx1m4xxjwHfGatTQUm44Zh\nduJuyvcIXsWhx8tr+ApQEZibP/dtr7W2Q9CKDjFeXkMpg5fXcSnQyhizFcgFRlprNdKVz8tr+DDw\ntjFmOG4yV191UDyMMbNw/+C7JP+++tNAHIC19g3cffY2wE7gNHC/X+rQfxMREZHgCrdhahERkYij\nMBYREQkyhbGIiEiQKYxFRESCTGEsIiISZApjERGRIFMYi4iIBJnCWEREJMj
+P50h0JPWRc/hAAAA\nAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = pd.concat(dfs)\n", + "import statsmodels.api as sm\n", + "thresh = 0.001 # POSSIBLE BUG? several very small pivots -- fine for pvalues\n", + "grid = np.linspace(0, 1, 101)\n", + "fig = plt.figure(figsize=(8, 8))\n", + "plt.plot(grid, sm.distributions.ECDF(results['pivot'][results['pivot'] > thresh])(grid), 'b-', linewidth=3, label='Pivot')\n", + "plt.plot(grid, sm.distributions.ECDF(results['pvalue'])(grid), 'r-', linewidth=3, label='P-value')\n", + "plt.plot([0, 1], [0, 1], 'k--')\n", + "plt.legend(fontsize=15);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "all,-slideshow", + "formats": "ipynb,Rmd" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/source/algorithms/ROSI.Rmd b/doc/source/algorithms/ROSI.Rmd new file mode 100644 index 000000000..b53c6a8c5 --- /dev/null +++ b/doc/source/algorithms/ROSI.Rmd @@ -0,0 +1,110 @@ +--- +jupyter: + jupytext: + cell_metadata_filter: all,-slideshow + formats: ipynb,Rmd + text_representation: + extension: .Rmd + format_name: rmarkdown + format_version: '1.1' + jupytext_version: 1.1.1 + kernelspec: + display_name: Python 3 + language: python + name: python3 +--- + +# Conditioning on less: ROSI + +Instead of conditioning on the active set and signs, +one can work in the full model and for each feature $j$ selected +construct p-values and confidence intervals +conditional only on the event $j$ was selected. +This is the approach of [Liu et al.](https://arxiv.org/abs/1801.09037), which +can be extended as ROSI (Relevant One-step Selective Inference) +beyond squared-error loss (described in forthcoming work, though +code is already available). + + +```{python} +import numpy as np, pandas as pd +import matplotlib.pyplot as plt +import statsmodels.api as sm +# %matplotlib inline + +from selectinf.tests.instance import gaussian_instance # to generate the data +from selectinf.algorithms.api import ROSI + +``` + +We will know generate some data from an OLS regression model and fit the LASSO +with a fixed value of $\lambda$. In the simulation world, we know the +true parameters, hence we can then return +pivots for each variable selected by the LASSO. These pivots should look +(marginally) like a draw from `np.random.sample`. This is the plot below. 
+ +```{python collapsed=TRUE} +np.random.seed(0) # for replicability + +def simulate(n=500, + p=100, + s=5, + signal=(5, 10), + sigma=1): + + # description of statistical problem + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0., + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + sigma_hat = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) / np.sqrt(n - p) + L = ROSI.gaussian(X, y, 2 * np.sqrt(n), sigma=sigma_hat) + soln = L.fit() + active_vars = soln != 0 + + if active_vars.sum() > 0: + projected_truth = np.linalg.pinv(X[:, active_vars]).dot(X.dot(truth)) + S = L.summary(truth=projected_truth) + S0 = L.summary() + + pivot = S['pval'] # these should be pivotal + pvalue = S0['pval'] + return pd.DataFrame({'pivot':pivot, + 'pvalue':pvalue}) +``` + +Let's take a look at what we get as a return value: + +```{python} +while True: + df = simulate() + if df is not None: + break +df.columns +``` + +```{python} +dfs = [] +for i in range(200): + df = simulate() + if df is not None: + dfs.append(df) +``` + +```{python} +results = pd.concat(dfs) +import statsmodels.api as sm +grid = np.linspace(0, 1, 101) +fig = plt.figure(figsize=(8, 8)) +plt.plot(grid, sm.distributions.ECDF(results['pivot'])(grid), 'b-', linewidth=3, label='Pivot') +plt.plot(grid, sm.distributions.ECDF(results['pvalue'])(grid), 'r-', linewidth=3, label='P-value') +plt.plot([0, 1], [0, 1], 'k--') +plt.legend(fontsize=15); +``` diff --git a/doc/source/algorithms/ROSI.ipynb b/doc/source/algorithms/ROSI.ipynb new file mode 100644 index 000000000..0ca401472 --- /dev/null +++ b/doc/source/algorithms/ROSI.ipynb @@ -0,0 +1,186 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Conditioning on less: ROSI\n", + "\n", + "Instead of conditioning on the active set and signs, \n", + "one can work in the full model and for each feature $j$ selected\n", + "construct p-values and confidence intervals\n", + "conditional only on the event $j$ was selected.\n", + "This is the approach of [Liu et al.](https://arxiv.org/abs/1801.09037), which\n", + "can be extended as ROSI (Relevant One-step Selective Inference)\n", + "beyond squared-error loss (described in forthcoming work, though\n", + "code is already available).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np, pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import statsmodels.api as sm\n", + "%matplotlib inline\n", + "\n", + "from selectinf.tests.instance import gaussian_instance # to generate the data\n", + "from selectinf.algorithms.api import ROSI\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will know generate some data from an OLS regression model and fit the LASSO\n", + "with a fixed value of $\\lambda$. In the simulation world, we know the\n", + "true parameters, hence we can then return\n", + "pivots for each variable selected by the LASSO. These pivots should look\n", + "(marginally) like a draw from `np.random.sample`. This is the plot below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "np.random.seed(0) # for replicability\n", + "\n", + "def simulate(n=500, \n", + " p=100, \n", + " s=5, \n", + " signal=(5, 10), \n", + " sigma=1): \n", + "\n", + " # description of statistical problem\n", + "\n", + " X, y, truth = gaussian_instance(n=n,\n", + " p=p, \n", + " s=s,\n", + " equicorrelated=False,\n", + " rho=0., \n", + " sigma=sigma,\n", + " signal=signal,\n", + " random_signs=True,\n", + " scale=False)[:3]\n", + "\n", + " sigma_hat = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) / np.sqrt(n - p)\n", + " L = ROSI.gaussian(X, y, 2 * np.sqrt(n), sigma=sigma_hat)\n", + " soln = L.fit()\n", + " active_vars = soln != 0\n", + " \n", + " if active_vars.sum() > 0:\n", + " projected_truth = np.linalg.pinv(X[:, active_vars]).dot(X.dot(truth))\n", + " S = L.summary(truth=projected_truth)\n", + " S0 = L.summary()\n", + "\n", + " pivot = S['pval'] # these should be pivotal\n", + " pvalue = S0['pval']\n", + " return pd.DataFrame({'pivot':pivot,\n", + " 'pvalue':pvalue})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's take a look at what we get as a return value:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['pivot', 'pvalue'], dtype='object')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "while True:\n", + " df = simulate()\n", + " if df is not None:\n", + " break\n", + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "dfs = []\n", + "for i in range(200):\n", + " df = simulate()\n", + " if df is not None:\n", + " dfs.append(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAeMAAAHSCAYAAADfUaMwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0\ndHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzde3yO9R/H8de12cz5fByico7CnI/Z\nHEKI2CgSRUgnogMRUTlVkgoVyWEIySlnOklUv4SSYznkfDY7Xr8/vubebM7bfd27934+HnvY9b2v\nts8ye+/7vb4Hy7ZtRERExDk+ThcgIiKS3imMRUREHKYwFhERcZjCWERExGEKYxEREYcpjEVERByW\nwalPnDdvXrt48eJOfXoRERG32rx58zHbtvMl95pjYVy8eHE2bdrk1KcXERFxK8uy9l3tNQ1Ti4iI\nOExhLCIi4jCFsYiIiMMUxiIiIg5TGIuIiDjMsdnUN+LMmTMcOXKE6Ohop0uRa/Dz8yN//vxkz57d\n6VJERNIkjw3jM2fOcPjwYQIDA8mUKROWZTldkiTDtm0iIiI4cOAAgAJZROQWeOww9ZEjRwgMDCRz\n5swKYg9mWRaZM2cmMDCQI0eOOF2OiEia5LFhHB0dTaZMmZwuQ25QpkyZ9DhBROQWeWwYA+oRpyH6\nuxIRuXUeHcYiIiLpgcJYRETEYQrjVDRkyBAsy7r8VrhwYdq2bcuuXbsA6NKlC0FBQSn+eXfs2MGQ\nIUM4depUin9sERFJeR67tMlb5MiRg2XLlgGwe/duBg0aRHBwMFu3bmXQoEFERESk+OfcsWMHr7/+\nOl26dCFnzpwp/vFFRCRlXTeMLcv6FGgBHLFt+55kXreA94BmwAWgi23bv6R0oWlVhgwZqFGjBgA1\natSgWLFi1K1blyVLltCuXTuHqxMREU9wI8PUU4Cm13j9AaDkpbfuwIe3X5b3qlKlCgB79+5NNEy9\nZ88eLMti8eLFie6PjY2lYMGCDBw48HLb6tWrqV69OgEBARQoUIBevXpx7tw5ANauXcuDDz4IQIkS\nJbAsi+LFi7vhKxMR8R4XL8KuRdvd9vmuG8a2ba8HTlzjllbA57axAchpWVahlCrQ2+zduxeAggUL\nJmovUaIE1apVY/bs2Yna161bx+HDhwkLCwNg69atNG3alLx58/Lll1/y+uuvM2PGDB5++GEAKleu\nzOjRowGYN28eP/74I/Pnz0/lr0pExHtE7D7EhhJhlHiwPH9N3eCWz5kSz4wDgX8TXO+/1HYoBT52\nIp6wlNW2b/6/iYmJAcwz4169epEtWzZCQkJYtWpVovvCwsJ4/fXXiYyMJGPGjACEh4dTvnx57rnH\nPCEYNmwYd9xxBwsXLsTX1xeA3LlzExoayo8//kjNmjUpXbo0AJUqVVKvWETkRsXFEfX+x1x8YQBz\n4s5SHIju1oP/Gm6iYFG/VP3Ubp1NbVlWd8uyNlmWteno0aPu/NSOOX78OH5+fvj5+VG6dGl2795N\neHg4hQolHTxo3749Z86cuTzhKyYmhnnz5hEaGnr5no0bN/LQQw9dDmKAtm3bkiFDBr777rvU/4JE\nRLzR//5HbI1a+D/Xi4FxZ5kArAFiy99LwRwpP9H2SinRMz4AFE1wXeRSWxK2bU8EJgIEBQXdQh8z\n7cmRIwcrV67EsiwKFixI4cKFr7pbVWBgIHXq1CE8PJxWrVqxatUqjh07dnmIGuDQoUMUKFAg0X/n\n6+tLnjx5OHHiWk8TREQkiV27YMgQ7OnT8b009NkfuIMC+D0+g3s/beiWMlKiZ7wQ6GwZNYDTtm2n\n+BA1mCFip99uVoYMGQgKCqJKlSoEBgZed9vI0NBQvv76ayIiIggPD6dSpUqULFny8uuFChVKciBD\nbGwsx48fJ3fu3DdfoIhIerR/P/ToAWXKwBdfcMG2GQ1E4MdnDMYaupdH3RTEcANhbFnWTOBHoLRl\nWfsty+pmWdZTlmU9demWJcBuYCcwCeiVatWmA+3atSMiIoL58+czf/78RL1igOrVqzN//nxiY2Mv\nt82bN4+YmBjq1KkDgL+/PwAXL150X+EiImnBjh3w5JNw110wcSLExHAWsyxoAFCaz8kycggvDgpw\na1nXHaa2bbvDdV63gd4pVlE6lz9/fho0aEC/fv04deoU7du3T/T6wIEDqVSpEq1bt6Znz57s37+f\nAQMG0KRJE2rWrAlweQLXxx9/TFhYGJkzZ6ZChQpu/1pERDyCbcOGDTBmDMybl2iY8xRQm2xs4wIw\nnefGhPLCC+4vUdtheqCwsDAOHTpEjRo1ksyGLl++PEuXLuXIkSO0adOGgQMH0qFDB+bOnXv5njvu\nuIPRo0czb948ateufXndsYhIuhEdDStXQp8+cMcdUKsWfPlloiD+o0gQd1CKbVwE5jBggDNBDGDZ\nt/IgNAUEBQXZmzZtuurr27dvp2zZsm6sSG6X/s5ExFG2DT/+CNOmQXg4nDyZ/H3NmrGq6kuEvG4B\nDwLTCAtrwfTp4JOKXVTLsjbbtp3sgQTam1pERNK2Cxdg7FiYMsXMjk5OrlzQqhU8/zxLD5SmdeuM\nl17YQ716OZkyJXWD+HoUxiIiknbt3w8tW8KvvyZ9rWhReOghaN0a6tQBPz8WLDhImzaVsO0Xgccp\nUyYn8+dDxoxJ/3N3UhiLiEja9NNPJmj/+8/VliMHtGsHnTtD7dqJurtff/0vbdo0xLb/A+4mMBCW\nLgVPWBWqMBYRkbRnxgzo2hUiI811hgwwerRZOxyQdFnS4sV7aN26IbZ9AlhOgQI1Wb0aPGXHYIWx\niIikDUePwqxZZoLWzz+72nPnhrlz4f77k/3PVq48TcuW9YmLOwesIk+eIFauhFKl3FP2jVAYi4iI\n5zp2DBYtMsuSli2DSwfvXFa2LCxcCHffneQ/tW2YMAGefz4HcXEvAA3ImfM+VqyAS2fveAyFsYiI\neJbdu+Grr2DBAvjuO4iLS3qPnx906ADjxpnnxFc4fx7CwrayaNEFoCrwHLlymWfElSql+ldw0xTG\nIiLiLNuG336D+fNNAG/ZcvV7a9WCTp2gffurzrzauxcaNfofO3eGAPmB36lc2Ze5c6FEidT4Am6f\nwlhERJxz8CB07w6LFyf/umVBzZpm1nSbNmZP6WvYvBkaN97EiRONgSzAArp182X8+GTndXkMbYeZ\nioYMGYJlWZffChcuTNu2bdl1tUXpKcyyLMaPH++WzyUiclNsG6ZPNw9vrwzijBmheXOYNAkOHYLv\nv4cXX7xuEC9dCnXqbODEiWAgB35+65k0qSSTJ3t2EIN6xqkuR44cLFu2DIDdu3czaNAggoOD2bp1\nK1myZHG4OhERBxw+DD17mmHphEJDzRrhJk0ga9ab+pCffmo62LGxHwD5yJ59NV9/XYx69VKu7NSk\nME5lGTJkoEaNGgDUqFGDYsWKUbduXZYsWUK7du0crk5E
[... remainder of base64-encoded PNG output omitted; the rendered figure shows the empirical CDFs of `pivot` and `pvalue` plotted against the diagonal reference ...]\n",
      "text/plain": [
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = pd.concat(dfs)\n", + "import statsmodels.api as sm\n", + "grid = np.linspace(0, 1, 101)\n", + "fig = plt.figure(figsize=(8, 8))\n", + "plt.plot(grid, sm.distributions.ECDF(results['pivot'])(grid), 'b-', linewidth=3, label='Pivot')\n", + "plt.plot(grid, sm.distributions.ECDF(results['pvalue'])(grid), 'r-', linewidth=3, label='P-value')\n", + "plt.plot([0, 1], [0, 1], 'k--')\n", + "plt.legend(fontsize=15);" + ] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "all,-slideshow", + "formats": "ipynb,Rmd" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/source/algorithms/index.rst b/doc/source/algorithms/index.rst index 245c9e2eb..1f08e567d 100644 --- a/doc/source/algorithms/index.rst +++ b/doc/source/algorithms/index.rst @@ -10,3 +10,5 @@ post-selection inference. covtest.ipynb spacings + LASSO.ipynb + ROSI.ipynb \ No newline at end of file diff --git a/selectinf/algorithms/api.py b/selectinf/algorithms/api.py index cf5391f1c..f15caa897 100644 --- a/selectinf/algorithms/api.py +++ b/selectinf/algorithms/api.py @@ -1,4 +1,5 @@ from .lasso import (lasso, + ROSI, data_carving as data_carving_lasso, additive_noise as additive_noise_lasso) From 93636b1bc4c6b7a5c1160de67b2812f15ff7eb8a Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 25 Sep 2019 09:59:25 -0700 Subject: [PATCH 006/187] updating install instructions --- doc/source/download.rst | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/doc/source/download.rst b/doc/source/download.rst index 6aef2651a..5858ba0ee 100644 --- a/doc/source/download.rst +++ b/doc/source/download.rst @@ -17,13 +17,20 @@ Selection depends on the following Python tools * `Pandas `_ +The package can be installed via pip + + pip install selectinf + +Development +~~~~~~~~~~~ + You can clone the selection repo using:: git clone https://github.com/selective-inference/Python-software.git Then installation is a simple call to python:: - cd selection + cd selectinf git submodule update --init pip install -r requirements.txt python setup.py install --prefix=MYDIR @@ -41,3 +48,10 @@ There is a small but growing suite of tests that be easily checked using `nose < cd tmp nosetests -v selectinf +Building documentation +---------------------- + + cd doc + make html + +To upload a fresh build of the documentation to your :code:`gh-pages` branch, use :code:`make github`. 
From 6dc72a49fb128261ee1ff55edb5ee60b63708ee2 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 25 Sep 2019 10:03:24 -0700 Subject: [PATCH 007/187] change of title --- doc/source/algorithms/LASSO.Rmd | 2 +- doc/source/algorithms/LASSO.ipynb | 2 +- doc/source/algorithms/ROSI.Rmd | 2 +- doc/source/algorithms/ROSI.ipynb | 2 +- doc/source/algorithms/covtest.ipynb | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/algorithms/LASSO.Rmd b/doc/source/algorithms/LASSO.Rmd index 770d31eda..c0ad171f7 100644 --- a/doc/source/algorithms/LASSO.Rmd +++ b/doc/source/algorithms/LASSO.Rmd @@ -14,7 +14,7 @@ jupyter: name: python3 --- -# Conditioning on signs and active set +# LASSO when conditioning on signs and active set One of the first works in this line of conditional inference is [Lee et al.](projecteuclid.org/euclid.aos/1460381681) which diff --git a/doc/source/algorithms/LASSO.ipynb b/doc/source/algorithms/LASSO.ipynb index 7e505805f..8c15520d4 100644 --- a/doc/source/algorithms/LASSO.ipynb +++ b/doc/source/algorithms/LASSO.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Conditioning on signs and active set\n", + "# LASSO when conditioning on signs and active set\n", "\n", "One of the first works in this line of conditional inference\n", "is [Lee et al.](projecteuclid.org/euclid.aos/1460381681) which\n", diff --git a/doc/source/algorithms/ROSI.Rmd b/doc/source/algorithms/ROSI.Rmd index b53c6a8c5..9ed0517e1 100644 --- a/doc/source/algorithms/ROSI.Rmd +++ b/doc/source/algorithms/ROSI.Rmd @@ -14,7 +14,7 @@ jupyter: name: python3 --- -# Conditioning on less: ROSI +# LASSO when conditioning on less: ROSI Instead of conditioning on the active set and signs, one can work in the full model and for each feature $j$ selected diff --git a/doc/source/algorithms/ROSI.ipynb b/doc/source/algorithms/ROSI.ipynb index 0ca401472..11996ef3d 100644 --- a/doc/source/algorithms/ROSI.ipynb +++ b/doc/source/algorithms/ROSI.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Conditioning on less: ROSI\n", + "# LASSO when conditioning on less: ROSI\n", "\n", "Instead of conditioning on the active set and signs, \n", "one can work in the full model and for each feature $j$ selected\n", diff --git a/doc/source/algorithms/covtest.ipynb b/doc/source/algorithms/covtest.ipynb index 13ec59bfe..7a2aa98ed 100644 --- a/doc/source/algorithms/covtest.ipynb +++ b/doc/source/algorithms/covtest.ipynb @@ -416,5 +416,5 @@ } }, "nbformat": 4, - "nbformat_minor": 1 + "nbformat_minor": 2 } From f3c5a36e380ed37a5fed4866b7a4e7a79e3f2d6a Mon Sep 17 00:00:00 2001 From: jonathan-taylor Date: Thu, 21 Nov 2019 12:28:59 -0800 Subject: [PATCH 008/187] updates to python scripts to selectinf --- doc/learning_examples/BH/gbm_targets_BH.py | 57 +++++-- .../BH/gbm_targets_BH_single.py | 119 ++++++++++++++ .../BH/gbm_targets_BH_single_5000.py | 119 ++++++++++++++ .../BH/logit_targets_BH_single_5000.py | 120 ++++++++++++++ .../knockoffs/knockoff_followup.py | 106 +++++++------ .../knockoffs/knockoff_followup_6000.py | 141 +++++++++++++++++ .../knockoffs/knockoff_kernel.py | 1 - .../knockoffs/knockoff_kernel_multi.py | 53 +++++-- .../knockoffs/knockoff_kernel_multi_5000.py | 117 ++++++++++++++ .../knockoffs/knockoff_kernel_multi_8000.py | 117 ++++++++++++++ .../knockoffs/knockoff_kernel_multi_gbm.py | 90 +++++++++++ .../multi_target/lasso_multi.py | 120 ++++++++++++++ ..._example_multi_CV.py => lasso_multi_CV.py} | 12 +- ...lti_bigger.py => lasso_multi_CV_bigger.py} | 
46 +++--- .../multi_target/lasso_multi_CV_gbm.py | 84 ++++++++++ .../multi_target/lasso_multi_CV_split.py | 149 ++++++++++++++++++ .../multi_target/lasso_multi_bigger.py | 135 ++++++++++++++++ .../multi_target/lasso_multi_logit.py | 134 ++++++++++++++++ .../multi_target/lee_multi.py | 18 ++- .../stability/stability_selection_harder.py | 12 +- .../stability_selection_harder_5000.py | 102 ++++++++++++ .../stability_selection_harder_big.py | 12 +- 22 files changed, 1735 insertions(+), 129 deletions(-) create mode 100644 doc/learning_examples/BH/gbm_targets_BH_single.py create mode 100644 doc/learning_examples/BH/gbm_targets_BH_single_5000.py create mode 100644 doc/learning_examples/BH/logit_targets_BH_single_5000.py create mode 100644 doc/learning_examples/knockoffs/knockoff_followup_6000.py create mode 100644 doc/learning_examples/knockoffs/knockoff_kernel_multi_5000.py create mode 100644 doc/learning_examples/knockoffs/knockoff_kernel_multi_8000.py create mode 100644 doc/learning_examples/knockoffs/knockoff_kernel_multi_gbm.py create mode 100644 doc/learning_examples/multi_target/lasso_multi.py rename doc/learning_examples/multi_target/{lasso_example_multi_CV.py => lasso_multi_CV.py} (85%) rename doc/learning_examples/multi_target/{lasso_example_multi_bigger.py => lasso_multi_CV_bigger.py} (60%) create mode 100644 doc/learning_examples/multi_target/lasso_multi_CV_gbm.py create mode 100644 doc/learning_examples/multi_target/lasso_multi_CV_split.py create mode 100644 doc/learning_examples/multi_target/lasso_multi_bigger.py create mode 100644 doc/learning_examples/multi_target/lasso_multi_logit.py create mode 100644 doc/learning_examples/stability/stability_selection_harder_5000.py diff --git a/doc/learning_examples/BH/gbm_targets_BH.py b/doc/learning_examples/BH/gbm_targets_BH.py index 7d107c109..f9fd6150b 100644 --- a/doc/learning_examples/BH/gbm_targets_BH.py +++ b/doc/learning_examples/BH/gbm_targets_BH.py @@ -5,11 +5,11 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.utils import full_model_inference, pivot_plot -from selection.learning.core import normal_sampler, gbm_fit_sk -from selection.learning.learners import mixture_learner +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, gbm_fit_sk +from selectinf.learning.learners import mixture_learner mixture_learner.scales = [1]*10 + [1.5,2,3,4,5,10] def BHfilter(pval, q=0.2): @@ -22,9 +22,7 @@ def BHfilter(pval, q=0.2): return np.nonzero(pval <= thresh)[0] return [] -def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000): - - # description of statistical problem +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): X, y, truth = gaussian_instance(n=n, p=p, @@ -36,6 +34,23 @@ def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000): random_signs=True, scale=False)[:3] + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + XTX = X.T.dot(X) XTXi = np.linalg.inv(XTX) resid = y - X.dot(XTXi.dot(X.T.dot(y))) @@ -81,19 +96,35 @@ def meta_algorithm(XTX, XTXi, dispersion, lam, sampler): import matplotlib.pyplot as plt import pandas as pd - for i in range(500): - 
df = simulate(B=40000) - csvfile = 'gbm_targets_BH.csv' + U = np.linspace(0, 1, 101) + plt.clf() + + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=20000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + for i in range(5000): + df = simulate(**opts) + csvfile = __file__[:-3] + '.csv' outbase = csvfile[:-4] - if df is not None and i > 0: + if df is not None: - try: # concatenate to disk + try: df = pd.concat([df, pd.read_csv(csvfile)]) except FileNotFoundError: pass df.to_csv(csvfile, index=False) if len(df['pivot']) > 0: - pivot_ax, length_ax = pivot_plot(df, outbase) + f = pivot_plot(df, outbase)[1] + plt.close(f) diff --git a/doc/learning_examples/BH/gbm_targets_BH_single.py b/doc/learning_examples/BH/gbm_targets_BH_single.py new file mode 100644 index 000000000..bc13e149d --- /dev/null +++ b/doc/learning_examples/BH/gbm_targets_BH_single.py @@ -0,0 +1,119 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, gbm_fit_sk +from selectinf.learning.learners import mixture_learner +mixture_learner.scales = [1]*10 + [1.5,2,3,4,5,10] + +def BHfilter(pval, q=0.2): + pval = np.asarray(pval) + pval_sort = np.sort(pval) + comparison = q * np.arange(1, pval.shape[0] + 1.) / pval.shape[0] + passing = pval_sort < comparison + if passing.sum(): + thresh = comparison[np.nonzero(passing)[0].max()] + return np.nonzero(pval <= thresh)[0] + return [] + +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000): + + # description of statistical problem + + X, y, truth = generate(n=n, p=p, s=s, signal=signal, sigma=sigma) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + def meta_algorithm(XTX, XTXi, dispersion, lam, sampler): + global counter + p = XTX.shape[0] + success = np.zeros(p) + + loss = rr.quadratic_loss((p,), Q=XTX) + pen = rr.l1norm(p, lagrange=lam) + + scale = 0. + noisy_S = sampler(scale=scale) + soln = XTXi.dot(noisy_S) + solnZ = soln / (np.sqrt(np.diag(XTXi)) * np.sqrt(dispersion)) + pval = ndist.cdf(solnZ) + pval = 2 * np.minimum(pval, 1 - pval) + return set(BHfilter(pval, q=0.2)) + + lam = 4. 
* np.sqrt(n) + selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, dispersion, lam) + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(1, 1), + B=B, + fit_probability=gbm_fit_sk, + fit_args={'n_estimators':500}, + how_many=1) + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + for i in range(5000): + df = simulate(**opts) + csvfile = __file__[:-3] + '.csv' + outbase = csvfile[:-4] + + if df is not None and i > 0: + + try: # concatenate to disk + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + df['R2'] = np.ones(df.shape[0]) * R2mean + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/BH/gbm_targets_BH_single_5000.py b/doc/learning_examples/BH/gbm_targets_BH_single_5000.py new file mode 100644 index 000000000..97891ef2e --- /dev/null +++ b/doc/learning_examples/BH/gbm_targets_BH_single_5000.py @@ -0,0 +1,119 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, gbm_fit_sk +from selectinf.learning.learners import mixture_learner +mixture_learner.scales = [1]*10 + [1.5,2,3,4,5,10] + +def BHfilter(pval, q=0.2): + pval = np.asarray(pval) + pval_sort = np.sort(pval) + comparison = q * np.arange(1, pval.shape[0] + 1.) / pval.shape[0] + passing = pval_sort < comparison + if passing.sum(): + thresh = comparison[np.nonzero(passing)[0].max()] + return np.nonzero(pval <= thresh)[0] + return [] + +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000): + + # description of statistical problem + + X, y, truth = generate(n=n, p=p, s=s, signal=signal, sigma=sigma) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + def meta_algorithm(XTX, XTXi, dispersion, lam, sampler): + global counter + p = XTX.shape[0] + success = np.zeros(p) + + loss = rr.quadratic_loss((p,), Q=XTX) + pen = rr.l1norm(p, lagrange=lam) + + scale = 0. + noisy_S = sampler(scale=scale) + soln = XTXi.dot(noisy_S) + solnZ = soln / (np.sqrt(np.diag(XTXi)) * np.sqrt(dispersion)) + pval = ndist.cdf(solnZ) + pval = 2 * np.minimum(pval, 1 - pval) + return set(BHfilter(pval, q=0.2)) + + lam = 4. 
* np.sqrt(n) + selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, dispersion, lam) + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(1, 1), + B=B, + fit_probability=gbm_fit_sk, + fit_args={'n_estimators':500}, + how_many=1) + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=5000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + for i in range(5000): + df = simulate(**opts) + csvfile = __file__[:-3] + '.csv' + outbase = csvfile[:-4] + + if df is not None and i > 0: + + try: # concatenate to disk + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + df['R2'] = np.ones(df.shape[0]) * R2mean + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/BH/logit_targets_BH_single_5000.py b/doc/learning_examples/BH/logit_targets_BH_single_5000.py new file mode 100644 index 000000000..48e9a57d6 --- /dev/null +++ b/doc/learning_examples/BH/logit_targets_BH_single_5000.py @@ -0,0 +1,120 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler +from selectinf.learning.Rfitters import logit_fit +from selectinf.learning.learners import mixture_learner +mixture_learner.scales = [1]*10 + [1.5,2,3,4,5,10] + +def BHfilter(pval, q=0.2): + pval = np.asarray(pval) + pval_sort = np.sort(pval) + comparison = q * np.arange(1, pval.shape[0] + 1.) / pval.shape[0] + passing = pval_sort < comparison + if passing.sum(): + thresh = comparison[np.nonzero(passing)[0].max()] + return np.nonzero(pval <= thresh)[0] + return [] + +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=1000): + + # description of statistical problem + + X, y, truth = generate(n=n, p=p, s=s, signal=signal, sigma=sigma) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + def meta_algorithm(XTX, XTXi, dispersion, lam, sampler): + global counter + p = XTX.shape[0] + success = np.zeros(p) + + loss = rr.quadratic_loss((p,), Q=XTX) + pen = rr.l1norm(p, lagrange=lam) + + scale = 0. + noisy_S = sampler(scale=scale) + soln = XTXi.dot(noisy_S) + solnZ = soln / (np.sqrt(np.diag(XTXi)) * np.sqrt(dispersion)) + pval = ndist.cdf(solnZ) + pval = 2 * np.minimum(pval, 1 - pval) + return set(BHfilter(pval, q=0.2)) + + lam = 4. 
* np.sqrt(n) + selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, dispersion, lam) + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(1, 1), + B=B, + fit_probability=logit_fit, + fit_args={'df':20}, + how_many=1) + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=5000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + for i in range(5000): + df = simulate(**opts) + csvfile = __file__[:-3] + '.csv' + outbase = csvfile[:-4] + + if df is not None and i > 0: + + try: # concatenate to disk + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + df['R2'] = np.ones(df.shape[0]) * R2mean + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/knockoffs/knockoff_followup.py b/doc/learning_examples/knockoffs/knockoff_followup.py index 3978af5f3..a19fc6e3c 100644 --- a/doc/learning_examples/knockoffs/knockoff_followup.py +++ b/doc/learning_examples/knockoffs/knockoff_followup.py @@ -5,19 +5,16 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.Rutils import lasso_glmnet -from selection.learning.utils import (full_model_inference, - pivot_plot, - naive_full_model_inference) -from selection.learning.core import split_sampler, keras_fit +from selectinf.learning.Rutils import lasso_glmnet +from selectinf.learning.utils import (full_model_inference, + pivot_plot, + split_full_model_inference) +from selectinf.learning.core import normal_sampler, keras_fit -def simulate(n=400, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0): +def generate(n=2000, p=100, s=10, signal=(np.sqrt(2)*0.5, np.sqrt(2)*1), sigma=2, **ignored): - # description of statistical problem - - np.random.seed(seed) X, y, truth = gaussian_instance(n=n, p=p, s=s, @@ -26,8 +23,24 @@ def simulate(n=400, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0): sigma=sigma, signal=signal, random_signs=True, - scale=False, - center=False)[:3] + scale=False)[:3] + + return X, y, truth + +def simulate(n=2000, p=100, s=10, signal=(np.sqrt(2)*0.5, np.sqrt(2)*1), + sigma=2, alpha=0.1,B=3000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] dispersion = sigma**2 @@ -35,12 +48,12 @@ def simulate(n=400, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0): covS = dispersion * X.T.dot(X) smooth_sampler = normal_sampler(S, covS) + def meta_algorithm(X, XTXi, resid, sampler): n, p = X.shape - idx = np.random.choice(np.arange(n), 200, replace=False) - + idx = np.random.choice(np.arange(n), int(n/2), replace=False) S = sampler(scale=0.) 
# deterministic with scale=0 ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X Xidx, yidx = X[idx], y[idx] @@ -66,45 +79,25 @@ def meta_algorithm(X, XTXi, resid, sampler): y, truth, selection_algorithm, - splitting_sampler, + smooth_sampler, success_params=(8, 10), B=B, fit_probability=keras_fit, - fit_args={'epochs':20, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}, - fit_args={'df':20}) + fit_args={'epochs':20, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) if df is not None: + idx2 = np.random.choice(np.arange(n), int(n/2), replace=False) observed_set = list(df['variable']) - true_target = truth[observed_set] - - np.random.seed(seed) - X2, _, _ = gaussian_instance(n=n, - p=p, - s=s, - equicorrelated=False, - rho=0.5, - sigma=sigma, - signal=signal, - random_signs=True, - center=False, - scale=False)[:3] - stage_1 = np.random.choice(np.arange(n), 200, replace=False) - stage_2 = sorted(set(range(n)).difference(stage_1)) - X2 = X2[stage_2] - y2 = X2.dot(truth) + sigma * np.random.standard_normal(X2.shape[0]) - - XTXi_2 = np.linalg.inv(X2.T.dot(X2)) - resid2 = y2 - X2.dot(XTXi_2.dot(X2.T.dot(y2))) - dispersion_2 = np.linalg.norm(resid2)**2 / (X2.shape[0] - X2.shape[1]) - - naive_df = naive_full_model_inference(X2, - y2, - dispersion_2, + split_df = split_full_model_inference(X, + y, + idx2, + None, # ignored dispersion + truth, observed_set, alpha=alpha) - df = pd.merge(df, naive_df, on='variable') + df = pd.merge(df, split_df, on='variable') return df if __name__ == "__main__": @@ -112,13 +105,27 @@ def meta_algorithm(X, XTXi, resid, sampler): import matplotlib.pyplot as plt import pandas as pd - iseed = int(np.fabs(np.random.standard_normal() * 1000)) - for i in range(500): - df = simulate(seed=i + iseed) - csvfile = 'knockoff_followup.csv' + opts = dict(n=2000, p=100, s=10, + signal=(np.sqrt(2)*0.5, np.sqrt(2)*1), sigma=2, + alpha=0.1, B=3000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + + for i in range(5000): + df = simulate(**opts) + csvfile = __file__[:-3] + '_2000_idx.csv' outbase = csvfile[:-4] - if df is not None and i > 0: + if df is not None: try: df = pd.concat([df, pd.read_csv(csvfile)]) @@ -127,5 +134,6 @@ def meta_algorithm(X, XTXi, resid, sampler): df.to_csv(csvfile, index=False) if len(df['pivot']) > 0: - pivot_plot(df, outbase) + f = pivot_plot(df, outbase)[1] + plt.close(f) diff --git a/doc/learning_examples/knockoffs/knockoff_followup_6000.py b/doc/learning_examples/knockoffs/knockoff_followup_6000.py new file mode 100644 index 000000000..57a8d8649 --- /dev/null +++ b/doc/learning_examples/knockoffs/knockoff_followup_6000.py @@ -0,0 +1,141 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.Rutils import lasso_glmnet +from selectinf.learning.utils import (full_model_inference, + pivot_plot, + split_full_model_inference) +from selectinf.learning.core import normal_sampler, keras_fit +from selectinf.learning.fitters import gbm_fit_sk + +def generate(n=2000, p=100, s=10, signal=(np.sqrt(2)*0.5, np.sqrt(2)*1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, 
truth + +def simulate(n=2000, p=100, s=10, signal=(np.sqrt(2)*0.5, np.sqrt(2)*1), + sigma=2, alpha=0.1,B=3000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + + def meta_algorithm(X, XTXi, resid, sampler): + + n, p = X.shape + idx = np.random.choice(np.arange(n), int(n/2), replace=False) + + S = sampler(scale=0.) # deterministic with scale=0 + ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X + Xidx, yidx = X[idx], y[idx] + rho = 0.8 + + Xnew = rho * Xidx + np.sqrt(1 - rho**2) * np.random.standard_normal(Xidx.shape) + + X_full = np.hstack([Xidx, Xnew]) + beta_full = np.linalg.pinv(X_full).dot(yidx) + winners = np.fabs(beta_full)[:p] > np.fabs(beta_full)[p:] + return set(np.nonzero(winners)[0]) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) + + # run selection algorithm + + df = full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(8, 10), + B=B, + fit_probability=gbm_fit_sk, + fit_args={'n_estimators':1000} + ) + + if df is not None: + + observed_set = list(df['variable']) + idx2 = np.random.choice(np.arange(n), int(n/2), replace=False) + split_df = split_full_model_inference(X, + y, + idx2, + None, # ignored dispersion + truth, + observed_set, + alpha=alpha) + + df = pd.merge(df, split_df, on='variable') + return df + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + opts = dict(n=2000, p=100, s=10, + signal=(np.sqrt(2)*0.5, np.sqrt(2)*1), sigma=2, + alpha=0.1, B=6000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + + for i in range(5000): + df = simulate(**opts) + csvfile = __file__[:-3] + '_gbm.csv' + outbase = csvfile[:-4] + + if df is not None: + + try: + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/knockoffs/knockoff_kernel.py b/doc/learning_examples/knockoffs/knockoff_kernel.py index 1ac91d8c7..d979566a9 100644 --- a/doc/learning_examples/knockoffs/knockoff_kernel.py +++ b/doc/learning_examples/knockoffs/knockoff_kernel.py @@ -14,7 +14,6 @@ def simulate(n=1000, p=50, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0, B= # description of statistical problem - np.random.seed(seed) X, y, truth = gaussian_instance(n=n, p=p, s=s, diff --git a/doc/learning_examples/knockoffs/knockoff_kernel_multi.py b/doc/learning_examples/knockoffs/knockoff_kernel_multi.py index a6e438cdd..2fdac03b5 100644 --- a/doc/learning_examples/knockoffs/knockoff_kernel_multi.py +++ b/doc/learning_examples/knockoffs/knockoff_kernel_multi.py @@ -5,16 +5,13 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.utils import full_model_inference, pivot_plot -from 
selection.learning.core import normal_sampler, keras_fit +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, keras_fit -def simulate(n=1000, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0, B=5000): +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): - # description of statistical problem - - np.random.seed(seed) X, y, truth = gaussian_instance(n=n, p=p, s=s, @@ -23,8 +20,23 @@ def simulate(n=1000, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0, B sigma=sigma, signal=signal, random_signs=True, - scale=False, - center=False)[:3] + scale=False)[:3] + + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] dispersion = sigma**2 @@ -71,10 +83,23 @@ def meta_algorithm(X, XTXi, resid, sampler): import matplotlib.pyplot as plt import pandas as pd + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + iseed = int(np.fabs(np.random.standard_normal() * 50000)) - for i in range(500): - df = simulate(seed=i + iseed, B=3000) - csvfile = 'knockoff_kernel_multi.csv' + for i in range(2000): + df = simulate(**opts) + csvfile = __file__[:-3] + '_200.csv' outbase = csvfile[:-4] if df is not None and i > 0: @@ -86,6 +111,6 @@ def meta_algorithm(X, XTXi, resid, sampler): df.to_csv(csvfile, index=False) if len(df['pivot']) > 0: - pivot_ax, length_ax = pivot_plot(df, outbase) - + f = pivot_plot(df, outbase)[1] + plt.close(f) diff --git a/doc/learning_examples/knockoffs/knockoff_kernel_multi_5000.py b/doc/learning_examples/knockoffs/knockoff_kernel_multi_5000.py new file mode 100644 index 000000000..031cc0fb5 --- /dev/null +++ b/doc/learning_examples/knockoffs/knockoff_kernel_multi_5000.py @@ -0,0 +1,117 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, keras_fit + +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + def meta_algorithm(X, XTXi, resid, sampler): + + n, p = X.shape + + rho = 0.8 + S = sampler(scale=0.) 
# deterministic with scale=0 + ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X + Xnew = rho * X + np.sqrt(1 - rho**2) * np.random.standard_normal(X.shape) + + X_full = np.hstack([X, Xnew]) + beta_full = np.linalg.pinv(X_full).dot(ynew) + winners = np.fabs(beta_full)[:p] > np.fabs(beta_full)[p:] + return set(np.nonzero(winners)[0]) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) + + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(8, 10), + B=B, + fit_probability=keras_fit, + fit_args={'epochs':20, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), + sigma=2, alpha=0.1, B=5000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + + iseed = int(np.fabs(np.random.standard_normal() * 50000)) + for i in range(2000): + df = simulate(**opts) + csvfile = __file__[:-3] + '_200.csv' + outbase = csvfile[:-4] + + if df is not None and i > 0: + + try: # concatenate to disk + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/knockoffs/knockoff_kernel_multi_8000.py b/doc/learning_examples/knockoffs/knockoff_kernel_multi_8000.py new file mode 100644 index 000000000..8b4035d26 --- /dev/null +++ b/doc/learning_examples/knockoffs/knockoff_kernel_multi_8000.py @@ -0,0 +1,117 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, keras_fit + +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + def meta_algorithm(X, XTXi, resid, sampler): + + n, p = X.shape + + rho = 0.8 + S = sampler(scale=0.) 
# deterministic with scale=0 + ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X + Xnew = rho * X + np.sqrt(1 - rho**2) * np.random.standard_normal(X.shape) + + X_full = np.hstack([X, Xnew]) + beta_full = np.linalg.pinv(X_full).dot(ynew) + winners = np.fabs(beta_full)[:p] > np.fabs(beta_full)[p:] + return set(np.nonzero(winners)[0]) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) + + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(8, 10), + B=B, + fit_probability=keras_fit, + fit_args={'epochs':20, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + opts = dict(n=2000, p=100, s=10, signal=(0.5, 1), + sigma=2, alpha=0.1, B=8000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + + iseed = int(np.fabs(np.random.standard_normal() * 50000)) + for i in range(2000): + df = simulate(**opts) + csvfile = __file__[:-3] + '_2000.csv' + outbase = csvfile[:-4] + + if df is not None and i > 0: + + try: # concatenate to disk + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/knockoffs/knockoff_kernel_multi_gbm.py b/doc/learning_examples/knockoffs/knockoff_kernel_multi_gbm.py new file mode 100644 index 000000000..4f834ec7b --- /dev/null +++ b/doc/learning_examples/knockoffs/knockoff_kernel_multi_gbm.py @@ -0,0 +1,90 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, keras_fit, gbm_fit_sk + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0, B=3000): + + # description of statistical problem + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False, + center=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + def meta_algorithm(X, XTXi, resid, sampler): + + n, p = X.shape + + rho = 0.8 + S = sampler(scale=0.) 
# deterministic with scale=0 + ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X + Xnew = rho * X + np.sqrt(1 - rho**2) * np.random.standard_normal(X.shape) + + X_full = np.hstack([X, Xnew]) + beta_full = np.linalg.pinv(X_full).dot(ynew) + winners = np.fabs(beta_full)[:p] > np.fabs(beta_full)[p:] + return set(np.nonzero(winners)[0]) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) + + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(8, 10), + B=B, + fit_probability=gbm_fit_sk, + fit_args={'n_estimators':1000}) + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + iseed = int(np.fabs(np.random.standard_normal() * 50000)) + for i in range(2000): + df = simulate(seed=i + iseed, B=3000) + csvfile = 'knockoff_kernel_multi_gbm.csv' + outbase = csvfile[:-4] + + if df is not None and i > 0: + + try: # concatenate to disk + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + pivot_plot(df, outbase) + + diff --git a/doc/learning_examples/multi_target/lasso_multi.py b/doc/learning_examples/multi_target/lasso_multi.py new file mode 100644 index 000000000..ba3754c8b --- /dev/null +++ b/doc/learning_examples/multi_target/lasso_multi.py @@ -0,0 +1,120 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, keras_fit, gbm_fit_sk +from selectinf.learning.Rutils import lasso_glmnet + +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + def meta_algorithm(X, XTXi, resid, lam, sampler): + p = XTX.shape[0] + success = np.zeros(p) + + loss = rr.quadratic_loss((p,), Q=XTX) + pen = rr.l1norm(p, lagrange=lam) + + scale = 0. + noisy_S = sampler(scale=scale) + loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) + problem = rr.simple_problem(loss, pen) + soln = problem.solve(max_its=100, tol=1.e-10) + success += soln != 0 + return set(np.nonzero(success)[0]) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + lam = 4. 
* np.sqrt(n) + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid, lam) + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(1, 1), + B=B, + fit_probability=keras_fit, + fit_args={'epochs':10, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) + + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + U = np.linspace(0, 1, 101) + plt.clf() + + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + for i in range(5000): + df = simulate(**opts) + csvfile = __file__[:-3] + '.csv' + outbase = csvfile[:-4] + + if df is not None: + + try: + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/multi_target/lasso_example_multi_CV.py b/doc/learning_examples/multi_target/lasso_multi_CV.py similarity index 85% rename from doc/learning_examples/multi_target/lasso_example_multi_CV.py rename to doc/learning_examples/multi_target/lasso_multi_CV.py index 7daf55c83..14d407608 100644 --- a/doc/learning_examples/multi_target/lasso_example_multi_CV.py +++ b/doc/learning_examples/multi_target/lasso_multi_CV.py @@ -5,11 +5,11 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.utils import full_model_inference, pivot_plot -from selection.learning.core import split_sampler, keras_fit -from selection.learning.Rutils import lasso_glmnet +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import split_sampler, keras_fit +from selectinf.learning.Rutils import lasso_glmnet def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): @@ -48,6 +48,8 @@ def meta_algorithm(X, XTXi, resid, sampler): # run selection algorithm + print('SNR', np.linalg.norm(X.dot(truth)) / np.linalg.norm(y-X.dot(truth))) + print('R2', 1 - np.linalg.norm(y-X.dot(truth))**2 / np.linalg.norm(y)**2) return full_model_inference(X, y, truth, @@ -66,7 +68,7 @@ def meta_algorithm(X, XTXi, resid, sampler): U = np.linspace(0, 1, 101) plt.clf() - for i in range(500): + for i in range(1000): df = simulate() csvfile = 'lasso_multi_CV.csv' outbase = csvfile[:-4] diff --git a/doc/learning_examples/multi_target/lasso_example_multi_bigger.py b/doc/learning_examples/multi_target/lasso_multi_CV_bigger.py similarity index 60% rename from doc/learning_examples/multi_target/lasso_example_multi_bigger.py rename to doc/learning_examples/multi_target/lasso_multi_CV_bigger.py index 19cabbf6c..4f43caa7e 100644 --- a/doc/learning_examples/multi_target/lasso_example_multi_bigger.py +++ b/doc/learning_examples/multi_target/lasso_multi_CV_bigger.py @@ -5,10 +5,11 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.utils import full_model_inference, pivot_plot -from selection.learning.core import split_sampler, keras_fit +from selectinf.learning.utils import full_model_inference, pivot_plot 
+from selectinf.learning.core import normal_sampler, keras_fit, gbm_fit_sk +from selectinf.learning.Rutils import lasso_glmnet def simulate(n=2000, p=1000, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=4000): @@ -29,31 +30,22 @@ def simulate(n=2000, p=1000, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=4000): S = X.T.dot(y) covS = dispersion * X.T.dot(X) smooth_sampler = normal_sampler(S, covS) - splitting_sampler = split_sampler(X * y[:, None], covS) - def meta_algorithm(XTX, XTXi, lam, sampler): + def meta_algorithm(X, XTXi, resid, sampler): - p = XTX.shape[0] - success = np.zeros(p) - - loss = rr.quadratic_loss((p,), Q=XTX) - pen = rr.l1norm(p, lagrange=lam) - - scale = 0. - noisy_S = sampler(scale=scale) - loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) - problem = rr.simple_problem(loss, pen) - soln = problem.solve(max_its=100, tol=1.e-10) - success += soln != 0 - return set(np.nonzero(success)[0]) + S = sampler(scale=0.) # deterministic with scale=0 + ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X + G = lasso_glmnet(X, ynew, *[None]*4) + select = G.select() + print(select) + return set(list(select[0])) XTX = X.T.dot(X) XTXi = np.linalg.inv(XTX) resid = y - X.dot(XTXi.dot(X.T.dot(y))) dispersion = np.linalg.norm(resid)**2 / (n-p) - lam = 5. * np.sqrt(n) - selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi, lam) + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) # run selection algorithm @@ -61,11 +53,11 @@ def meta_algorithm(XTX, XTXi, lam, sampler): y, truth, selection_algorithm, - splitting_sampler, + smooth_sampler, success_params=(1, 1), B=B, - fit_probability=logit_fit, - fit_args={'df':20}) + fit_probability=keras_fit, + fit_args={'epochs':10, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) if __name__ == "__main__": @@ -76,9 +68,9 @@ def meta_algorithm(XTX, XTXi, lam, sampler): U = np.linspace(0, 1, 101) plt.clf() - for i in range(500): - df = simulate(B=4000) - csvfile = 'lasso_multi_bigger.csv' + for i in range(2000): + df = simulate(B=3000) + csvfile = __file__[:-3] + '.csv' outbase = csvfile[:-4] if df is not None and i > 0: @@ -90,4 +82,4 @@ def meta_algorithm(XTX, XTXi, lam, sampler): df.to_csv(csvfile, index=False) if len(df['pivot']) > 0: - pivot_ax, length_ax = pivot_plot(df, outbase) + pivot_plot(df, outbase) diff --git a/doc/learning_examples/multi_target/lasso_multi_CV_gbm.py b/doc/learning_examples/multi_target/lasso_multi_CV_gbm.py new file mode 100644 index 000000000..73e4f14a8 --- /dev/null +++ b/doc/learning_examples/multi_target/lasso_multi_CV_gbm.py @@ -0,0 +1,84 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import split_sampler, gbm_fit_sk +from selectinf.learning.Rutils import lasso_glmnet + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): + + # description of statistical problem + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + splitting_sampler = split_sampler(X * y[:, None], covS) + + def meta_algorithm(X, XTXi, resid, sampler): + + S = sampler(scale=0.) 
# deterministic with scale=0 + ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X + G = lasso_glmnet(X, ynew, *[None]*4) + select = G.select() + return set(list(select[0])) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + splitting_sampler, + success_params=(1, 1), + B=B, + fit_probability=gbm_fit_sk, + fit_args={'n_estimators':1000}) + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + U = np.linspace(0, 1, 101) + plt.clf() + + for i in range(2000): + df = simulate() + csvfile = 'lasso_multi_CV_gbm.csv' + outbase = csvfile[:-4] + + if df is not None: + + try: + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + pivot_plot(df, outbase) + diff --git a/doc/learning_examples/multi_target/lasso_multi_CV_split.py b/doc/learning_examples/multi_target/lasso_multi_CV_split.py new file mode 100644 index 000000000..dfa17a801 --- /dev/null +++ b/doc/learning_examples/multi_target/lasso_multi_CV_split.py @@ -0,0 +1,149 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import split_sampler, keras_fit +from selectinf.learning.Rutils import lasso_glmnet +from rpy2.robjects import numpy2ri +import rpy2.robjects as rpy + +class lasso_glmnet_split(lasso_glmnet): + + def select(self, CV=True, seed=0): + + numpy2ri.activate() + + rpy.r.assign('X', self.X.copy()) + rpy.r.assign('Y', self.Y.copy()) + rpy.r('X = as.matrix(X)') + rpy.r('Y = as.numeric(Y)') + rpy.r('n = nrow(X)') + rpy.r('split_ = sample(1:n, n/2, replace=FALSE)') + rpy.r('Xsplit_ = X[split_,]') + rpy.r('Ysplit_ = Y[split_]') + rpy.r('set.seed(%d)' % seed) + rpy.r('cvG = cv.glmnet(Xsplit_, Ysplit_, intercept=FALSE, standardize=FALSE)') + rpy.r("L1 = cvG[['lambda.min']]") + rpy.r("L2 = cvG[['lambda.1se']]") + if CV: + rpy.r("L = L1") + else: + rpy.r("L = 0.99 * L2") + rpy.r("G = glmnet(X, Y, intercept=FALSE, standardize=FALSE)") + n, p = self.X.shape + L = rpy.r('L') + rpy.r('B = as.numeric(coef(G, s=L, exact=TRUE, x=X, y=Y))[-1]') + B = np.asarray(rpy.r('B')) + selected = (B != 0) + numpy2ri.deactivate() + if selected.sum(): + V = np.nonzero(selected)[0] + return V, V + else: + return [], [] + + +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + splitting_sampler = split_sampler(X * y[:, None], covS) + + def meta_algorithm(X, XTXi, resid, sampler): + + S = sampler(scale=0.) 
# deterministic with scale=0 + ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X + G = lasso_glmnet_split(X, ynew, *[None]*4) + select = G.select() + return set(list(select[0])) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + splitting_sampler, + success_params=(1, 1), + B=B, + fit_probability=keras_fit, + fit_args={'epochs':10, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + U = np.linspace(0, 1, 101) + plt.clf() + + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + for i in range(5000): + df = simulate(**opts) + csvfile = __file__[:-3] + '.csv' + outbase = csvfile[:-4] + + if df is not None: + + try: + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/multi_target/lasso_multi_bigger.py b/doc/learning_examples/multi_target/lasso_multi_bigger.py new file mode 100644 index 000000000..e7f86b13d --- /dev/null +++ b/doc/learning_examples/multi_target/lasso_multi_bigger.py @@ -0,0 +1,135 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, keras_fit, gbm_fit_sk +from selectinf.learning.Rutils import lasso_glmnet + +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, truth + +def simulate(n=2000, p=1000, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=4000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + + # description of statistical problem + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + def meta_algorithm(X, XTXi, resid, lam, sampler): + p = XTX.shape[0] + success = np.zeros(p) + + loss = rr.quadratic_loss((p,), Q=XTX) + pen = rr.l1norm(p, lagrange=lam) + + scale = 0. 
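# with scale = 0. the draw below is deterministic (the "deterministic with
# scale=0" convention used in the other examples): noisy_S is simply the
# statistic carried by the sampler, and the LASSO is solved at that statistic.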
+ noisy_S = sampler(scale=scale) + loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) + problem = rr.simple_problem(loss, pen) + soln = problem.solve(max_its=100, tol=1.e-10) + success += soln != 0 + return set(np.nonzero(success)[0]) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + lam = 5. * np.sqrt(n) + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid, lam) + + # run selection algorithm + + print('SNR', np.linalg.norm(X.dot(truth)) / np.linalg.norm(y-X.dot(truth))) + print('R2', 1 - np.linalg.norm(y-X.dot(truth))**2 / np.linalg.norm(y)**2) + return full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(1, 1), + B=B, + fit_probability=keras_fit, + fit_args={'epochs':10, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) + + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + U = np.linspace(0, 1, 101) + plt.clf() + + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + for i in range(5000): + df = simulate(**opts) + csvfile = __file__[:-3] + '.csv' + outbase = csvfile[:-4] + + if df is not None: + + try: + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/multi_target/lasso_multi_logit.py b/doc/learning_examples/multi_target/lasso_multi_logit.py new file mode 100644 index 000000000..94f9cd4d1 --- /dev/null +++ b/doc/learning_examples/multi_target/lasso_multi_logit.py @@ -0,0 +1,134 @@ +import functools + +import numpy as np +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import normal_sampler, keras_fit, gbm_fit_sk +from selectinf.learning.Rutils import lasso_glmnet +from selectinf.learning.Rfitters import logit_fit + +def generate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, **ignored): + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + return X, y, truth + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=3000): + + # description of statistical problem + + X, y, truth = generate(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + + # description of statistical problem + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.5, + sigma=sigma, + signal=signal, + random_signs=True, + scale=False)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + smooth_sampler = normal_sampler(S, covS) + + def meta_algorithm(X, XTXi, resid, lam, sampler): + p = XTX.shape[0] + success = np.zeros(p) + + loss = rr.quadratic_loss((p,), Q=XTX) + pen = rr.l1norm(p, lagrange=lam) + + scale = 0. 
+ noisy_S = sampler(scale=scale) + loss.quadratic = rr.identity_quadratic(0, 0, -noisy_S, 0) + problem = rr.simple_problem(loss, pen) + soln = problem.solve(max_its=100, tol=1.e-10) + success += soln != 0 + return set(np.nonzero(success)[0]) + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + lam = 4. * np.sqrt(n) + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid, lam) + + # run selection algorithm + + return full_model_inference(X, + y, + truth, + selection_algorithm, + smooth_sampler, + success_params=(1, 1), + B=B, + fit_probability=logit_fit, + fit_args={'df':20}) + + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + U = np.linspace(0, 1, 101) + plt.clf() + + opts = dict(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000) + + R2 = [] + for _ in range(100): + + X, y, truth = generate(**opts) + R2.append((np.linalg.norm(y-X.dot(truth))**2, np.linalg.norm(y)**2)) + + R2 = np.array(R2) + R2mean = 1 - np.mean(R2[:,0]) / np.mean(R2[:,1]) + print('R2', R2mean) + + for i in range(5000): + df = simulate(**opts) + csvfile = __file__[:-3] + '.csv' + outbase = csvfile[:-4] + + if df is not None: + + try: + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + f = pivot_plot(df, outbase)[1] + plt.close(f) + diff --git a/doc/learning_examples/multi_target/lee_multi.py b/doc/learning_examples/multi_target/lee_multi.py index d81ff4cb1..2bf5a4eee 100644 --- a/doc/learning_examples/multi_target/lee_multi.py +++ b/doc/learning_examples/multi_target/lee_multi.py @@ -5,12 +5,13 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.utils import (partial_model_inference, - pivot_plot, - lee_inference) -from selection.learning.core import normal_sampler, keras_fit +from selectinf.learning.utils import (partial_model_inference, + pivot_plot, + lee_inference) +from selectinf.learning.core import normal_sampler, keras_fit, gbm_fit_sk +from selectinf.learning.learners import sparse_mixture_learner def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=8000): @@ -63,11 +64,12 @@ def meta_algorithm(XTX, XTXi, lam, sampler): truth, selection_algorithm, smooth_sampler, - fit_probability=keras_fit, - fit_args={'epochs':30, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}, + fit_probability=gbm_fit_sk, + fit_args={'n_estimators':1000}, success_params=(1, 1), B=B, - alpha=alpha) + alpha=alpha, + learner_klass=sparse_mixture_learner) lee_df = lee_inference(X, y, diff --git a/doc/learning_examples/stability/stability_selection_harder.py b/doc/learning_examples/stability/stability_selection_harder.py index 2ac1a1903..f13a9006a 100644 --- a/doc/learning_examples/stability/stability_selection_harder.py +++ b/doc/learning_examples/stability/stability_selection_harder.py @@ -5,11 +5,11 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.utils import full_model_inference, pivot_plot -from selection.learning.core import split_sampler, keras_fit +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import split_sampler, keras_fit from sklearn.linear_model import 
lasso_path @@ -83,9 +83,9 @@ def _alpha_grid(X, y, center, XTX): import matplotlib.pyplot as plt import pandas as pd - for i in range(500): + for i in range(2000): df = simulate(B=3000) - csvfile = 'stability_selection_harder.csv' + csvfile = __file__[:-3] + '.csv' outbase = csvfile[:-4] if df is not None and i > 0: @@ -97,6 +97,6 @@ def _alpha_grid(X, y, center, XTX): df.to_csv(csvfile, index=False) if len(df['pivot']) > 0: - pivot_ax, length_ax = pivot_plot(df, outbase) + pivot_plot(df, outbase) diff --git a/doc/learning_examples/stability/stability_selection_harder_5000.py b/doc/learning_examples/stability/stability_selection_harder_5000.py new file mode 100644 index 000000000..33943a72f --- /dev/null +++ b/doc/learning_examples/stability/stability_selection_harder_5000.py @@ -0,0 +1,102 @@ +import functools, uuid + +import numpy as np, pandas as pd +from scipy.stats import norm as ndist + +import regreg.api as rr + +from selectinf.tests.instance import gaussian_instance + + +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import split_sampler, keras_fit + +from sklearn.linear_model import lasso_path + +def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000): + + # description of statistical problem + + X, y, truth = gaussian_instance(n=n, + p=p, + s=s, + equicorrelated=False, + rho=0.1, + sigma=sigma, + signal=signal, + random_signs=True, + scale=True)[:3] + + dispersion = sigma**2 + + S = X.T.dot(y) + covS = dispersion * X.T.dot(X) + splitting_sampler = split_sampler(X * y[:, None], covS) + + def meta_algorithm(XTX, XTXi, sampler): + + min_success = 6 + ntries = 10 + + def _alpha_grid(X, y, center, XTX): + n, p = X.shape + alphas, coefs, _ = lasso_path(X, y, Xy=center, precompute=XTX) + nselected = np.count_nonzero(coefs, axis=0) + return alphas[nselected < np.sqrt(0.8 * p)] + + alpha_grid = _alpha_grid(X, y, sampler(scale=0.), XTX) + success = np.zeros((p, alpha_grid.shape[0])) + + for _ in range(ntries): + scale = 1. 
# corresponds to sub-samples of 50% + noisy_S = sampler(scale=scale) + _, coefs, _ = lasso_path(X, y, Xy = noisy_S, precompute=XTX, alphas=alpha_grid) + success += np.abs(np.sign(coefs)) + + selected = np.apply_along_axis(lambda row: any(x>min_success for x in row), 1, success) + vars = set(np.nonzero(selected)[0]) + return vars + + XTX = X.T.dot(X) + XTXi = np.linalg.inv(XTX) + resid = y - X.dot(XTXi.dot(X.T.dot(y))) + dispersion = np.linalg.norm(resid)**2 / (n-p) + + selection_algorithm = functools.partial(meta_algorithm, XTX, XTXi) + + # run selection algorithm + + + return full_model_inference(X, + y, + truth, + selection_algorithm, + splitting_sampler, + success_params=(1, 1), + B=B, + fit_probability=keras_fit, + fit_args={'epochs':10, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) + + +if __name__ == "__main__": + import statsmodels.api as sm + import matplotlib.pyplot as plt + import pandas as pd + + for i in range(2000): + df = simulate(B=5000) + csvfile = __file__[:-3] + '.csv' + outbase = csvfile[:-4] + + if df is not None and i > 0: + + try: # concatenate to disk + df = pd.concat([df, pd.read_csv(csvfile)]) + except FileNotFoundError: + pass + df.to_csv(csvfile, index=False) + + if len(df['pivot']) > 0: + pivot_plot(df, outbase) + + diff --git a/doc/learning_examples/stability/stability_selection_harder_big.py b/doc/learning_examples/stability/stability_selection_harder_big.py index e22389e6a..9fd38d909 100644 --- a/doc/learning_examples/stability/stability_selection_harder_big.py +++ b/doc/learning_examples/stability/stability_selection_harder_big.py @@ -5,11 +5,11 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.utils import full_model_inference, pivot_plot -from selection.learning.core import split_sampler, keras_fit +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import split_sampler, keras_fit from sklearn.linear_model import lasso_path @@ -83,9 +83,9 @@ def _alpha_grid(X, y, center, XTX): import matplotlib.pyplot as plt import pandas as pd - for i in range(500): + for i in range(2000): df = simulate(B=3000) - csvfile = 'stability_selection_harder_big.csv' + csvfile = __file__[:-3] + '.csv' outbase = csvfile[:-4] if df is not None and i > 0: @@ -97,6 +97,6 @@ def _alpha_grid(X, y, center, XTX): df.to_csv(csvfile, index=False) if len(df['pivot']) > 0: - pivot_ax, length_ax = pivot_plot(df, outbase) + pivot_plot(df, outbase) From c5052da67537fdf911248e2ee5d9c3ad3f98c3e4 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 21 Feb 2020 23:55:27 -0800 Subject: [PATCH 009/187] C code for update for Cox partial likelihood --- C-software | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/C-software b/C-software index 851279ffb..84de59b94 160000 --- a/C-software +++ b/C-software @@ -1 +1 @@ -Subproject commit 851279ffb326b145d00af45b87e7d857e3941ec9 +Subproject commit 84de59b94ecdb10805fa4f947abfacc8ca1bf6bf From 32502b38606e36149b8c4ba656e8f28044d8ed6a Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sat, 22 Feb 2020 11:03:53 -0800 Subject: [PATCH 010/187] updated C software for cox, wrapper --- C-software | 2 +- .../multi_target/followup_multi.py | 54 ++--- selectinf/algorithms/cox_utils.pyx | 213 ++++++++++++++++++ 3 files changed, 233 insertions(+), 36 deletions(-) create mode 100644 selectinf/algorithms/cox_utils.pyx diff --git a/C-software b/C-software index 84de59b94..1307f8ce0 
160000 --- a/C-software +++ b/C-software @@ -1 +1 @@ -Subproject commit 84de59b94ecdb10805fa4f947abfacc8ca1bf6bf +Subproject commit 1307f8ce09995d99f1d1e2ecaba8e1eaef201b17 diff --git a/doc/learning_examples/multi_target/followup_multi.py b/doc/learning_examples/multi_target/followup_multi.py index aa16ded9f..0b506dae7 100644 --- a/doc/learning_examples/multi_target/followup_multi.py +++ b/doc/learning_examples/multi_target/followup_multi.py @@ -5,11 +5,13 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.utils import full_model_inference, pivot_plot, naive_full_model_inference -from selection.learning.core import normal_sampler, keras_fit -from selection.learning.Rutils import lasso_glmnet +from selectinf.learning.utils import (full_model_inference, + pivot_plot, + split_full_model_inference) +from selectinf.learning.core import normal_sampler, keras_fit +from selectinf.learning.Rutils import lasso_glmnet def simulate(n=400, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0, B=2000): @@ -33,10 +35,11 @@ def simulate(n=400, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0, B= covS = dispersion * X.T.dot(X) smooth_sampler = normal_sampler(S, covS) - def meta_algorithm(X, XTXi, resid, sampler): + idx = np.random.choice(np.arange(n), int(n)/2, replace=False) + + def meta_algorithm(X, XTXi, resid, idx, sampler): n, p = X.shape - idx = np.random.choice(np.arange(n), 200, replace=False) S = sampler(scale=0.) # deterministic with scale=0 ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X @@ -50,7 +53,7 @@ def meta_algorithm(X, XTXi, resid, sampler): resid = y - X.dot(XTXi.dot(X.T.dot(y))) dispersion = np.linalg.norm(resid)**2 / (n-p) - selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid) + selection_algorithm = functools.partial(meta_algorithm, X, XTXi, resid, idx) # run selection algorithm @@ -68,35 +71,15 @@ def meta_algorithm(X, XTXi, resid, sampler): if df is not None: observed_set = list(df['variable']) - true_target = truth[observed_set] - - np.random.seed(seed) - X2, _, _ = gaussian_instance(n=n, - p=p, - s=s, - equicorrelated=False, - rho=0.5, - sigma=sigma, - signal=signal, - random_signs=True, - center=False, - scale=False)[:3] - stage_1 = np.random.choice(np.arange(n), 200, replace=False) - stage_2 = sorted(set(range(n)).difference(stage_1)) - X2 = X2[stage_2] - y2 = X2.dot(truth) + sigma * np.random.standard_normal(X2.shape[0]) - - XTXi_2 = np.linalg.inv(X2.T.dot(X2)) - resid2 = y2 - X2.dot(XTXi_2.dot(X2.T.dot(y2))) - dispersion_2 = np.linalg.norm(resid2)**2 / (X2.shape[0] - X2.shape[1]) - - naive_df = naive_full_model_inference(X2, - y2, - dispersion_2, + split_df = split_full_model_inference(X, + y, + idx, + dispersion, + truth, observed_set, alpha=alpha) - df = pd.merge(df, naive_df, on='variable') + df = pd.merge(df, split_df, on='variable') return df if __name__ == "__main__": @@ -107,7 +90,7 @@ def meta_algorithm(X, XTXi, resid, sampler): iseed = int(np.fabs(np.random.standard_normal() * 1000)) for i in range(500): df = simulate(seed=i+iseed, B=2000) - csvfile = 'followup_multi.csv' + csvfile = __file__[:-3] + '.csv' outbase = csvfile[:-4] if df is not None and i > 0: @@ -119,6 +102,7 @@ def meta_algorithm(X, XTXi, resid, sampler): df.to_csv(csvfile, index=False) if len(df['pivot']) > 0: - pivot_plot(df, outbase) + f = pivot_plot(df, outbase) + f.close() diff --git a/selectinf/algorithms/cox_utils.pyx 
b/selectinf/algorithms/cox_utils.pyx new file mode 100644 index 000000000..317e87291 --- /dev/null +++ b/selectinf/algorithms/cox_utils.pyx @@ -0,0 +1,213 @@ +import warnings +import numpy as np, cython +cimport numpy as cnp + +DTYPE_float = np.float +ctypedef cnp.float_t DTYPE_float_t +DTYPE_int = np.int +ctypedef cnp.int_t DTYPE_int_t +ctypedef cnp.intp_t DTYPE_intp_t + +cdef extern from "cox_fns.h": + + void _update_cox_exp(double *linear_pred_ptr, # Linear term in objective + double *exp_accum_ptr, # inner accumulation vector + long *censoring_ptr, # censoring indicator + long *ordering_ptr, # 0-based ordering of times + long *rankmin_ptr, # 0-based ranking with min tie breaking + long ncase # how many subjects / times + ); + + void _update_cox_expZ(double *linear_pred_ptr, # Linear term in objective + double *right_vector_ptr, # Linear term in objective + double *expZ_accum_ptr, # inner accumulation vector + long *censoring_ptr, # censoring indicator + long *ordering_ptr, # 0-based ordering of times + long *rankmin_ptr, # 0-based ranking with min tie breaking + long ncase # how many subjects / times + ); + + void _update_outer_1st(double *linear_pred_ptr, # Linear term in objective + double *exp_accum_ptr, # inner accumulation vector + double *outer_accum_1st_ptr, # outer accumulation vector + long *censoring_ptr, # censoring indicator + long *ordering_ptr, # 0-based ordering of times + long *rankmin_ptr, # 0-based ranking with min tie breaking + long ncase # how many subjects / times + ); + + void _update_outer_2nd(double *linear_pred_ptr, # Linear term in objective + double *exp_accum_ptr, # inner accumulation vector Ze^{\eta} + double *expZ_accum_ptr, # inner accumulation vector e^{\eta} + double *outer_accum_2nd_ptr, # outer accumulation vector + long *censoring_ptr, # censoring indicator + long *ordering_ptr, # 0-based ordering of times + long *rankmin_ptr, # 0-based ranking with min tie breaking + long ncase # how many subjects / times + ); + + double _cox_objective(double *linear_pred_ptr, # Linear term in objective + double *inner_accum_ptr, # inner accumulation vector + double *outer_accum_1st_ptr, # outer accumulation vector + long *censoring_ptr, # censoring indicator + long *ordering_ptr, # 0-based ordering of times + long *rankmin_ptr, # 0-based ranking with min tie breaking + long *rankmax_ptr, # 0-based ranking with max tie breaking + long ncase # how many subjects / times + ); + + void _cox_gradient(double *gradient_ptr, # Where gradient is stored + double *linear_pred_ptr, # Linear term in objective + double *outer_accum_1st_ptr, # outer accumulation vector + long *censoring_ptr, # censoring indicator + long *ordering_ptr, # 0-based ordering of times + long *rankmin_ptr, # 0-based ranking with min tie breaking + long *rankmax_ptr, # 0-based ranking with max tie breaking + long ncase # how many subjects / times + ); + + void _cox_hessian(double *hessian_ptr, # Where hessian is stored + double *linear_pred_ptr, # Linear term in objective + double *outer_accum_1st_ptr, # outer accumulation vector used in outer prod "mean" + double *outer_accum_2nd_ptr, # outer accumulation vector used in "2nd" moment + long *censoring_ptr, # censoring indicator + long *ordering_ptr, # 0-based ordering of times + long *rankmax_ptr, # 0-based ranking with max tie breaking + long ncase # how many subjects / times + ); + +def cox_objective(cnp.ndarray[DTYPE_float_t, ndim=1] linear_pred, + cnp.ndarray[DTYPE_float_t, ndim=1] exp_accum, + cnp.ndarray[DTYPE_float_t, ndim=1] outer_1st_accum, + 
cnp.ndarray[DTYPE_int_t, ndim=1] censoring, + cnp.ndarray[DTYPE_int_t, ndim=1] ordering, + cnp.ndarray[DTYPE_int_t, ndim=1] rankmin, + cnp.ndarray[DTYPE_int_t, ndim=1] rankmax, + long ncase): + + _update_cox_exp(linear_pred.data, + exp_accum.data, + censoring.data, + ordering.data, + rankmin.data, + ncase) + + _update_outer_1st(linear_pred.data, + exp_accum.data, + outer_1st_accum.data, + censoring.data, + ordering.data, + rankmin.data, + ncase) + + return _cox_objective(linear_pred.data, + exp_accum.data, + outer_1st_accum.data, + censoring.data, + ordering.data, + rankmin.data, + rankmax.data, + ncase) + +def cox_gradient(cnp.ndarray[DTYPE_float_t, ndim=1] gradient, + cnp.ndarray[DTYPE_float_t, ndim=1] linear_pred, + cnp.ndarray[DTYPE_float_t, ndim=1] exp_accum, + cnp.ndarray[DTYPE_float_t, ndim=1] outer_1st_accum, + cnp.ndarray[DTYPE_int_t, ndim=1] censoring, + cnp.ndarray[DTYPE_int_t, ndim=1] ordering, + cnp.ndarray[DTYPE_int_t, ndim=1] rankmin, + cnp.ndarray[DTYPE_int_t, ndim=1] rankmax, + long ncase): + """ + Compute Cox partial likelihood gradient in place. + """ + + # this computes e^{\eta} and stores cumsum at rankmin + + _update_cox_exp(linear_pred.data, + exp_accum.data, + censoring.data, + ordering.data, + rankmin.data, + ncase) + + _update_outer_1st(linear_pred.data, + exp_accum.data, + outer_1st_accum.data, + censoring.data, + ordering.data, + rankmin.data, + ncase) + + _cox_gradient(gradient.data, + linear_pred.data, + outer_1st_accum.data, + censoring.data, + ordering.data, + rankmin.data, + rankmax.data, + ncase) + + return gradient + +def cox_hessian(cnp.ndarray[DTYPE_float_t, ndim=1] hessian, + cnp.ndarray[DTYPE_float_t, ndim=1] linear_pred, + cnp.ndarray[DTYPE_float_t, ndim=1] right_vector, + cnp.ndarray[DTYPE_float_t, ndim=1] exp_accum, + cnp.ndarray[DTYPE_float_t, ndim=1] expZ_accum, + cnp.ndarray[DTYPE_float_t, ndim=1] outer_1st_accum, + cnp.ndarray[DTYPE_float_t, ndim=1] outer_2nd_accum, + cnp.ndarray[DTYPE_int_t, ndim=1] censoring, + cnp.ndarray[DTYPE_int_t, ndim=1] ordering, + cnp.ndarray[DTYPE_int_t, ndim=1] rankmin, + cnp.ndarray[DTYPE_int_t, ndim=1] rankmax, + long ncase): + """ + Compute Cox partial likelihood gradient in place. 
+ """ + + # this computes e^{\eta} and stores cumsum at rankmin, stored in outer_accum_1st + + _update_cox_exp(linear_pred.data, + exp_accum.data, + censoring.data, + ordering.data, + rankmin.data, + ncase) + + _update_outer_1st(linear_pred.data, + exp_accum.data, + outer_1st_accum.data, + censoring.data, + ordering.data, + rankmin.data, + ncase) + + _update_cox_expZ(linear_pred.data, + right_vector.data, + exp_accum.data, + censoring.data, + ordering.data, + rankmin.data, + ncase) + + _update_outer_2nd(linear_pred.data, + exp_accum.data, + expZ_accum.data, + outer_2nd_accum.data, + censoring.data, + ordering.data, + rankmin.data, + ncase) + + _cox_hessian(hessian.data, + linear_pred.data, + outer_1st_accum.data, + outer_2nd_accum.data, + censoring.data, + ordering.data, + rankmax.data, + ncase) + + return hessian + From 3a41a6b3cf5c598d71addfafce527816177f0899 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sat, 22 Feb 2020 11:06:55 -0800 Subject: [PATCH 011/187] minor fixes, added Cox code --- .../cross_inference/cross_inference.py | 4 +-- .../knockoffs/knockoff_followup.py | 33 ++++++++++--------- .../multi_target/followup_multi.py | 12 +++---- selectinf/info.py | 2 +- selectinf/learning/fitters.py | 17 ++++++---- selectinf/learning/utils.py | 2 +- setup.py | 9 ++++- 7 files changed, 44 insertions(+), 35 deletions(-) diff --git a/doc/learning_examples/cross_inference/cross_inference.py b/doc/learning_examples/cross_inference/cross_inference.py index 9383e69ee..90000e99e 100644 --- a/doc/learning_examples/cross_inference/cross_inference.py +++ b/doc/learning_examples/cross_inference/cross_inference.py @@ -1,7 +1,7 @@ import numpy as np -from selection.learning.core import cross_inference -from selection.learning.keras_fit import keras_fit +from selectinf.learning.core import cross_inference +from selectinf.learning.core import keras_fit data = np.load('lasso_multi_learning.npz') learning_data = (data['T'][:2000], data['Y'][:2000]) diff --git a/doc/learning_examples/knockoffs/knockoff_followup.py b/doc/learning_examples/knockoffs/knockoff_followup.py index 3978af5f3..9bbe1093c 100644 --- a/doc/learning_examples/knockoffs/knockoff_followup.py +++ b/doc/learning_examples/knockoffs/knockoff_followup.py @@ -5,15 +5,15 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.Rutils import lasso_glmnet -from selection.learning.utils import (full_model_inference, - pivot_plot, - naive_full_model_inference) -from selection.learning.core import split_sampler, keras_fit +from selectinf.learning.Rutils import lasso_glmnet +from selectinf.learning.utils import (full_model_inference, + pivot_plot, + split_full_model_inference) +from selectinf.learning.core import normal_sampler, keras_fit -def simulate(n=400, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0): +def simulate(n=400, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0, B=2000): # description of statistical problem @@ -39,7 +39,7 @@ def meta_algorithm(X, XTXi, resid, sampler): n, p = X.shape - idx = np.random.choice(np.arange(n), 200, replace=False) + idx = np.random.choice(np.arange(n), int(n/2), replace=False) S = sampler(scale=0.) 
# deterministic with scale=0 ynew = X.dot(XTXi).dot(S) + resid # will be ok for n>p and non-degen X @@ -66,12 +66,11 @@ def meta_algorithm(X, XTXi, resid, sampler): y, truth, selection_algorithm, - splitting_sampler, + smooth_sampler, success_params=(8, 10), B=B, fit_probability=keras_fit, - fit_args={'epochs':20, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}, - fit_args={'df':20}) + fit_args={'epochs':20, 'sizes':[100]*5, 'dropout':0., 'activation':'relu'}) if df is not None: @@ -89,7 +88,7 @@ def meta_algorithm(X, XTXi, resid, sampler): random_signs=True, center=False, scale=False)[:3] - stage_1 = np.random.choice(np.arange(n), 200, replace=False) + stage_1 = np.random.choice(np.arange(n), int(n/2), replace=False) stage_2 = sorted(set(range(n)).difference(stage_1)) X2 = X2[stage_2] y2 = X2.dot(truth) + sigma * np.random.standard_normal(X2.shape[0]) @@ -98,13 +97,15 @@ def meta_algorithm(X, XTXi, resid, sampler): resid2 = y2 - X2.dot(XTXi_2.dot(X2.T.dot(y2))) dispersion_2 = np.linalg.norm(resid2)**2 / (X2.shape[0] - X2.shape[1]) - naive_df = naive_full_model_inference(X2, + split_df = split_full_model_inference(X2, y2, + stage_1, dispersion_2, + truth, observed_set, alpha=alpha) - df = pd.merge(df, naive_df, on='variable') + df = pd.merge(df, split_df, on='variable') return df if __name__ == "__main__": @@ -113,8 +114,8 @@ def meta_algorithm(X, XTXi, resid, sampler): import pandas as pd iseed = int(np.fabs(np.random.standard_normal() * 1000)) - for i in range(500): - df = simulate(seed=i + iseed) + for i in range(5000): + df = simulate(seed=i + iseed, B=3000) csvfile = 'knockoff_followup.csv' outbase = csvfile[:-4] diff --git a/doc/learning_examples/multi_target/followup_multi.py b/doc/learning_examples/multi_target/followup_multi.py index 0b506dae7..95fe1208c 100644 --- a/doc/learning_examples/multi_target/followup_multi.py +++ b/doc/learning_examples/multi_target/followup_multi.py @@ -13,11 +13,10 @@ from selectinf.learning.core import normal_sampler, keras_fit from selectinf.learning.Rutils import lasso_glmnet -def simulate(n=400, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0, B=2000): +def simulate(n=1000, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, B=2000): # description of statistical problem - np.random.seed(seed) X, y, truth = gaussian_instance(n=n, p=p, s=s, @@ -35,7 +34,7 @@ def simulate(n=400, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1, seed=0, B= covS = dispersion * X.T.dot(X) smooth_sampler = normal_sampler(S, covS) - idx = np.random.choice(np.arange(n), int(n)/2, replace=False) + idx = np.random.choice(np.arange(n), int(n/2), replace=False) def meta_algorithm(X, XTXi, resid, idx, sampler): @@ -87,9 +86,8 @@ def meta_algorithm(X, XTXi, resid, idx, sampler): import matplotlib.pyplot as plt import pandas as pd - iseed = int(np.fabs(np.random.standard_normal() * 1000)) for i in range(500): - df = simulate(seed=i+iseed, B=2000) + df = simulate(B=3000) csvfile = __file__[:-3] + '.csv' outbase = csvfile[:-4] @@ -102,7 +100,7 @@ def meta_algorithm(X, XTXi, resid, idx, sampler): df.to_csv(csvfile, index=False) if len(df['pivot']) > 0: - f = pivot_plot(df, outbase) - f.close() + f = pivot_plot(df, outbase)[1] + plt.close(f) diff --git a/selectinf/info.py b/selectinf/info.py index b228f8e56..1df639924 100644 --- a/selectinf/info.py +++ b/selectinf/info.py @@ -43,7 +43,7 @@ # versions NUMPY_MIN_VERSION='1.7.1' SCIPY_MIN_VERSION = '0.9' -CYTHON_MIN_VERSION = '0.21' +CYTHON_MIN_VERSION = '0.29.5' MPMATH_MIN_VERSION = "0.18" PYINTER_MIN_VERSION = "0.1.6" 
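A compressed sketch of the selection pattern these learning-example scripts implement may help orient the reader. The helper names below (`sampler`, `base_selector`) stand in for the `normal_sampler` and `lasso_glmnet` objects built in the scripts themselves, so treat this as an illustration rather than the scripts' exact code. It also shows why the recurring `int(n)/2` to `int(n/2)` edits matter: under Python 3 true division `int(n)/2` is a float, and recent NumPy releases reject a float sample count.

```python
import numpy as np

def toy_meta_algorithm(X, XTXi, resid, sampler, base_selector):
    """Sketch of the meta-algorithms used in these learning examples."""
    n, p = X.shape
    # int(n / 2), not int(n) / 2: np.random.choice needs an integer count
    idx = np.random.choice(np.arange(n), int(n / 2), replace=False)
    S = sampler(scale=0.)                    # scale=0 returns the observed sufficient statistic
    ynew = X.dot(XTXi).dot(S) + resid        # synthetic response consistent with S (n > p, X full rank)
    return base_selector(X[idx], ynew[idx])  # base selection rule sees only a random half of the cases
```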
SKLEARN_MIN_VERSION = '0.19' diff --git a/selectinf/learning/fitters.py b/selectinf/learning/fitters.py index c6edb396c..30bcf8e86 100644 --- a/selectinf/learning/fitters.py +++ b/selectinf/learning/fitters.py @@ -9,13 +9,16 @@ def gbm_fit_sk(T, Y, **params): fitfns = [] for j in range(Y.shape[1]): y = Y[:,j].astype(np.int) - clf = ensemble.GradientBoostingClassifier(**params) - clf.fit(T, y) - - def fit_fn(clf, t): - return clf.predict_proba(t)[:,1] - - fitfns.append(functools.partial(fit_fn, clf)) + if len(np.unique(y)) > 1: + clf = ensemble.GradientBoostingClassifier(**params) + clf.fit(T, y) + + def fit_fn(clf, t): + return clf.predict_proba(t)[:,1] + fit_fn = functools.partial(fit_fn, clf) + else: + fit_fn = lambda t: np.atleast_1d(np.ones(t.shape[0])) + fitfns.append(fit_fn) return fitfns diff --git a/selectinf/learning/utils.py b/selectinf/learning/utils.py index 4eeb77b77..d68bc5b6a 100644 --- a/selectinf/learning/utils.py +++ b/selectinf/learning/utils.py @@ -49,7 +49,7 @@ def full_model_inference(X, if how_many is None: how_many = len(observed_list) - observed_list = observed_list[:how_many] + observed_list = list(np.random.choice(observed_list, how_many, replace=False)) # find the target, based on the observed outcome diff --git a/setup.py b/setup.py index cf882987b..4b6a011f0 100755 --- a/setup.py +++ b/setup.py @@ -59,9 +59,16 @@ libraries=[], include_dirs=['C-software/src'])) +EXTS.append(Extension('selectinf.algorithms.cox_utils', + ['selectinf/algorithms/cox_utils.pyx', + 'C-software/src/cox_fns.c'], + libraries=[], + include_dirs=['C-software/src'])) + EXTS.append(Extension('selectinf.randomized.selective_MLE_utils', ['selectinf/randomized/selective_MLE_utils.pyx', - 'C-software/src/selective_mle.c'], + 'C-software/src/selective_mle.c', + 'C-software/src/cox_fns.c'], libraries=[], include_dirs=['C-software/src'])) From 5a89aa3e43764ee958d2ac896c0b9b03ebd76b3a Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sat, 22 Feb 2020 13:40:49 -0800 Subject: [PATCH 012/187] updating C, cleanup cox --- C-software | 2 +- selectinf/algorithms/cox_utils.pyx | 21 ++++++++++++++++----- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/C-software b/C-software index 1307f8ce0..3f5d8f344 160000 --- a/C-software +++ b/C-software @@ -1 +1 @@ -Subproject commit 1307f8ce09995d99f1d1e2ecaba8e1eaef201b17 +Subproject commit 3f5d8f3447ebf4670c6f12cc0bfb970b1e1872d5 diff --git a/selectinf/algorithms/cox_utils.pyx b/selectinf/algorithms/cox_utils.pyx index 317e87291..63f6c2856 100644 --- a/selectinf/algorithms/cox_utils.pyx +++ b/selectinf/algorithms/cox_utils.pyx @@ -11,6 +11,7 @@ ctypedef cnp.intp_t DTYPE_intp_t cdef extern from "cox_fns.h": void _update_cox_exp(double *linear_pred_ptr, # Linear term in objective + double *exp_ptr, # stores exp(eta) double *exp_accum_ptr, # inner accumulation vector long *censoring_ptr, # censoring indicator long *ordering_ptr, # 0-based ordering of times @@ -20,6 +21,7 @@ cdef extern from "cox_fns.h": void _update_cox_expZ(double *linear_pred_ptr, # Linear term in objective double *right_vector_ptr, # Linear term in objective + double *exp_ptr, # stores exp(eta) double *expZ_accum_ptr, # inner accumulation vector long *censoring_ptr, # censoring indicator long *ordering_ptr, # 0-based ordering of times @@ -57,7 +59,7 @@ cdef extern from "cox_fns.h": ); void _cox_gradient(double *gradient_ptr, # Where gradient is stored - double *linear_pred_ptr, # Linear term in objective + double *exp_ptr, # stores exp(eta) double *outer_accum_1st_ptr, # outer 
accumulation vector long *censoring_ptr, # censoring indicator long *ordering_ptr, # 0-based ordering of times @@ -67,7 +69,8 @@ cdef extern from "cox_fns.h": ); void _cox_hessian(double *hessian_ptr, # Where hessian is stored - double *linear_pred_ptr, # Linear term in objective + double *exp_ptr, # stores exp(eta) + double *right_vector_ptr, # Right vector in Hessian double *outer_accum_1st_ptr, # outer accumulation vector used in outer prod "mean" double *outer_accum_2nd_ptr, # outer accumulation vector used in "2nd" moment long *censoring_ptr, # censoring indicator @@ -77,6 +80,7 @@ cdef extern from "cox_fns.h": ); def cox_objective(cnp.ndarray[DTYPE_float_t, ndim=1] linear_pred, + cnp.ndarray[DTYPE_float_t, ndim=1] exp_buffer, cnp.ndarray[DTYPE_float_t, ndim=1] exp_accum, cnp.ndarray[DTYPE_float_t, ndim=1] outer_1st_accum, cnp.ndarray[DTYPE_int_t, ndim=1] censoring, @@ -86,6 +90,7 @@ def cox_objective(cnp.ndarray[DTYPE_float_t, ndim=1] linear_pred, long ncase): _update_cox_exp(linear_pred.data, + exp_buffer.data, exp_accum.data, censoring.data, ordering.data, @@ -111,6 +116,7 @@ def cox_objective(cnp.ndarray[DTYPE_float_t, ndim=1] linear_pred, def cox_gradient(cnp.ndarray[DTYPE_float_t, ndim=1] gradient, cnp.ndarray[DTYPE_float_t, ndim=1] linear_pred, + cnp.ndarray[DTYPE_float_t, ndim=1] exp_buffer, cnp.ndarray[DTYPE_float_t, ndim=1] exp_accum, cnp.ndarray[DTYPE_float_t, ndim=1] outer_1st_accum, cnp.ndarray[DTYPE_int_t, ndim=1] censoring, @@ -125,6 +131,7 @@ def cox_gradient(cnp.ndarray[DTYPE_float_t, ndim=1] gradient, # this computes e^{\eta} and stores cumsum at rankmin _update_cox_exp(linear_pred.data, + exp_buffer.data, exp_accum.data, censoring.data, ordering.data, @@ -140,7 +147,7 @@ def cox_gradient(cnp.ndarray[DTYPE_float_t, ndim=1] gradient, ncase) _cox_gradient(gradient.data, - linear_pred.data, + exp_buffer.data, outer_1st_accum.data, censoring.data, ordering.data, @@ -153,6 +160,7 @@ def cox_gradient(cnp.ndarray[DTYPE_float_t, ndim=1] gradient, def cox_hessian(cnp.ndarray[DTYPE_float_t, ndim=1] hessian, cnp.ndarray[DTYPE_float_t, ndim=1] linear_pred, cnp.ndarray[DTYPE_float_t, ndim=1] right_vector, + cnp.ndarray[DTYPE_float_t, ndim=1] exp_buffer, cnp.ndarray[DTYPE_float_t, ndim=1] exp_accum, cnp.ndarray[DTYPE_float_t, ndim=1] expZ_accum, cnp.ndarray[DTYPE_float_t, ndim=1] outer_1st_accum, @@ -169,6 +177,7 @@ def cox_hessian(cnp.ndarray[DTYPE_float_t, ndim=1] hessian, # this computes e^{\eta} and stores cumsum at rankmin, stored in outer_accum_1st _update_cox_exp(linear_pred.data, + exp_buffer.data, exp_accum.data, censoring.data, ordering.data, @@ -185,7 +194,8 @@ def cox_hessian(cnp.ndarray[DTYPE_float_t, ndim=1] hessian, _update_cox_expZ(linear_pred.data, right_vector.data, - exp_accum.data, + exp_buffer.data, + expZ_accum.data, censoring.data, ordering.data, rankmin.data, @@ -201,7 +211,8 @@ def cox_hessian(cnp.ndarray[DTYPE_float_t, ndim=1] hessian, ncase) _cox_hessian(hessian.data, - linear_pred.data, + exp_buffer.data, + right_vector.data, outer_1st_accum.data, outer_2nd_accum.data, censoring.data, From 74098c72c1912ca9ce704c4caeaf044e2716d278 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sat, 22 Feb 2020 14:59:24 -0800 Subject: [PATCH 013/187] added case weights but results don't quite agree with R --- selectinf/algorithms/cox_utils.pyx | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/selectinf/algorithms/cox_utils.pyx b/selectinf/algorithms/cox_utils.pyx index 63f6c2856..01244835b 100644 --- 
a/selectinf/algorithms/cox_utils.pyx +++ b/selectinf/algorithms/cox_utils.pyx @@ -13,6 +13,7 @@ cdef extern from "cox_fns.h": void _update_cox_exp(double *linear_pred_ptr, # Linear term in objective double *exp_ptr, # stores exp(eta) double *exp_accum_ptr, # inner accumulation vector + double *case_weight_ptr, # case weights long *censoring_ptr, # censoring indicator long *ordering_ptr, # 0-based ordering of times long *rankmin_ptr, # 0-based ranking with min tie breaking @@ -21,8 +22,9 @@ cdef extern from "cox_fns.h": void _update_cox_expZ(double *linear_pred_ptr, # Linear term in objective double *right_vector_ptr, # Linear term in objective - double *exp_ptr, # stores exp(eta) + double *exp_ptr, # stores exp(eta) double *expZ_accum_ptr, # inner accumulation vector + double *case_weight_ptr, # case weights long *censoring_ptr, # censoring indicator long *ordering_ptr, # 0-based ordering of times long *rankmin_ptr, # 0-based ranking with min tie breaking @@ -32,6 +34,7 @@ cdef extern from "cox_fns.h": void _update_outer_1st(double *linear_pred_ptr, # Linear term in objective double *exp_accum_ptr, # inner accumulation vector double *outer_accum_1st_ptr, # outer accumulation vector + double *case_weight_ptr, # case weights long *censoring_ptr, # censoring indicator long *ordering_ptr, # 0-based ordering of times long *rankmin_ptr, # 0-based ranking with min tie breaking @@ -42,6 +45,7 @@ cdef extern from "cox_fns.h": double *exp_accum_ptr, # inner accumulation vector Ze^{\eta} double *expZ_accum_ptr, # inner accumulation vector e^{\eta} double *outer_accum_2nd_ptr, # outer accumulation vector + double *case_weight_ptr, # case weights long *censoring_ptr, # censoring indicator long *ordering_ptr, # 0-based ordering of times long *rankmin_ptr, # 0-based ranking with min tie breaking @@ -51,6 +55,7 @@ cdef extern from "cox_fns.h": double _cox_objective(double *linear_pred_ptr, # Linear term in objective double *inner_accum_ptr, # inner accumulation vector double *outer_accum_1st_ptr, # outer accumulation vector + double *case_weight_ptr, # case weights long *censoring_ptr, # censoring indicator long *ordering_ptr, # 0-based ordering of times long *rankmin_ptr, # 0-based ranking with min tie breaking @@ -61,6 +66,7 @@ cdef extern from "cox_fns.h": void _cox_gradient(double *gradient_ptr, # Where gradient is stored double *exp_ptr, # stores exp(eta) double *outer_accum_1st_ptr, # outer accumulation vector + double *case_weight_ptr, # case weights long *censoring_ptr, # censoring indicator long *ordering_ptr, # 0-based ordering of times long *rankmin_ptr, # 0-based ranking with min tie breaking @@ -73,6 +79,7 @@ cdef extern from "cox_fns.h": double *right_vector_ptr, # Right vector in Hessian double *outer_accum_1st_ptr, # outer accumulation vector used in outer prod "mean" double *outer_accum_2nd_ptr, # outer accumulation vector used in "2nd" moment + double *case_weight_ptr, # case weights long *censoring_ptr, # censoring indicator long *ordering_ptr, # 0-based ordering of times long *rankmax_ptr, # 0-based ranking with max tie breaking @@ -83,6 +90,7 @@ def cox_objective(cnp.ndarray[DTYPE_float_t, ndim=1] linear_pred, cnp.ndarray[DTYPE_float_t, ndim=1] exp_buffer, cnp.ndarray[DTYPE_float_t, ndim=1] exp_accum, cnp.ndarray[DTYPE_float_t, ndim=1] outer_1st_accum, + cnp.ndarray[DTYPE_float_t, ndim=1] case_weight, cnp.ndarray[DTYPE_int_t, ndim=1] censoring, cnp.ndarray[DTYPE_int_t, ndim=1] ordering, cnp.ndarray[DTYPE_int_t, ndim=1] rankmin, @@ -92,6 +100,7 @@ def 
cox_objective(cnp.ndarray[DTYPE_float_t, ndim=1] linear_pred, _update_cox_exp(linear_pred.data, exp_buffer.data, exp_accum.data, + case_weight.data, censoring.data, ordering.data, rankmin.data, @@ -100,6 +109,7 @@ def cox_objective(cnp.ndarray[DTYPE_float_t, ndim=1] linear_pred, _update_outer_1st(linear_pred.data, exp_accum.data, outer_1st_accum.data, + case_weight.data, censoring.data, ordering.data, rankmin.data, @@ -108,6 +118,7 @@ def cox_objective(cnp.ndarray[DTYPE_float_t, ndim=1] linear_pred, return _cox_objective(linear_pred.data, exp_accum.data, outer_1st_accum.data, + case_weight.data, censoring.data, ordering.data, rankmin.data, @@ -119,6 +130,7 @@ def cox_gradient(cnp.ndarray[DTYPE_float_t, ndim=1] gradient, cnp.ndarray[DTYPE_float_t, ndim=1] exp_buffer, cnp.ndarray[DTYPE_float_t, ndim=1] exp_accum, cnp.ndarray[DTYPE_float_t, ndim=1] outer_1st_accum, + cnp.ndarray[DTYPE_float_t, ndim=1] case_weight, cnp.ndarray[DTYPE_int_t, ndim=1] censoring, cnp.ndarray[DTYPE_int_t, ndim=1] ordering, cnp.ndarray[DTYPE_int_t, ndim=1] rankmin, @@ -133,6 +145,7 @@ def cox_gradient(cnp.ndarray[DTYPE_float_t, ndim=1] gradient, _update_cox_exp(linear_pred.data, exp_buffer.data, exp_accum.data, + case_weight.data, censoring.data, ordering.data, rankmin.data, @@ -141,6 +154,7 @@ def cox_gradient(cnp.ndarray[DTYPE_float_t, ndim=1] gradient, _update_outer_1st(linear_pred.data, exp_accum.data, outer_1st_accum.data, + case_weight.data, censoring.data, ordering.data, rankmin.data, @@ -149,6 +163,7 @@ def cox_gradient(cnp.ndarray[DTYPE_float_t, ndim=1] gradient, _cox_gradient(gradient.data, exp_buffer.data, outer_1st_accum.data, + case_weight.data, censoring.data, ordering.data, rankmin.data, @@ -165,6 +180,7 @@ def cox_hessian(cnp.ndarray[DTYPE_float_t, ndim=1] hessian, cnp.ndarray[DTYPE_float_t, ndim=1] expZ_accum, cnp.ndarray[DTYPE_float_t, ndim=1] outer_1st_accum, cnp.ndarray[DTYPE_float_t, ndim=1] outer_2nd_accum, + cnp.ndarray[DTYPE_float_t, ndim=1] case_weight, cnp.ndarray[DTYPE_int_t, ndim=1] censoring, cnp.ndarray[DTYPE_int_t, ndim=1] ordering, cnp.ndarray[DTYPE_int_t, ndim=1] rankmin, @@ -179,6 +195,7 @@ def cox_hessian(cnp.ndarray[DTYPE_float_t, ndim=1] hessian, _update_cox_exp(linear_pred.data, exp_buffer.data, exp_accum.data, + case_weight.data, censoring.data, ordering.data, rankmin.data, @@ -187,6 +204,7 @@ def cox_hessian(cnp.ndarray[DTYPE_float_t, ndim=1] hessian, _update_outer_1st(linear_pred.data, exp_accum.data, outer_1st_accum.data, + case_weight.data, censoring.data, ordering.data, rankmin.data, @@ -196,6 +214,7 @@ def cox_hessian(cnp.ndarray[DTYPE_float_t, ndim=1] hessian, right_vector.data, exp_buffer.data, expZ_accum.data, + case_weight.data, censoring.data, ordering.data, rankmin.data, @@ -205,6 +224,7 @@ def cox_hessian(cnp.ndarray[DTYPE_float_t, ndim=1] hessian, exp_accum.data, expZ_accum.data, outer_2nd_accum.data, + case_weight.data, censoring.data, ordering.data, rankmin.data, @@ -215,6 +235,7 @@ def cox_hessian(cnp.ndarray[DTYPE_float_t, ndim=1] hessian, right_vector.data, outer_1st_accum.data, outer_2nd_accum.data, + case_weight.data, censoring.data, ordering.data, rankmax.data, From 94e2c6a9033d923f950486ad9a783d54b409de76 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sat, 22 Feb 2020 14:59:35 -0800 Subject: [PATCH 014/187] updated cox code --- C-software | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/C-software b/C-software index 3f5d8f344..8c36cc18b 160000 --- a/C-software +++ b/C-software @@ -1 +1 @@ -Subproject commit 
3f5d8f3447ebf4670c6f12cc0bfb970b1e1872d5 +Subproject commit 8c36cc18b1c78c139d8cba4ecbb8875eb8275b20 From ffbc2e1c75a6fbc20857b45eb969cbe7680228b1 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 1 Apr 2020 20:58:48 -0700 Subject: [PATCH 015/187] updating C software --- C-software | 2 +- selectinf/sampling/sequential.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/C-software b/C-software index 8c36cc18b..7a3d663fe 160000 --- a/C-software +++ b/C-software @@ -1 +1 @@ -Subproject commit 8c36cc18b1c78c139d8cba4ecbb8875eb8275b20 +Subproject commit 7a3d663feadaf6c61400359fe8fe95a61099b645 diff --git a/selectinf/sampling/sequential.py b/selectinf/sampling/sequential.py index 06a018895..450ae81c8 100644 --- a/selectinf/sampling/sequential.py +++ b/selectinf/sampling/sequential.py @@ -10,7 +10,7 @@ def sample(white_constraint, nsample, proposal_sigma=0.2, - temps=np.linspace(0, 50, 51.)): + temps=np.linspace(0, 50., 51)): """ Build up an approximately constrained Gaussian based on relaxations of the constraint. From b35e9352a158a9f30db813b9e7b6a183933edce4 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 1 Apr 2020 21:00:28 -0700 Subject: [PATCH 016/187] fixing imports --- selectinf/algorithms/lasso.py | 42 +++++++++---------- selectinf/algorithms/tests/test_covtest.py | 1 - .../algorithms/tests/test_debiased_lasso.py | 1 - selectinf/algorithms/tests/test_lasso.py | 17 ++++---- selectinf/algorithms/tests/test_softmax.py | 3 +- selectinf/algorithms/tests/test_sqrt_lasso.py | 3 +- .../constraints/tests/test_quadratic_tests.py | 1 - selectinf/randomized/tests/test_BH.py | 3 +- .../sampling/tests/test_sample_sphere.py | 2 +- selectinf/sampling/tests/test_sequential.py | 5 +-- selectinf/tests/decorators.py | 7 +--- selectinf/truncated/tests/test_truncated.py | 5 +-- 12 files changed, 39 insertions(+), 51 deletions(-) diff --git a/selectinf/algorithms/lasso.py b/selectinf/algorithms/lasso.py index fa35e5ed2..f885eb964 100644 --- a/selectinf/algorithms/lasso.py +++ b/selectinf/algorithms/lasso.py @@ -21,7 +21,6 @@ from regreg.api import (glm, weighted_l1norm, simple_problem, - coxph as coxph_obj, smooth_sum, squared_error, identity_quadratic, @@ -470,13 +469,13 @@ def logistic(klass, covariance_estimator=covariance_estimator) @classmethod - def coxph(klass, - X, - times, - status, - feature_weights, - covariance_estimator=None, - quadratic=None): + def cox(klass, + X, + times, + status, + feature_weights, + covariance_estimator=None, + quadratic=None): r""" Cox proportional hazards LASSO with feature weights. Objective function is @@ -521,7 +520,7 @@ def coxph(klass, coordinates of the gradient of the likelihood at the unpenalized estimator. 
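This patch renames the Cox classmethods from `coxph` to `cox` and routes them through regreg's `glm.cox` loss. A minimal usage sketch, modeled on the updated tests later in this patch (the data are synthetic and the penalty value is arbitrary):

```python
import numpy as np
from selectinf.algorithms.lasso import lasso

n, p = 100, 5
X = np.random.standard_normal((n, p))
times = np.random.standard_exponential(n)        # follow-up times
status = np.random.binomial(1, 0.5, size=(n,))   # status / censoring indicator

L = lasso.cox(X, times, status, 0.1)             # fourth argument: feature_weights (scalar or length-p)
soln = L.fit()                                   # penalized coefficient estimate
```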
""" - loglike = coxph_obj(X, times, status, quadratic=quadratic) + loglike = glm.cox(X, times, status, quadratic=quadratic) return klass(loglike, feature_weights, covariance_estimator=covariance_estimator) @@ -1003,14 +1002,14 @@ def poisson(klass, return klass(loglike1, loglike2, loglike, feature_weights) @classmethod - def coxph(klass, - X, - times, - status, - feature_weights, - split_frac=0.9, - sigma=1., - stage_one=None): + def cox(klass, + X, + times, + status, + feature_weights, + split_frac=0.9, + sigma=1., + stage_one=None): n, p = X.shape if stage_one is None: @@ -1025,9 +1024,9 @@ def coxph(klass, times1, X1, status1 = times[stage_one], X[stage_one], status[stage_one] times2, X2, status2 = times[stage_two], X[stage_two], status[stage_two] - loglike = coxph_obj(X, times, status) - loglike1 = coxph_obj(X1, times1, status1) - loglike2 = coxph_obj(X2, times2, status2) + loglike = glm.cox(X, times, status) + loglike1 = glm.cox(X1, times1, status1) + loglike2 = glm.cox(X2, times2, status2) return klass(loglike1, loglike2, loglike, feature_weights) @@ -1878,7 +1877,8 @@ def fit(self, # Needed for finding truncation intervals - self._Qbeta_bar = X.T.dot(W * X.dot(lasso_solution)) - self.loglike.smooth_objective(lasso_solution, 'grad') + self._Qbeta_bar = (X.T.dot(W * X.dot(lasso_solution)) - + self.loglike.smooth_objective(lasso_solution, 'grad')) self._W = W if n > p and self.approximate_inverse is None: diff --git a/selectinf/algorithms/tests/test_covtest.py b/selectinf/algorithms/tests/test_covtest.py index 9f0a2c2be..f80981659 100644 --- a/selectinf/algorithms/tests/test_covtest.py +++ b/selectinf/algorithms/tests/test_covtest.py @@ -1,7 +1,6 @@ import itertools import numpy as np -import numpy.testing.decorators as dec from ...tests.instance import gaussian_instance from ...tests.flags import SET_SEED, SMALL_SAMPLES diff --git a/selectinf/algorithms/tests/test_debiased_lasso.py b/selectinf/algorithms/tests/test_debiased_lasso.py index 51eb94f94..161cb5196 100644 --- a/selectinf/algorithms/tests/test_debiased_lasso.py +++ b/selectinf/algorithms/tests/test_debiased_lasso.py @@ -1,6 +1,5 @@ import numpy as np import nose.tools as nt -import numpy.testing.decorators as dec from ...tests.instance import gaussian_instance as instance diff --git a/selectinf/algorithms/tests/test_lasso.py b/selectinf/algorithms/tests/test_lasso.py index 172535b10..3b1a3186e 100644 --- a/selectinf/algorithms/tests/test_lasso.py +++ b/selectinf/algorithms/tests/test_lasso.py @@ -1,6 +1,5 @@ import numpy as np, pandas as pd import nose.tools as nt -import numpy.testing.decorators as dec from itertools import product from ...tests.flags import SMALL_SAMPLES @@ -143,7 +142,7 @@ def test_poisson(): return L, C, P @set_seed_iftrue(True) -@dec.skipif(not statsmodels_available, "needs statsmodels") +@np.testing.dec.skipif(not statsmodels_available, "needs statsmodels") def test_coxph(): Q = rr.identity_quadratic(0.01, 0, np.ones(5), 0) @@ -151,10 +150,10 @@ def test_coxph(): T = np.random.standard_exponential(100) S = np.random.binomial(1, 0.5, size=(100,)) - L = lasso.coxph(X, T, S, 0.1, quadratic=Q) + L = lasso.cox(X, T, S, 0.1, quadratic=Q) L.fit() - L = lasso.coxph(X, T, S, 0.1, quadratic=Q) + L = lasso.cox(X, T, S, 0.1, quadratic=Q) L.fit() C = L.constraints @@ -450,7 +449,7 @@ def test_data_carving_poisson(n=500, @wait_for_return_value() @set_seed_iftrue(True) -@dec.skipif(not statsmodels_available, "needs statsmodels") +@np.testing.dec.skipif(not statsmodels_available, "needs statsmodels") 
@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, burnin=10) def test_data_carving_coxph(n=400, p=20, @@ -478,14 +477,14 @@ def test_data_carving_coxph(n=400, lam_theor = 10. * np.ones(p) lam_theor[0] = 0. - DC = data_carving.coxph(X, T, S, feature_weights=lam_theor, - stage_one=stage_one) + DC = data_carving.cox(X, T, S, feature_weights=lam_theor, + stage_one=stage_one) DC.fit() if len(DC.active) < n - int(n*split_frac): - DS = data_splitting.coxph(X, T, S, feature_weights=lam_theor, - stage_one=stage_one) + DS = data_splitting.cox(X, T, S, feature_weights=lam_theor, + stage_one=stage_one) DS.fit(use_full_cov=True) data_split = True else: diff --git a/selectinf/algorithms/tests/test_softmax.py b/selectinf/algorithms/tests/test_softmax.py index 329f847d5..1f6e64664 100644 --- a/selectinf/algorithms/tests/test_softmax.py +++ b/selectinf/algorithms/tests/test_softmax.py @@ -1,9 +1,8 @@ import numpy as np -import numpy.testing.decorators as dec from itertools import product from ..softmax import softmax_objective -@dec.skipif(True, "need some tests for softmax objective") +@np.testing.dec.skipif(True, "need some tests for softmax objective") def test_softmax(): raise ValueError('need some tests for softmax objective') diff --git a/selectinf/algorithms/tests/test_sqrt_lasso.py b/selectinf/algorithms/tests/test_sqrt_lasso.py index 0d05495d1..86edb6078 100644 --- a/selectinf/algorithms/tests/test_sqrt_lasso.py +++ b/selectinf/algorithms/tests/test_sqrt_lasso.py @@ -1,7 +1,6 @@ from __future__ import division import numpy as np -import numpy.testing.decorators as dec import nose.tools as nt import regreg.api as rr @@ -23,7 +22,7 @@ @wait_for_return_value() @set_sampling_params_iftrue(SMALL_SAMPLES, nsim=10, burnin=10, ndraw=10) -@dec.slow +@np.testing.dec.slow def test_goodness_of_fit(n=20, p=25, s=10, sigma=20., nsim=10, burnin=2000, ndraw=8000): P = [] diff --git a/selectinf/constraints/tests/test_quadratic_tests.py b/selectinf/constraints/tests/test_quadratic_tests.py index 5ea4e2767..1a1698f1a 100644 --- a/selectinf/constraints/tests/test_quadratic_tests.py +++ b/selectinf/constraints/tests/test_quadratic_tests.py @@ -1,7 +1,6 @@ import numpy as np from scipy.stats import chi import nose.tools as nt -import numpy.testing.decorators as dec from ...distributions import chisq from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue, rpy_test_safe diff --git a/selectinf/randomized/tests/test_BH.py b/selectinf/randomized/tests/test_BH.py index a6fe5851f..07192cfcb 100644 --- a/selectinf/randomized/tests/test_BH.py +++ b/selectinf/randomized/tests/test_BH.py @@ -1,5 +1,4 @@ import numpy as np -import numpy.testing.decorators as dec from scipy.stats import norm as ndist @@ -42,7 +41,7 @@ def BH_cutoff(): np.testing.assert_allclose(sorted(BHfilter(2 * ndist.sf(np.fabs(Z)), q=0.2)), sorted(stepup_selection(Z, BH_cutoffs)[1])) -@dec.skipif(True, "independent estimator test not working") +@np.testing.dec.skipif(True, "independent estimator test not working") def test_independent_estimator(n=100, n1=50, q=0.2, signal=3, p=100): Z = np.random.standard_normal((n, p)) diff --git a/selectinf/sampling/tests/test_sample_sphere.py b/selectinf/sampling/tests/test_sample_sphere.py index e1be9724c..07d858b0c 100644 --- a/selectinf/sampling/tests/test_sample_sphere.py +++ b/selectinf/sampling/tests/test_sample_sphere.py @@ -90,7 +90,7 @@ def test_sample_sphere(burnin=1000, s2 = AC.sample_from_sphere(con, initial, ndraw=ndraw, burnin=burnin) return s1, s2 -@dec.slow +@np.testing.dec.slow 
@set_seed_iftrue(SET_SEED, 20) @set_sampling_params_iftrue(SMALL_SAMPLES, nsim=10, ndraw=10, burnin=10) def test_distribution_sphere(n=15, p=10, sigma=1., diff --git a/selectinf/sampling/tests/test_sequential.py b/selectinf/sampling/tests/test_sequential.py index b4634bfd3..a4f34a2b6 100644 --- a/selectinf/sampling/tests/test_sequential.py +++ b/selectinf/sampling/tests/test_sequential.py @@ -1,5 +1,4 @@ import numpy as np -import numpy.testing.decorators as dec from scipy.stats import norm as ndist from ...constraints.affine import constraints @@ -7,13 +6,13 @@ from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue from ...tests.flags import SMALL_SAMPLES, SET_SEED -@dec.slow +@np.testing.dec.slow @set_seed_iftrue(SET_SEED) @set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=10, nsim=10) def test_sequentially_constrained(ndraw=100, nsim=50): S = -np.identity(10)[:3] b = -6 * np.ones(3) C = constraints(S, b) - W = sample(C, nsim, temps=np.linspace(0, 200, 1001)) + W = sample(C, nsim, temps=np.linspace(0, 200., 1001)) U = np.linspace(0, 1, 101) diff --git a/selectinf/tests/decorators.py b/selectinf/tests/decorators.py index 37407e65c..125a048ad 100644 --- a/selectinf/tests/decorators.py +++ b/selectinf/tests/decorators.py @@ -5,10 +5,7 @@ import nose import nose.tools -try: - from numpy.testing.decorators import SkipTest -except (ImportError, AttributeError): - from numpy.testing import SkipTest +from numpy.testing import SkipTest def set_seed_iftrue(condition, seed=10): """ @@ -209,7 +206,7 @@ def modified_gen(*args, **kwargs): for x in f(*args, **kwargs_cp): yield x else: - raise np.testing.decorators.SkipTest(get_msg(f, msg)) + raise SkipTest(get_msg(f, msg)) # Choose the right modified to use when building the actual decorator. 
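The common thread in these test-suite edits: `numpy.testing.decorators` stopped being importable as a standalone submodule in recent NumPy releases, so the tests switch to the `np.testing.dec` alias (or `from numpy.testing import dec`) and take `SkipTest` directly from `numpy.testing`; the `np.linspace` calls are likewise corrected so the number-of-points argument is an integer, which newer NumPy insists on. A small sketch of the import pattern the patches converge on (the test body is a throwaway placeholder):

```python
import numpy as np
from numpy.testing import dec, SkipTest   # replaces `import numpy.testing.decorators as dec`

temps = np.linspace(0, 200., 1001)        # num must be an int; the float belongs on the endpoint

@dec.skipif(True, "placeholder: always skipped")
def test_placeholder():
    raise SkipTest("never reached")
```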
if nose.util.isgenerator(f): diff --git a/selectinf/truncated/tests/test_truncated.py b/selectinf/truncated/tests/test_truncated.py index a1adfa19e..b5ddaaeb1 100644 --- a/selectinf/truncated/tests/test_truncated.py +++ b/selectinf/truncated/tests/test_truncated.py @@ -1,7 +1,6 @@ from __future__ import print_function import nose.tools as nt import numpy as np -import numpy.testing.decorators as dec from ..gaussian import truncated_gaussian, truncated_gaussian_old from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue @@ -25,7 +24,7 @@ def test_sigma(): np.around(np.array(tg2.equal_tailed_interval(Z,0.05)), 4)) @set_seed_iftrue(SET_SEED) -@dec.skipif(True, 'checking coverage: this is random with highish failure rate') +@np.testing.dec.skipif(True, 'checking coverage: this is random with highish failure rate') @set_sampling_params_iftrue(SMALL_SAMPLES, nsim=100) def test_equal_tailed_coverage(nsim=1000): @@ -44,7 +43,7 @@ def test_equal_tailed_coverage(nsim=1000): nt.assert_true(np.fabs(coverage - (1-alpha)*nsim) < 2*SE) @set_seed_iftrue(SET_SEED) -@dec.skipif(True, 'really slow') +@np.testing.dec.skipif(True, 'really slow') @set_sampling_params_iftrue(SMALL_SAMPLES, nsim=100) def test_UMAU_coverage(nsim=1000): From 0b7d566bf3a41654d6081eeb8926f42b5ef05f58 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 1 Apr 2020 21:16:42 -0700 Subject: [PATCH 017/187] some regreg changes to incorporate, more np decorators --- selectinf/algorithms/sqrt_lasso.py | 3 ++- selectinf/algorithms/tests/test_compareR.py | 5 +++-- selectinf/constraints/tests/test_affine.py | 3 ++- selectinf/randomized/tests/test_group_lasso.py | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/selectinf/algorithms/sqrt_lasso.py b/selectinf/algorithms/sqrt_lasso.py index e29409892..8bb03c5ef 100644 --- a/selectinf/algorithms/sqrt_lasso.py +++ b/selectinf/algorithms/sqrt_lasso.py @@ -10,6 +10,7 @@ # regreg http://github.com/regreg import regreg.api as rr +from regreg.atoms.mixed_lasso import NONNEGATIVE import regreg.affine as ra from regreg.smooth.glm import gaussian_loglike from regreg.affine import astransform @@ -427,7 +428,7 @@ def solve_sqrt_lasso_skinny(X, Y, weights=None, initial=None, quadratic=None, so weights = lam * np.ones((p,)) weight_dict = dict(zip(np.arange(p), 2 * weights)) - penalty = rr.mixed_lasso(list(np.arange(p)) + [rr.NONNEGATIVE], lagrange=1., + penalty = rr.mixed_lasso(list(np.arange(p)) + [NONNEGATIVE], lagrange=1., weights=weight_dict) loss = sqlasso_objective_skinny(X, Y) diff --git a/selectinf/algorithms/tests/test_compareR.py b/selectinf/algorithms/tests/test_compareR.py index d7ef21b76..51ba177cf 100644 --- a/selectinf/algorithms/tests/test_compareR.py +++ b/selectinf/algorithms/tests/test_compareR.py @@ -3,6 +3,7 @@ import numpy as np, pandas as pd import regreg.api as rr import nose.tools as nt +from numpy.testing import dec try: import rpy2.robjects as rpy @@ -22,7 +23,7 @@ from ...randomized.lasso import lasso as rlasso, selected_targets, full_targets, debiased_targets from ...tests.instance import gaussian_instance, logistic_instance -@np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test") +@dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test") def test_fixed_lambda(): """ Check that Gaussian LASSO results agree with R @@ -240,7 +241,7 @@ def test_coxph(): beta_hat = np.asarray(rpy.r('beta_hat')) x = np.asarray(rpy.r('x')) - L = lasso.coxph(x, tim, status, 1.5) + L = lasso.cox(x, tim, status, 1.5) beta2 
= L.fit() G1 = L.loglike.gradient(beta_hat) diff --git a/selectinf/constraints/tests/test_affine.py b/selectinf/constraints/tests/test_affine.py index 82cfad9db..dca0f70a5 100644 --- a/selectinf/constraints/tests/test_affine.py +++ b/selectinf/constraints/tests/test_affine.py @@ -2,6 +2,7 @@ import nose import numpy as np +from numpy.testing import dec from scipy.stats import chi import nose.tools as nt @@ -168,7 +169,7 @@ def test_sampling(): np.outer(V.mean(0), V.mean(0)) - S) < 0.01) @set_seed_iftrue(SET_SEED) -@np.testing.decorators.skipif(True, msg="optimal tilt undefined -- need to implement softmax version") +@dec.skipif(True, msg="optimal tilt undefined -- need to implement softmax version") def test_optimal_tilt(): A = np.vstack(-np.identity(4)) diff --git a/selectinf/randomized/tests/test_group_lasso.py b/selectinf/randomized/tests/test_group_lasso.py index 9cc866cf6..0f1380ffb 100644 --- a/selectinf/randomized/tests/test_group_lasso.py +++ b/selectinf/randomized/tests/test_group_lasso.py @@ -268,7 +268,7 @@ def test_mixed(n=400, which += which_group return pval[beta[which] == 0], pval[beta[which] != 0] -@set_seed_iftrue(SET_SEED) +@set_seed_iftrue(True) def test_all_targets(n=100, p=20, signal_fac=1.5, s=5, sigma=3, rho=0.4): for target in ['full', 'selected', 'debiased']: test_group_lasso(n=n, From 9b6a065f8d32560883b969ca9b4191c2432b2cd6 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 1 Apr 2020 21:22:02 -0700 Subject: [PATCH 018/187] one more missing import --- selectinf/sampling/tests/test_sample_sphere.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectinf/sampling/tests/test_sample_sphere.py b/selectinf/sampling/tests/test_sample_sphere.py index 07d858b0c..cef1b08ec 100644 --- a/selectinf/sampling/tests/test_sample_sphere.py +++ b/selectinf/sampling/tests/test_sample_sphere.py @@ -2,7 +2,7 @@ import nose import nose.tools as nt import numpy as np -import numpy.testing.decorators as dec +from numpy.testing import dec from scipy.stats import chi import nose.tools as nt From 82bc49170de606ecb20120b6d91e8c0942e73758 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 1 Apr 2020 21:31:20 -0700 Subject: [PATCH 019/187] fix to travis yaml --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 7b9c78817..958fc13bb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -93,7 +93,7 @@ install: - if [ "$RUN_R_TESTS" ]; then sudo apt-get install -y r-base r-base-dev r-cran-devtools r-cran-rcpp; pip install rpy2 statsmodels -c constraints.txt ; - Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')"; + # Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')"; -- dont need this line sudo Rscript -e "install.packages(c('glmnet', 'intervals', 'adaptMCMC', 'SLOPE', 'knockoff'), repos='http://cloud.r-project.org')"; git clone https://github.com/jonathan-taylor/R-selective.git; cd R-selective; From 357cb4c5908638044b6c092d888ca5e635697f9d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 1 Apr 2020 21:35:45 -0700 Subject: [PATCH 020/187] comment causing problem in travis --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 958fc13bb..63d030948 100644 --- a/.travis.yml +++ b/.travis.yml @@ -93,7 +93,6 @@ install: - if [ "$RUN_R_TESTS" ]; then sudo apt-get install -y r-base r-base-dev r-cran-devtools r-cran-rcpp; pip install rpy2 statsmodels -c constraints.txt ; - # Rscript -e "library(Rcpp); 
Rcpp::compileAttributes('selectiveInference')"; -- dont need this line sudo Rscript -e "install.packages(c('glmnet', 'intervals', 'adaptMCMC', 'SLOPE', 'knockoff'), repos='http://cloud.r-project.org')"; git clone https://github.com/jonathan-taylor/R-selective.git; cd R-selective; From 38eb9983e9d834af96775c092e24604dc6ca5e92 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 1 Apr 2020 22:16:36 -0700 Subject: [PATCH 021/187] older version of glmnet for older version of R --- .travis.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 63d030948..e24d2afe0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -93,7 +93,9 @@ install: - if [ "$RUN_R_TESTS" ]; then sudo apt-get install -y r-base r-base-dev r-cran-devtools r-cran-rcpp; pip install rpy2 statsmodels -c constraints.txt ; - sudo Rscript -e "install.packages(c('glmnet', 'intervals', 'adaptMCMC', 'SLOPE', 'knockoff'), repos='http://cloud.r-project.org')"; + sudo Rscript -e "install.packages(c('devtools', 'intervals', 'adaptMCMC', 'SLOPE'), repos='http://cloud.r-project.org')"; + sudo Rscript -e "require(devtools); install_version('glmnet', version='2.0.18', repos='http://cloud.r-project.org')"; + sudo Rscript -e "install.packages('knockoff', repos='http://cloud.r-project.org')"; git clone https://github.com/jonathan-taylor/R-selective.git; cd R-selective; git submodule init; From 798718a03e8fd8b1b3841450dbe4fd75a391ed60 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 1 Apr 2020 22:19:09 -0700 Subject: [PATCH 022/187] forcing to be an ndarray --- selectinf/tests/instance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectinf/tests/instance.py b/selectinf/tests/instance.py index 8c096b9ab..15826a148 100644 --- a/selectinf/tests/instance.py +++ b/selectinf/tests/instance.py @@ -364,7 +364,7 @@ def HIV_NRTI(drug='3TC', NRTI_specific = NRTI.from_records(np.array(NRTI_specific).T, columns=NRTI_muts) X_NRTI = np.array(NRTI_specific, np.float) - Y = NRTI[drug] # shorthand + Y = np.asarray(NRTI[drug]) # shorthand keep = ~np.isnan(Y).astype(np.bool) X_NRTI = X_NRTI[np.nonzero(keep)]; Y=Y[keep] Y = np.array(np.log(Y), np.float); From f552441122d01f3fd37fbe7d51a0e97e275b971e Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 1 Apr 2020 22:33:06 -0700 Subject: [PATCH 023/187] fixing version of glmnet --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index e24d2afe0..cc6a7ba64 100644 --- a/.travis.yml +++ b/.travis.yml @@ -94,7 +94,7 @@ install: sudo apt-get install -y r-base r-base-dev r-cran-devtools r-cran-rcpp; pip install rpy2 statsmodels -c constraints.txt ; sudo Rscript -e "install.packages(c('devtools', 'intervals', 'adaptMCMC', 'SLOPE'), repos='http://cloud.r-project.org')"; - sudo Rscript -e "require(devtools); install_version('glmnet', version='2.0.18', repos='http://cloud.r-project.org')"; + sudo Rscript -e "require(devtools); install_version('glmnet', version='2.0-18', repos='http://cloud.r-project.org')"; sudo Rscript -e "install.packages('knockoff', repos='http://cloud.r-project.org')"; git clone https://github.com/jonathan-taylor/R-selective.git; cd R-selective; From 048b18c916a2f3583b40da77dd55bbb316d49b4e Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 1 Apr 2020 23:01:00 -0700 Subject: [PATCH 024/187] try doc build with 3.6 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index cc6a7ba64..dd7a5620c 100644 --- 
a/.travis.yml +++ b/.travis.yml @@ -70,7 +70,7 @@ matrix: env: - INSTALL_TYPE=requirements - DEPENDS= - - python: 3.6 + - python: 3.5 sudo: true env: - DOC_BUILD=1 From 2808d308d820cb4aef6b82e195286a6e38016734 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 1 Apr 2020 23:31:13 -0700 Subject: [PATCH 025/187] removing doc build for now --- .travis.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index dd7a5620c..24d8ebb83 100644 --- a/.travis.yml +++ b/.travis.yml @@ -70,10 +70,6 @@ matrix: env: - INSTALL_TYPE=requirements - DEPENDS= - - python: 3.5 - sudo: true - env: - - DOC_BUILD=1 before_install: - source travis-tools/utils.sh From 44bdd2b75bde1e12a1a4b0937915cc1fde88073d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 2 Apr 2020 00:37:35 -0700 Subject: [PATCH 026/187] py35 build on appveyor failing for pandas / cython issue --- appveyor.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 86ae986cd..6e121c1ee 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -28,9 +28,6 @@ environment: - PYTHON: C:\Python36-x64 NP_BUILD_DEP: "1.13.3" NP_TEST_DEP: "1.13.3" - - PYTHON: C:\Python35-x64 - NP_BUILD_DEP: "1.13.3" - NP_TEST_DEP: "1.13.3" - PYTHON: C:\Python37 NP_BUILD_DEP: "1.14.5" @@ -39,9 +36,14 @@ environment: - PYTHON: C:\Python36 NP_BUILD_DEP: "1.13.3" NP_TEST_DEP: "1.13.3" - - PYTHON: C:\Python35 - NP_BUILD_DEP: "1.13.3" - NP_TEST_DEP: "1.13.3" + + # problem with pandas + cython for py35 + # - PYTHON: C:\Python35-x64 + # NP_BUILD_DEP: "1.13.3" + # NP_TEST_DEP: "1.13.3" + # - PYTHON: C:\Python35 + # NP_BUILD_DEP: "1.13.3" + # NP_TEST_DEP: "1.13.3" install: - cmd: echo "Using cmd" From 245411ab564b9a4b7b4e6dbec95caef855c06cd7 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Thu, 16 Apr 2020 15:01:56 -0400 Subject: [PATCH 027/187] added class for posterior sampling --- selectinf/algorithms/api.py | 8 +- selectinf/algorithms/sqrt_lasso.py | 6 +- selectinf/constraints/affine.py | 12 +- selectinf/randomized/lasso.py | 13 +- selectinf/randomized/posterior_inference.py | 137 +++++++++++++++++++ selectinf/randomized/query.py | 2 +- selectinf/randomized/tests/test_lasso.py | 14 +- selectinf/randomized/tests/test_posterior.py | 70 ++++++++++ selectinf/sampling/api.py | 2 +- 9 files changed, 237 insertions(+), 27 deletions(-) create mode 100644 selectinf/randomized/posterior_inference.py create mode 100644 selectinf/randomized/tests/test_posterior.py diff --git a/selectinf/algorithms/api.py b/selectinf/algorithms/api.py index f15caa897..786bb2f5e 100644 --- a/selectinf/algorithms/api.py +++ b/selectinf/algorithms/api.py @@ -1,13 +1,13 @@ -from .lasso import (lasso, +from .lasso import (lasso, ROSI, data_carving as data_carving_lasso, additive_noise as additive_noise_lasso) -from .sqrt_lasso import (choose_lambda as choose_lambda_sqrt_lasso, +from .sqrt_lasso import (choose_lambda as choose_lambda_sqrt_lasso, solve_sqrt_lasso) -from .forward_step import (forward_step, +from .forward_step import (forward_step, info_crit_stop) -from .covtest import (covtest, +from .covtest import (covtest, selected_covtest) diff --git a/selectinf/algorithms/sqrt_lasso.py b/selectinf/algorithms/sqrt_lasso.py index 8bb03c5ef..36512c082 100644 --- a/selectinf/algorithms/sqrt_lasso.py +++ b/selectinf/algorithms/sqrt_lasso.py @@ -15,10 +15,10 @@ from regreg.smooth.glm import gaussian_loglike from regreg.affine import astransform -from ..constraints.affine import (constraints as affine_constraints, +from 
selectinf.constraints.affine import (constraints as affine_constraints, sample_from_sphere) -from ..distributions.discrete_multiparameter import multiparameter_family -from ..distributions.discrete_family import discrete_family +from selectinf.distributions.discrete_multiparameter import multiparameter_family +from selectinf.distributions.discrete_family import discrete_family class sqlasso_objective(rr.smooth_atom): """ diff --git a/selectinf/constraints/affine.py b/selectinf/constraints/affine.py index da4f30817..8be98688b 100644 --- a/selectinf/constraints/affine.py +++ b/selectinf/constraints/affine.py @@ -17,18 +17,18 @@ import numpy as np -from ..distributions.pvalue import truncnorm_cdf, norm_interval -from ..truncated.gaussian import truncated_gaussian, truncated_gaussian_old -from ..sampling.api import (sample_truncnorm_white, +from selectinf.distributions.pvalue import truncnorm_cdf, norm_interval +from selectinf.truncated.gaussian import truncated_gaussian, truncated_gaussian_old +from selectinf.sampling.api import (sample_truncnorm_white, sample_truncnorm_white_sphere, sample_truncnorm_white_ball) -from ..distributions.chain import (reversible_markov_chain, +from selectinf.distributions.chain import (reversible_markov_chain, parallel_test, serial_test) -from .estimation import optimal_tilt +from selectinf.constraints.estimation import optimal_tilt -from ..distributions.discrete_family import discrete_family +from selectinf.distributions.discrete_family import discrete_family from mpmath import mp WARNINGS = False diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index 9c73512ca..c9a5ec466 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -7,13 +7,13 @@ import regreg.api as rr -from ..algorithms.sqrt_lasso import solve_sqrt_lasso, choose_lambda +from selectinf.algorithms.sqrt_lasso import solve_sqrt_lasso, choose_lambda -from .query import gaussian_query +from selectinf.randomized.query import gaussian_query -from .randomization import randomization -from ..base import restricted_estimator -from ..algorithms.debiased_lasso import (debiasing_matrix, +from selectinf.randomized.randomization import randomization +from selectinf.base import restricted_estimator +from selectinf.algorithms.debiased_lasso import (debiasing_matrix, pseudoinverse_debiasing_matrix) #### High dimensional version @@ -224,6 +224,9 @@ def signed_basis_vector(p, j, s): if num_opt_var > 0: self._setup_sampler(*self._setup_sampler_data) + self.A_scaling = A_scaling + self.b_scaling = b_scaling + return active_signs def _solve_randomized_problem(self, diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py new file mode 100644 index 000000000..fa4f2bd1d --- /dev/null +++ b/selectinf/randomized/posterior_inference.py @@ -0,0 +1,137 @@ +from __future__ import division, print_function +import numpy as np, sys + +from selectinf.randomized.selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C +from scipy.stats import norm as ndist + +class posterior_inference_lasso(): + + def __init__(self, + observed_target, + cov_target, + cov_target_score, + feasible_point, + cond_mean, + cond_cov, + logdens_linear, + linear_part, + offset, + initial_estimate): + + self.ntarget = cov_target.shape[0] + self.nopt = cond_cov.shape[0] + + self.cond_precision = np.linalg.inv(cond_cov) + self.prec_target = np.linalg.inv(cov_target) + + self.observed_target = observed_target + self.cov_target_score = cov_target_score 
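A brief orientation to the class being added here; the symbols only rename quantities the code computes ($\Lambda$, $c$, $\Sigma_o$ are `linear_coef`, `offset_coef`, `cov_marginal` from `set_marginal_parameters`, $\hat\beta$ is `observed_target`, $\Sigma_T$ is `cov_target`). Conditional on selection, the target and the optimization variables of the randomized LASSO are modeled as jointly Gaussian; eliminating the optimization variables leaves, for a target parameter $\beta$, the law $N(\Lambda\beta + c, \Sigma_o)$ restricted to the affine selection region $\{o : Ao \le b\}$ (`linear_part`, `offset`). Up to constants free of $\beta$, the selection-adjusted log-likelihood evaluated in `log_posterior` is then

$$ -\tfrac{1}{2}(\hat\beta - \beta)^T \Sigma_T^{-1}(\hat\beta - \beta) \;-\; \log \int_{\{Ao \le b\}} \phi(o;\, \Lambda\beta + c,\, \Sigma_o)\, do, $$

with the integral approximated by the barrier-smoothed convex program `solve_barrier_affine`, whose solution also yields the extra gradient term consumed by the Langevin sampler defined further down.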
+ self.logdens_linear = logdens_linear + + self.feasible_point = feasible_point + self.cond_mean = cond_mean + self.linear_part = linear_part + self.offset = offset + + self.initial_estimate = initial_estimate + + self.set_marginal_parameters() + + def set_marginal_parameters(self): + + target_linear = -self.logdens_linear.dot(self.cov_target_score.T.dot(self.prec_target)) + + implied_precision = np.zeros((self.ntarget + self.nopt, self.ntarget + self.nopt)) + implied_precision[:self.ntarget, :self.ntarget] = (self.prec_target + target_linear.T.dot(self.cond_precision.dot(target_linear))) + implied_precision[:self.ntarget, self.ntarget:] = -target_linear.T.dot(self.cond_precision) + implied_precision[self.ntarget:, :self.ntarget] = (-target_linear.T.dot(self.cond_precision)).T + implied_precision[self.ntarget:, self.ntarget:] = self.cond_precision + + implied_cov = np.linalg.inv(implied_precision) + self.linear_coef = implied_cov[self.ntarget:, :self.ntarget].dot(self.prec_target) + + target_offset = self.cond_mean - target_linear.dot(self.observed_target) + M = implied_cov[self.ntarget:, self.ntarget:].dot(self.cond_precision.dot(target_offset)) + N = -target_linear.T.dot(self.cond_precision).dot(target_offset) + self.offset_coef = implied_cov[self.ntarget:, :self.ntarget].dot(N) + M + + self.cov_marginal = implied_cov[self.ntarget:, self.ntarget:] + + def prior(self, target_parameter, prior_var=100.): + + grad_prior = -target_parameter/prior_var + log_prior = -np.linalg.norm(target_parameter)/(2.*prior_var) + return grad_prior, log_prior + + def log_posterior(self, target_parameter, solve_args={'tol':1.e-12}): + + mean_marginal = self.linear_coef.dot(target_parameter) + self.offset_coef + prec_marginal = np.linalg.inv(self.cov_marginal) + conjugate_marginal = prec_marginal.dot(mean_marginal) + + solver = solve_barrier_affine_C + + val, soln, hess = solver(conjugate_marginal, + prec_marginal, + self.feasible_point, + self.linear_part, + self.offset, + **solve_args) + + log_normalizer = -val - mean_marginal.T.dot(prec_marginal).dot(mean_marginal)/2 + + log_lik = -((self.observed_target - target_parameter).T.dot(self.prec_target).dot(self.observed_target - target_parameter)) / 2.\ + - log_normalizer + + grad_lik = self.prec_target.dot(self.observed_target) - self.prec_target.dot(target_parameter) + \ + -self.linear_coef.T.dot(prec_marginal.dot(soln)- conjugate_marginal) + + grad_prior, log_prior = self.prior(target_parameter) + return grad_lik + grad_prior, log_lik + log_prior + + def posterior_sampler(self, nsample= 2000, nburnin=100, step=1.): + + state = self.initial_estimate + stepsize = 1. 
/ (step * self.ntarget) + + sampler = langevin(state, self.log_posterior, stepsize) + samples = np.zeros((nsample, self.ntarget)) + + for i in range(nsample): + sampler.next() + sys.stderr.write("sample number: " + str(i) + "sample: " + str(sampler.state.copy())+ "\n") + samples[i, :] = sampler.state.copy() + return samples[nburnin:, :] + +class langevin(object): + + def __init__(self, + initial_condition, + gradient_map, + stepsize): + + (self.state, + self.gradient_map, + self.stepsize) = (np.copy(initial_condition), + gradient_map, + stepsize) + self._shape = self.state.shape[0] + self._sqrt_step = np.sqrt(self.stepsize) + self._noise = ndist(loc=0,scale=1) + self.sample = np.copy(initial_condition) + + def __iter__(self): + return self + + def next(self): + while True: + grad_posterior = self.gradient_map(self.state) + candidate = (self.state + self.stepsize * grad_posterior[0] + + np.sqrt(2.)* self._noise.rvs(self._shape) * self._sqrt_step) + + if not np.all(np.isfinite(self.gradient_map(candidate)[0])): + self.stepsize *= 0.5 + self._sqrt_step = np.sqrt(self.stepsize) + else: + self.state[:] = candidate + break diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index b697afd85..eae063fcc 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -291,7 +291,7 @@ def log_density(logdens_linear, offset, cond_prec, opt, score): opt_offset, cond_precision) - self.cond_mean, self.cond_cov = cond_mean, cond_cov + self.cond_mean, self.cond_cov, self.logdens_linear = cond_mean, cond_cov, logdens_linear affine_con = constraints(A_scaling, b_scaling, diff --git a/selectinf/randomized/tests/test_lasso.py b/selectinf/randomized/tests/test_lasso.py index 13dae3769..4ee6a5291 100644 --- a/selectinf/randomized/tests/test_lasso.py +++ b/selectinf/randomized/tests/test_lasso.py @@ -5,13 +5,13 @@ import regreg.api as rr -from ..lasso import lasso, selected_targets, full_targets, debiased_targets -from ...tests.instance import gaussian_instance -from ...tests.flags import SET_SEED -from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue -from ...algorithms.sqrt_lasso import choose_lambda, solve_sqrt_lasso -from ..randomization import randomization -from ...tests.decorators import rpy_test_safe +from selectinf.randomized.lasso import lasso, selected_targets, full_targets, debiased_targets +from selectinf.tests.instance import gaussian_instance +from selectinf.tests.flags import SET_SEED +from selectinf.tests.decorators import set_sampling_params_iftrue, set_seed_iftrue +from selectinf.algorithms.sqrt_lasso import choose_lambda, solve_sqrt_lasso +from selectinf.randomized.randomization import randomization +from selectinf.tests.decorators import rpy_test_safe def test_highdim_lasso(n=500, p=200, diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py new file mode 100644 index 000000000..a0a7ffb10 --- /dev/null +++ b/selectinf/randomized/tests/test_posterior.py @@ -0,0 +1,70 @@ +import numpy as np +from selectinf.tests.instance import gaussian_instance +from selectinf.randomized.lasso import lasso, selected_targets +from selectinf.randomized.posterior_inference import posterior_inference_lasso + +def test_sampler(n=500, + p=100, + signal_fac=1., + s=5, + sigma=3., + rho=0.4, + randomizer_scale=1.): + + inst, const = gaussian_instance, lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + 
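The `langevin` helper above implements unadjusted Langevin dynamics: with step size $h$ and log-posterior gradient $\nabla \log \pi$, each move is $x_{t+1} = x_t + h\,\nabla \log \pi(x_t) + \sqrt{2h}\,\xi_t$ with $\xi_t \sim N(0, I)$, and the step size is halved whenever the gradient at a proposed point is not finite. A toy illustration on a standard normal target, purely for intuition (none of these names come from the package):

```python
import numpy as np

def toy_langevin(grad_log_pi, x0, h=0.01, nstep=2000):
    # unadjusted Langevin: x <- x + h * grad_log_pi(x) + sqrt(2h) * N(0, I)
    x = np.array(x0, dtype=float)
    for _ in range(nstep):
        x = x + h * grad_log_pi(x) + np.sqrt(2 * h) * np.random.standard_normal(x.shape)
    return x

# target N(0, 1): grad log pi(x) = -x; the final state is an approximate draw
draw = toy_langevin(lambda x: -x, np.zeros(1))
```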
sigma=sigma, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + randomizer_scale=randomizer_scale * sigma_) + + signs = conv.fit() + nonzero = signs != 0 + + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + + posterior_inf = posterior_inference_lasso(observed_target, + cov_target, + cov_target_score, + conv.observed_opt_state, + conv.cond_mean, + conv.cond_cov, + conv.logdens_linear, + conv.A_scaling, + conv.b_scaling, + observed_target) + + samples = posterior_inf.posterior_sampler(nsample=2000, nburnin=200, step=1.) + lci = np.percentile(samples, 5, axis=0) + uci = np.percentile(samples, 95, axis=0) + coverage = (lci < beta_target) * (uci > beta_target) + length = uci - lci + + print("check ", coverage, length) + + +test_sampler() diff --git a/selectinf/sampling/api.py b/selectinf/sampling/api.py index edc376c5b..6d7c63c3a 100644 --- a/selectinf/sampling/api.py +++ b/selectinf/sampling/api.py @@ -1,4 +1,4 @@ from .langevin import projected_langevin -from .truncnorm import (sample_truncnorm_white, +from .truncnorm import (sample_truncnorm_white, sample_truncnorm_white_sphere, sample_truncnorm_white_ball) From 0f5e4d6f020bb64ba70307e577af4e0fc36f205f Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 16 Apr 2020 12:41:03 -0700 Subject: [PATCH 028/187] reverting imports to original form, attributes can be found in other parts of lasso object for posterior --- selectinf/algorithms/api.py | 8 ++++---- selectinf/algorithms/sqrt_lasso.py | 6 +++--- selectinf/constraints/affine.py | 12 ++++++------ selectinf/randomized/lasso.py | 13 +++++-------- selectinf/randomized/query.py | 2 +- selectinf/randomized/tests/test_lasso.py | 14 +++++++------- selectinf/randomized/tests/test_posterior.py | 10 +++++++--- selectinf/sampling/api.py | 2 +- 8 files changed, 34 insertions(+), 33 deletions(-) diff --git a/selectinf/algorithms/api.py b/selectinf/algorithms/api.py index 786bb2f5e..f15caa897 100644 --- a/selectinf/algorithms/api.py +++ b/selectinf/algorithms/api.py @@ -1,13 +1,13 @@ -from .lasso import (lasso, +from .lasso import (lasso, ROSI, data_carving as data_carving_lasso, additive_noise as additive_noise_lasso) -from .sqrt_lasso import (choose_lambda as choose_lambda_sqrt_lasso, +from .sqrt_lasso import (choose_lambda as choose_lambda_sqrt_lasso, solve_sqrt_lasso) -from .forward_step import (forward_step, +from .forward_step import (forward_step, info_crit_stop) -from .covtest import (covtest, +from .covtest import (covtest, selected_covtest) diff --git a/selectinf/algorithms/sqrt_lasso.py b/selectinf/algorithms/sqrt_lasso.py index 36512c082..8bb03c5ef 100644 --- a/selectinf/algorithms/sqrt_lasso.py +++ b/selectinf/algorithms/sqrt_lasso.py @@ -15,10 +15,10 @@ from regreg.smooth.glm import gaussian_loglike from regreg.affine import astransform -from selectinf.constraints.affine import (constraints as affine_constraints, +from ..constraints.affine import (constraints as affine_constraints, sample_from_sphere) -from selectinf.distributions.discrete_multiparameter import multiparameter_family -from selectinf.distributions.discrete_family import discrete_family +from ..distributions.discrete_multiparameter import multiparameter_family +from 
..distributions.discrete_family import discrete_family class sqlasso_objective(rr.smooth_atom): """ diff --git a/selectinf/constraints/affine.py b/selectinf/constraints/affine.py index 8be98688b..da4f30817 100644 --- a/selectinf/constraints/affine.py +++ b/selectinf/constraints/affine.py @@ -17,18 +17,18 @@ import numpy as np -from selectinf.distributions.pvalue import truncnorm_cdf, norm_interval -from selectinf.truncated.gaussian import truncated_gaussian, truncated_gaussian_old -from selectinf.sampling.api import (sample_truncnorm_white, +from ..distributions.pvalue import truncnorm_cdf, norm_interval +from ..truncated.gaussian import truncated_gaussian, truncated_gaussian_old +from ..sampling.api import (sample_truncnorm_white, sample_truncnorm_white_sphere, sample_truncnorm_white_ball) -from selectinf.distributions.chain import (reversible_markov_chain, +from ..distributions.chain import (reversible_markov_chain, parallel_test, serial_test) -from selectinf.constraints.estimation import optimal_tilt +from .estimation import optimal_tilt -from selectinf.distributions.discrete_family import discrete_family +from ..distributions.discrete_family import discrete_family from mpmath import mp WARNINGS = False diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index c9a5ec466..9c73512ca 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -7,13 +7,13 @@ import regreg.api as rr -from selectinf.algorithms.sqrt_lasso import solve_sqrt_lasso, choose_lambda +from ..algorithms.sqrt_lasso import solve_sqrt_lasso, choose_lambda -from selectinf.randomized.query import gaussian_query +from .query import gaussian_query -from selectinf.randomized.randomization import randomization -from selectinf.base import restricted_estimator -from selectinf.algorithms.debiased_lasso import (debiasing_matrix, +from .randomization import randomization +from ..base import restricted_estimator +from ..algorithms.debiased_lasso import (debiasing_matrix, pseudoinverse_debiasing_matrix) #### High dimensional version @@ -224,9 +224,6 @@ def signed_basis_vector(p, j, s): if num_opt_var > 0: self._setup_sampler(*self._setup_sampler_data) - self.A_scaling = A_scaling - self.b_scaling = b_scaling - return active_signs def _solve_randomized_problem(self, diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index eae063fcc..b697afd85 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -291,7 +291,7 @@ def log_density(logdens_linear, offset, cond_prec, opt, score): opt_offset, cond_precision) - self.cond_mean, self.cond_cov, self.logdens_linear = cond_mean, cond_cov, logdens_linear + self.cond_mean, self.cond_cov = cond_mean, cond_cov affine_con = constraints(A_scaling, b_scaling, diff --git a/selectinf/randomized/tests/test_lasso.py b/selectinf/randomized/tests/test_lasso.py index 4ee6a5291..13dae3769 100644 --- a/selectinf/randomized/tests/test_lasso.py +++ b/selectinf/randomized/tests/test_lasso.py @@ -5,13 +5,13 @@ import regreg.api as rr -from selectinf.randomized.lasso import lasso, selected_targets, full_targets, debiased_targets -from selectinf.tests.instance import gaussian_instance -from selectinf.tests.flags import SET_SEED -from selectinf.tests.decorators import set_sampling_params_iftrue, set_seed_iftrue -from selectinf.algorithms.sqrt_lasso import choose_lambda, solve_sqrt_lasso -from selectinf.randomized.randomization import randomization -from selectinf.tests.decorators import rpy_test_safe +from ..lasso import 
lasso, selected_targets, full_targets, debiased_targets +from ...tests.instance import gaussian_instance +from ...tests.flags import SET_SEED +from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue +from ...algorithms.sqrt_lasso import choose_lambda, solve_sqrt_lasso +from ..randomization import randomization +from ...tests.decorators import rpy_test_safe def test_highdim_lasso(n=500, p=200, diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index a0a7ffb10..8a028d3a7 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -47,15 +47,19 @@ def test_sampler(n=500, nonzero, dispersion=dispersion) + A_scaling = conv.sampler.affine_con.linear_part + b_scaling = conv.sampler.affine_con.offset + logdens_linear = conv.sampler.logdens_transform[0] + posterior_inf = posterior_inference_lasso(observed_target, cov_target, cov_target_score, conv.observed_opt_state, conv.cond_mean, conv.cond_cov, - conv.logdens_linear, - conv.A_scaling, - conv.b_scaling, + logdens_linear, + A_scaling, + b_scaling, observed_target) samples = posterior_inf.posterior_sampler(nsample=2000, nburnin=200, step=1.) diff --git a/selectinf/sampling/api.py b/selectinf/sampling/api.py index 6d7c63c3a..edc376c5b 100644 --- a/selectinf/sampling/api.py +++ b/selectinf/sampling/api.py @@ -1,4 +1,4 @@ from .langevin import projected_langevin -from .truncnorm import (sample_truncnorm_white, +from .truncnorm import (sample_truncnorm_white, sample_truncnorm_white_sphere, sample_truncnorm_white_ball) From da2fcb36582dc7c4a579815789e34183f424b522 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Sat, 18 Apr 2020 17:36:52 -0400 Subject: [PATCH 029/187] corrected prior --- selectinf/randomized/posterior_inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index fa4f2bd1d..a4d0a89e6 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -60,7 +60,7 @@ def set_marginal_parameters(self): def prior(self, target_parameter, prior_var=100.): grad_prior = -target_parameter/prior_var - log_prior = -np.linalg.norm(target_parameter)/(2.*prior_var) + log_prior = -np.linalg.norm(target_parameter)**2 /(2.*prior_var) return grad_prior, log_prior def log_posterior(self, target_parameter, solve_args={'tol':1.e-12}): @@ -83,7 +83,7 @@ def log_posterior(self, target_parameter, solve_args={'tol':1.e-12}): log_lik = -((self.observed_target - target_parameter).T.dot(self.prec_target).dot(self.observed_target - target_parameter)) / 2.\ - log_normalizer - grad_lik = self.prec_target.dot(self.observed_target) - self.prec_target.dot(target_parameter) + \ + grad_lik = self.prec_target.dot(self.observed_target) - self.prec_target.dot(target_parameter) \ -self.linear_coef.T.dot(prec_marginal.dot(soln)- conjugate_marginal) grad_prior, log_prior = self.prior(target_parameter) From f2c51ebb94fa93d2a64433425d8a6c6b905e72b7 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 21 Apr 2020 15:53:22 -0700 Subject: [PATCH 030/187] able to use degenerate gaussian randomization for e.g. 
followup LASSO --- selectinf/randomized/lasso.py | 10 +- selectinf/randomized/posterior_inference.py | 6 +- selectinf/randomized/query.py | 119 +++++++---- selectinf/randomized/randomization.py | 38 ++++ selectinf/randomized/screening.py | 11 +- selectinf/randomized/tests/test_BH.py | 79 ++++---- .../randomized/tests/test_drop_losers.py | 186 ++++++++++++++++++ selectinf/randomized/tests/test_lasso.py | 49 ++--- .../tests/test_marginal_screening.py | 40 ++-- .../randomized/tests/test_multiple_queries.py | 19 +- selectinf/randomized/tests/test_posterior.py | 2 - .../tests/test_selective_MLE_high.py | 19 +- .../tests/test_selective_MLE_onedim.py | 22 ++- selectinf/randomized/tests/test_slope.py | 71 ++++--- .../randomized/tests/test_split_lasso.py | 27 +-- selectinf/randomized/tests/test_topK.py | 25 +-- 16 files changed, 532 insertions(+), 191 deletions(-) create mode 100644 selectinf/randomized/tests/test_drop_losers.py diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index 9c73512ca..38d90ed04 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -310,7 +310,10 @@ def gaussian(X, """ - loglike = rr.glm.gaussian(X, Y, coef=1. / sigma ** 2, quadratic=quadratic) + loglike = rr.glm.gaussian(X, + Y, + coef=1. / sigma ** 2, + quadratic=quadratic) n, p = X.shape mean_diag = np.mean((X ** 2).sum(0)) @@ -324,7 +327,8 @@ def gaussian(X, return lasso(loglike, np.asarray(feature_weights) / sigma ** 2, - ridge_term, randomizer) + ridge_term, + randomizer) @staticmethod def logistic(X, @@ -1001,8 +1005,6 @@ def gaussian(X, quadratic=quadratic) n, p = X.shape - mean_diag = np.mean((X ** 2).sum(0)) - return split_lasso(loglike, np.asarray(feature_weights) / sigma ** 2, proportion) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index fa4f2bd1d..99b8be9e2 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -1,7 +1,8 @@ from __future__ import division, print_function -import numpy as np, sys -from selectinf.randomized.selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C +import numpy as np + +from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C from scipy.stats import norm as ndist class posterior_inference_lasso(): @@ -99,7 +100,6 @@ def posterior_sampler(self, nsample= 2000, nburnin=100, step=1.): for i in range(nsample): sampler.next() - sys.stderr.write("sample number: " + str(i) + "sample: " + str(sampler.state.copy())+ "\n") samples[i, :] = sampler.state.copy() return samples[nburnin:, :] diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index b697afd85..06396878e 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -2,6 +2,7 @@ from itertools import product import numpy as np +import pandas as pd from scipy.stats import norm as ndist from scipy.optimize import bisect @@ -75,7 +76,10 @@ def randomize(self, perturb=None): """ if not self._randomized: - self.randomized_loss, self._initial_omega = self.randomization.randomize(self.loss, self.epsilon, perturb=perturb) + (self.randomized_loss, + self._initial_omega) = self.randomization.randomize(self.loss, + self.epsilon, + perturb=perturb) self._randomized = True def get_sampler(self): @@ -170,10 +174,6 @@ def summary(self, normal_sample=target_sample, alternatives=alternatives) - MLE_intervals = self.selective_MLE(observed_target, - target_cov, - target_score_cov)[5] - if not 
np.all(parameter == 0): pvalues = self.sampler.coefficient_pvalues(observed_target, target_cov, @@ -185,22 +185,34 @@ def summary(self, else: pvalues = pivots - intervals = None + result = pd.DataFrame({'target':observed_target, + 'pvalue':pvalues}) + if compute_intervals: - MLE_intervals = self.selective_MLE(observed_target, - target_cov, - target_score_cov)[4] + MLE = query.selective_MLE(self, + observed_target, + target_cov, + target_score_cov)[0] + MLE_intervals = np.asarray(MLE[['lower', 'upper']]) + + intervals = self.sampler.confidence_intervals( + observed_target, + target_cov, + target_score_cov, + sample=(opt_sample, logW), + normal_sample=target_sample, + initial_guess=MLE_intervals, + level=level) + + result.insert(2, 'lower', intervals[:,0]) + result.insert(3, 'upper', intervals[:,1]) - intervals = self.sampler.confidence_intervals(observed_target, - target_cov, - target_score_cov, - sample=(opt_sample, logW), - normal_sample=target_sample, - initial_guess=MLE_intervals, - level=level) + if not np.all(parameter == 0): + result.insert(4, 'pivot', pivots) + result.insert(5, 'parameter', parameter) - return pivots, pvalues, intervals + return result def selective_MLE(self, observed_target, @@ -260,15 +272,16 @@ def fit(self, perturb=None): # Private methods def _setup_sampler(self, - A_scaling, - b_scaling, + linear_part, + offset, opt_linear, opt_offset, # optional dispersion parameter # for covariance of randomization dispersion=1): - if not np.all(A_scaling.dot(self.observed_opt_state) - b_scaling <= 0): + A, b = linear_part, offset + if not np.all(A.dot(self.observed_opt_state) - b <= 0): raise ValueError('constraints not satisfied') (cond_mean, @@ -293,8 +306,8 @@ def log_density(logdens_linear, offset, cond_prec, opt, score): self.cond_mean, self.cond_cov = cond_mean, cond_cov - affine_con = constraints(A_scaling, - b_scaling, + affine_con = constraints(A, + b, mean=cond_mean, covariance=cond_cov) @@ -441,7 +454,7 @@ def summary(self, if not np.all(parameter == 0): pvalues = self.coefficient_pvalues(observed_target, - parameter=parameter, + parameter=np.zeros_like(observed_target), alternatives=alternatives) else: pvalues = pivots @@ -451,8 +464,16 @@ def summary(self, intervals = self.confidence_intervals(observed_target, level) - return pivots, pvalues, intervals - + result = pd.DataFrame({'target':observed_target, + 'pvalue':pvalues, + 'lower':intervals[:,0], + 'upper':intervals[:,1]}) + + if not np.all(parameter == 0): + result.insert(4, 'pivot', pivots) + result.insert(5, 'parameter', parameter) + + return result def coefficient_pvalues(self, observed_target, @@ -1275,7 +1296,6 @@ def _rootL(gamma): delta *= 2 count += 1 lower = bisect(_rootL, Ll, Ul) - return lower + observed_stat, upper + observed_stat # Private methods @@ -1501,9 +1521,10 @@ def _solve_barrier_nonneg(conjugate_arg, def selective_MLE(observed_target, target_cov, target_score_cov, - init_soln, # initial (observed) value of optimization variables -- - # used as a feasible point. - # precise value used only for independent estimator + init_soln, # initial (observed) value of + # optimization variables -- used as a + # feasible point. precise value used + # only for independent estimator cond_mean, cond_cov, logdens_linear, @@ -1601,11 +1622,19 @@ def selective_MLE(observed_target, alpha = 1 - level quantile = ndist.ppf(1 - alpha / 2.) 
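#   [editorial aside, not part of the committed patch] The lines that follow
#   build Wald-type intervals centered at the selective MLE.  A minimal,
#   hedged sketch of the same construction, using hypothetical stand-in names
#   `mle` (the point estimate) and `mle_cov` (its estimated covariance,
#   playing the role of `observed_info_mean` here):
#
#       import numpy as np
#       from scipy.stats import norm as ndist
#       se = np.sqrt(np.diag(mle_cov))          # standard errors
#       q = ndist.ppf(0.975)                    # two-sided 95% quantile
#       lower, upper = mle - q * se, mle + q * se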
- intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), - final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T - - return final_estimator, observed_info_mean, Z_scores, pvalues, intervals, ind_unbiased_estimator - + intervals = np.vstack([final_estimator - + quantile * np.sqrt(np.diag(observed_info_mean)), + final_estimator + + quantile * np.sqrt(np.diag(observed_info_mean))]).T + + result = pd.DataFrame({'MLE':final_estimator, + 'SE':np.sqrt(np.diag(observed_info_mean)), + 'Zvalue':Z_scores, + 'pvalue':pvalues, + 'lower':intervals[:,0], + 'upper':intervals[:,1], + 'unbiased':ind_unbiased_estimator}) + return result, observed_info_mean def normalizing_constant(target_parameter, observed_target, @@ -1717,3 +1746,25 @@ def normalizing_constant(target_parameter, soln[:ntarget], hess[:ntarget][:,:ntarget]) + +def _bisect(f, lb, ub, min_iter=20, max_iter=100, tol=1.e-3): + + while True: + sign_l = np.sign(f(lb)) + sign_u = np.sign(f(ub)) + mid = 0.5 * (lb + ub) + f_mid = f(mid) + if sign_l == 1: + if f_mid > 0: # we should move closer to upper + lb = mid + else: + ub = mid + else: + if f_mid > 0: # we should move closer to lower + ub = mid + else: + lb = mid + + if np.fabs(f_mid) < tol: + break + return mid diff --git a/selectinf/randomized/randomization.py b/selectinf/randomized/randomization.py index f7dd4e10b..54437990a 100644 --- a/selectinf/randomized/randomization.py +++ b/selectinf/randomized/randomization.py @@ -168,6 +168,44 @@ def gaussian(covariance): log_density = lambda x: -np.sum(sqrt_precision.dot(np.atleast_2d(x).T)**2, 0) * 0.5 - np.log(_const), cov_prec=(covariance, precision)) + @staticmethod + def degenerate_gaussian(covariance, tol=1.e-6): + """ + Gaussian noise with a given covariance. + Parameters + ---------- + covariance : np.float((*,*)) + Positive definite covariance matrix. Non-negative definite + will raise an error. + """ + p = covariance.shape[0] + U, D, _ = np.linalg.svd(covariance) + keep = D > D.max() * tol + rank = keep.sum() + sqrt_cov = U[:,keep].dot(np.diag(np.sqrt(D[keep]))) + sqrt_precision = U[:,keep].dot(np.diag(1./np.sqrt(D[keep]))) + precision = sqrt_precision.dot(sqrt_precision.T) + _const = 1. 
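#   [editorial aside, not part of the committed patch] With the SVD pieces
#   computed above, sqrt_cov = U[:, keep] diag(sqrt(D[keep])) and
#   sqrt_precision = U[:, keep] diag(1 / sqrt(D[keep])), so the product
#   covariance.dot(sqrt_precision) collapses back to sqrt_cov.  The `sampler`
#   lambda defined just below therefore draws from the rank-deficient normal
#   by computing sqrt_cov.dot(z) with z ~ N(0, I_rank), the usual square-root
#   construction for sampling a degenerate Gaussian.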
+ density = lambda x: np.exp(-(x * precision.dot(x)).sum() / 2) / _const + cdf = lambda x: None + pdf = lambda x: None + derivative_log_density = lambda x: None + grad_negative_log_density = lambda x: precision.dot(x) + sampler = lambda size: covariance.dot(sqrt_precision.dot(np.random.standard_normal((rank,) + size))) + + return randomization((p,), + density, + cdf, + pdf, + derivative_log_density, + grad_negative_log_density, + sampler, + lipschitz=(1/D[keep]).max(), + log_density = lambda x: -np.sum(sqrt_precision.T.dot(np.atleast_2d(x).T)**2, 0) * 0.5 - np.log(_const), + cov_prec=(covariance, precision)) + + + @staticmethod def laplace(shape, scale): """ diff --git a/selectinf/randomized/screening.py b/selectinf/randomized/screening.py index 0aab6d341..b87ae0027 100644 --- a/selectinf/randomized/screening.py +++ b/selectinf/randomized/screening.py @@ -39,7 +39,10 @@ def multivariate_targets(self, features, dispersion=1.): crosscov_target_score = -score_linear.dot(cov_target) alternatives = ['twosided'] * features.sum() - return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives + return (observed_target, + cov_target * dispersion, + crosscov_target_score.T * dispersion, + alternatives) def full_targets(self, features, dispersion=1.): """ @@ -104,7 +107,7 @@ def fit(self, perturb=None): self.num_opt_var = self.observed_opt_state.shape[0] opt_linear = np.zeros((p, self.num_opt_var)) - opt_linear[self._selected,:] = np.diag(active_signs) + opt_linear[self._selected] = np.diag(active_signs) opt_offset = np.zeros(p) opt_offset[self._selected] = active_signs * self.threshold[self._selected] opt_offset[self._not_selected] = _randomized_score[self._not_selected] @@ -324,7 +327,7 @@ def fit(self, perturb=None): self.num_opt_var = self.observed_opt_state.shape[0] opt_linear = np.zeros((p, self.num_opt_var)) - opt_linear[self._selected,:] = np.diag(topK_signs) + opt_linear[self._selected] = np.diag(topK_signs) opt_offset = np.zeros(p) else: @@ -342,7 +345,7 @@ def fit(self, perturb=None): self.num_opt_var = self.observed_opt_state.shape[0] opt_linear = np.zeros((p, self.num_opt_var)) - opt_linear[self._selected,:] = np.identity(self.num_opt_var) + opt_linear[self._selected] = np.identity(self.num_opt_var) opt_offset = np.zeros(p) # in both cases, this conditioning means we just need to compute diff --git a/selectinf/randomized/tests/test_BH.py b/selectinf/randomized/tests/test_BH.py index 07192cfcb..e581c6350 100644 --- a/selectinf/randomized/tests/test_BH.py +++ b/selectinf/randomized/tests/test_BH.py @@ -53,31 +53,36 @@ def test_independent_estimator(n=100, n1=50, q=0.2, signal=3, p=100): perturb = Zbar1 - Zbar frac = n1 * 1. 
/ n - BH_select = stepup.BH(Zbar, np.identity(p) / n, np.sqrt((1 - frac) / (n * frac)), q=q) + BH_select = stepup.BH(Zbar, np.identity(p) / n, + np.sqrt((1 - frac) / (n * frac)), q=q) selected = BH_select.fit(perturb=perturb) observed_target = Zbar[selected] cov_target = np.identity(selected.sum()) / n cross_cov = -np.identity(p)[selected] / n - observed_target1, cov_target1, cross_cov1, _ = BH_select.marginal_targets(selected) - - assert(np.linalg.norm(observed_target - observed_target1) / np.linalg.norm(observed_target) < 1.e-7) - assert(np.linalg.norm(cov_target - cov_target1) / np.linalg.norm(cov_target) < 1.e-7) - assert(np.linalg.norm(cross_cov - cross_cov1) / np.linalg.norm(cross_cov) < 1.e-7) - - (final_estimator, - _, - Z_scores, - pvalues, - intervals, - ind_unbiased_estimator) = BH_select.selective_MLE(observed_target, cov_target, cross_cov) - + (observed_target1, + cov_target1, + cross_cov1, + _) = BH_select.marginal_targets(selected) + + assert(np.linalg.norm(observed_target - observed_target1) / + np.linalg.norm(observed_target) < 1.e-7) + assert(np.linalg.norm(cov_target - cov_target1) / + np.linalg.norm(cov_target) < 1.e-7) + assert(np.linalg.norm(cross_cov - cross_cov1) / np.linalg.norm(cross_cov) + < 1.e-7) + + result = BH_select.selective_MLE(observed_target, cov_target, cross_cov)[0] + Z = result['Zvalue'] + ind_unbiased_estimator = result['unbiased'] Zbar2 = Z[n1:].mean(0)[selected] - assert(np.linalg.norm(ind_unbiased_estimator - Zbar2) / np.linalg.norm(Zbar2) < 1.e-6) + assert(np.linalg.norm(ind_unbiased_estimator - Zbar2) + / np.linalg.norm(Zbar2) < 1.e-6) np.testing.assert_allclose(sorted(np.nonzero(selected)[0]), - sorted(BHfilter(2 * ndist.sf(np.fabs(np.sqrt(n1) * Zbar1))))) + sorted(BHfilter(2 * ndist.sf(np.fabs( + np.sqrt(n1) * Zbar1))))) def test_BH(n=500, @@ -133,28 +138,33 @@ def test_BH(n=500, if use_MLE: print('huh') - estimate, info, _, pval, intervals, _ = BH_select.selective_MLE(observed_target, - cov_target, - crosscov_target_score, - level=level) - pivots = ndist.cdf((estimate - beta_target) / np.sqrt(np.diag(info))) + result = BH_select.selective_MLE(observed_target, + cov_target, + crosscov_target_score, + level=level)[0] + estimate = result['MLE'] + pivots = ndist.cdf((estimate - beta_target) / result['SE']) pivots = 2 * np.minimum(pivots, 1 - pivots) # run summary else: - pivots, pval, intervals = BH_select.summary(observed_target, - cov_target, - crosscov_target_score, - alternatives, - compute_intervals=True, - level=level, - ndraw=20000, - burnin=2000, - parameter=beta_target) + result = BH_select.summary(observed_target, + cov_target, + crosscov_target_score, + alternatives, + compute_intervals=True, + level=level, + ndraw=20000, + burnin=2000, + parameter=beta_target) + pivots = np.asarray(result['pivot']) + pval = np.asarray(result['pvalue']) + lower = np.asarray(result['lower']) + upper = np.asarray(result['upper']) print(pval) - print("beta_target and intervals", beta_target, intervals) - coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) + print("beta_target and intervals", beta_target, result[['lower', 'upper']]) + coverage = (beta_target > lower) * (beta_target < upper) print("coverage for selected target", coverage.sum()/float(nonzero.sum())) - return pivots[beta_target == 0], pivots[beta_target != 0], coverage, intervals, pivots + return pivots[beta_target == 0], pivots[beta_target != 0], coverage, result[['lower', 'upper']], pivots else: return [], [], [], [], [] @@ -170,7 +180,8 @@ def main(nsim=500, 
use_MLE=True, marginal=False): P0, PA, cover, length_int = [], [], [], [] Ps = [] for i in range(nsim): - p0, pA, cover_, intervals, pivots = test_BH(use_MLE=use_MLE, marginal=marginal) + p0, pA, cover_, intervals, pivots = test_BH(use_MLE=use_MLE, + marginal=marginal) Ps.extend(pivots) cover.extend(cover_) P0.extend(p0) diff --git a/selectinf/randomized/tests/test_drop_losers.py b/selectinf/randomized/tests/test_drop_losers.py new file mode 100644 index 000000000..4d78d8afc --- /dev/null +++ b/selectinf/randomized/tests/test_drop_losers.py @@ -0,0 +1,186 @@ +import numpy as np, pandas as pd + +from ..drop_losers import drop_losers +from ..screening import topK +from ..randomization import randomization + +def test_drop_losers(p=50, + K=5, + n=300, + use_MLE=True): + + arm = [] + data = [] + stage = [] + for a in range(p): + N = int(np.random.poisson(n, size=(1,))) + arm.extend([a]*N) + stage.extend([1]*N) + data.extend(list(np.random.standard_normal(N))) + + df = pd.DataFrame({'arm':arm, + 'stage':stage, + 'data':data}) + + grouped = df.groupby('arm') + stage1_means = df.groupby('arm').mean().sort_values('data', ascending=False) + winners = list(stage1_means.index[:K]) + + for winner in winners: + N = int(np.random.poisson(30, size=(1,))) + arm.extend([winner]*N) + stage.extend([2]*N) + data.extend(list(np.random.standard_normal(N))) + + df = pd.DataFrame({'arm':arm, + 'stage':stage, + 'data':data}) + + dtl = drop_losers(df, + K=K) + + dtl.selective_MLE() + if not use_MLE: + result = dtl.summary(ndraw=20000, burnin=5000) + else: + result = dtl.selective_MLE()[0] + pvalue = np.asarray(result['pvalue']) + lower = np.asarray(result['lower']) + upper = np.asarray(result['upper']) + cover = (lower < 0) * (upper > 0) + + return pvalue, cover + +def test_compare_topK(p=20, + K=5, + n=100): + + arm = [] + data = [] + stage = [] + for a in range(p): + N = int(np.random.poisson(n, size=(1,))) + arm.extend([a]*N) + stage.extend([1]*N) + data.extend(list(np.random.standard_normal(N))) + + df1 = pd.DataFrame({'arm':arm, + 'stage':stage, + 'data':data}) + + grouped = df1.groupby('arm') + stage1_means = df1.groupby('arm').mean().sort_values('data', ascending=False) + winners = list(stage1_means.index[:K]) + + for winner in winners: + N = int(np.random.poisson(30, size=(1,))) + arm.extend([winner]*N) + stage.extend([2]*N) + data.extend(list(np.random.standard_normal(N))) + + df2 = pd.DataFrame({'arm':arm, + 'stage':stage, + 'data':data}) + + dtl = drop_losers(df2, + K=K) + + # need additional data for randomized api with non-degenerate covariance + + for a in range(p): + if a not in winners: + N = int(np.random.poisson(30, size=(1,))) + arm.extend([a]*N) + stage.extend([2]*N) + data.extend(list(np.random.standard_normal(N))) + + df_full = pd.DataFrame({'arm':arm, + 'stage':stage, + 'data':data}) + full_means = df_full.groupby('arm').mean()['data'].iloc[range(p)] + full_std = df_full.groupby('arm').std()['data'].iloc[range(p)] + n_1 = df1.groupby('arm').count()['data'].iloc[range(p)] + n_full = df_full.groupby('arm').count()['data'].iloc[range(p)] + print(n_1, n_full) + stage1_means = df1.groupby('arm').mean()['data'].iloc[range(p)] + perturb = np.array(stage1_means) - np.array(full_means) + + covariance = np.diag(np.array(full_std)**2 / np.array(n_full)) + randomizer = randomization.gaussian(np.diag(np.array(full_std)**2 / np.array(n_1)) - + covariance) + + randomized_topK = topK(full_means, + covariance, + randomizer, + K, + perturb=perturb) + + randomized_topK.fit(perturb=perturb) + + (observed_target, 
+ target_cov, + target_score_cov, + _) = randomized_topK.marginal_targets(randomized_topK.selection_variable['variables']) + + # try with a degenerate covariance now + + means2 = df2.groupby('arm').mean()['data'].iloc[range(p)] + std2 = df2.groupby('arm').std()['data'].iloc[range(p)] + n_2 = df2.groupby('arm').count()['data'].iloc[range(p)] + stage1_means = df1.groupby('arm').mean()['data'].iloc[range(p)] + perturb2 = np.array(stage1_means) - np.array(means2) + covariance2 = np.diag(np.array(std2)**2 / np.array(n_2)) + degenerate_randomizer = randomization.degenerate_gaussian( + np.diag(np.array(std2)**2 / + np.array(n_1)) - + covariance2) + + degenerate_topK = topK(means2, + covariance2, + degenerate_randomizer, + K, + perturb=perturb2) + + np.random.seed(0) + summary1 = randomized_topK.summary(observed_target, + target_cov, + target_score_cov, + alternatives=['twosided']*K, + ndraw=10000, + burnin=2000, + compute_intervals=True) + np.random.seed(0) + summary2 = dtl.summary(ndraw=10000, + burnin=2000) + + np.testing.assert_allclose(summary1['pvalue'], summary2['pvalue'], rtol=1.e-3) + np.testing.assert_allclose(summary1['target'], summary2['target'], rtol=1.e-3) + np.testing.assert_allclose(summary1['lower'], summary2['lower'], rtol=1.e-3) + np.testing.assert_allclose(summary1['upper'], summary2['upper'], rtol=1.e-3) + + np.random.seed(0) + degenerate_topK.fit(perturb=perturb2) + summary3 = degenerate_topK.summary(observed_target, + target_cov, + target_score_cov, + alternatives=['twosided']*K, + ndraw=10000, + burnin=2000, + compute_intervals=True) + + np.testing.assert_allclose(summary1['pvalue'], summary3['pvalue'], rtol=1.e-3) + np.testing.assert_allclose(summary1['target'], summary3['target'], rtol=1.e-3) + np.testing.assert_allclose(summary1['lower'], summary3['lower'], rtol=1.e-3) + np.testing.assert_allclose(summary1['upper'], summary3['upper'], rtol=1.e-3) + + +def main(nsim=100, use_MLE=True): + + P0, cover = [], [] + + for i in range(nsim): + p0, cover_ = test_drop_losers(use_MLE=use_MLE) + + cover.extend(cover_) + P0.extend(p0) + print('coverage', np.mean(cover)) diff --git a/selectinf/randomized/tests/test_lasso.py b/selectinf/randomized/tests/test_lasso.py index 13dae3769..01b5b110a 100644 --- a/selectinf/randomized/tests/test_lasso.py +++ b/selectinf/randomized/tests/test_lasso.py @@ -78,14 +78,15 @@ def test_highdim_lasso(n=500, nonzero, penalty=conv.penalty) - _, pval, intervals = conv.summary(observed_target, - cov_target, - cov_target_score, - alternatives, - ndraw=ndraw, - burnin=burnin, - compute_intervals=True) - + result = conv.summary(observed_target, + cov_target, + cov_target_score, + alternatives, + ndraw=ndraw, + burnin=burnin, + compute_intervals=True) + pval = result['pvalue'] + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0] def test_AR_randomization(n=300, @@ -165,14 +166,15 @@ def test_AR_randomization(n=300, nonzero, penalty=conv.penalty) - _, pval, intervals = conv.summary(observed_target, - cov_target, - cov_target_score, - alternatives, - ndraw=ndraw, - burnin=burnin, - compute_intervals=True) - + result = conv.summary(observed_target, + cov_target, + cov_target_score, + alternatives, + ndraw=ndraw, + burnin=burnin, + compute_intervals=True) + pval = result['pvalue'] + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0] def test_all_targets(n=100, p=20, signal_fac=1.5, s=5, sigma=3, rho=0.4): @@ -262,13 +264,14 @@ def test_sqrt_highdim_lasso(n=500, conv._W, nonzero) - _, pval, intervals = conv.summary(observed_target, - cov_target, - 
cov_target_score, - alternatives, - ndraw=ndraw, - burnin=burnin, - compute_intervals=False) + result = conv.summary(observed_target, + cov_target, + cov_target_score, + alternatives, + ndraw=ndraw, + burnin=burnin, + compute_intervals=False) + pval = result['pvalue'] return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0] diff --git a/selectinf/randomized/tests/test_marginal_screening.py b/selectinf/randomized/tests/test_marginal_screening.py index 6e7a564e0..e416cdade 100644 --- a/selectinf/randomized/tests/test_marginal_screening.py +++ b/selectinf/randomized/tests/test_marginal_screening.py @@ -57,17 +57,19 @@ def test_marginal(n=500, alternatives) = marginal_select.multivariate_targets(nonzero, dispersion=sigma**2) if use_MLE: - estimate, _, _, pval, intervals, _ = marginal_select.selective_MLE(observed_target, - cov_target, - crosscov_target_score) + result = marginal_select.selective_MLE(observed_target, + cov_target, + crosscov_target_score)[0] # run summary else: - _, pval, intervals = marginal_select.summary(observed_target, - cov_target, - crosscov_target_score, - alternatives, - compute_intervals=True) - + result = marginal_select.summary(observed_target, + cov_target, + crosscov_target_score, + alternatives, + compute_intervals=True) + + intervals = np.asarray(result[['lower', 'upper']]) + pval = result['pvalue'] print(pval) if marginal: beta_target = true_mean[nonzero] @@ -138,17 +140,19 @@ def test_simple(n=100, alternatives) = marginal_select.marginal_targets(nonzero) if use_MLE: - estimate, _, _, pval, intervals, _ = marginal_select.selective_MLE(observed_target, - cov_target, - crosscov_target_score) + result = marginal_select.selective_MLE(observed_target, + cov_target, + crosscov_target_score) # run summary else: - _, pval, intervals = marginal_select.summary(observed_target, - cov_target, - crosscov_target_score, - alternatives, - compute_intervals=True) - + result = marginal_select.summary(observed_target, + cov_target, + crosscov_target_score, + alternatives, + compute_intervals=True) + + pval = result['pvalue'] + intervals = np.asarray(result[['lower', 'upper']]) print(pval) beta_target = cov_target.dot(true_mean[nonzero]) print("beta_target and intervals", beta_target, intervals) diff --git a/selectinf/randomized/tests/test_multiple_queries.py b/selectinf/randomized/tests/test_multiple_queries.py index 03a921862..38c069f9e 100644 --- a/selectinf/randomized/tests/test_multiple_queries.py +++ b/selectinf/randomized/tests/test_multiple_queries.py @@ -12,7 +12,15 @@ from ...algorithms.sqrt_lasso import choose_lambda, solve_sqrt_lasso # the test here is marginal_screening + lasso -def test_multiple_queries(n=500, p=100, signal_fac=1.5, s=5, sigma=3, rho=0.4, randomizer_scale=1, ndraw=5000, burnin=1000): +def test_multiple_queries(n=500, + p=100, + signal_fac=1.5, + s=5, + sigma=3, + rho=0.4, + randomizer_scale=1, + ndraw=5000, + burnin=1000): inst, const1, const2 = gaussian_instance, marginal_screening, lasso.gaussian signal = np.sqrt(signal_fac * np.log(p)) @@ -63,10 +71,11 @@ def test_multiple_queries(n=500, p=100, signal_fac=1.5, s=5, sigma=3, rho=0.4, r mq = multiple_queries([conv1, conv2]) - _, pval, intervals = mq.summary(observed_target1, - [(cov_target1, cov_target_score1), (cov_target2, cov_target_score2)], - compute_intervals=True) - + results = mq.summary(observed_target1, + [(cov_target1, cov_target_score1), + (cov_target2, cov_target_score2)], + compute_intervals=True) + pval = np.asarray(results['pvalue']) return pval[beta[nonzero] == 0], 
pval[beta[nonzero] != 0] diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index 8a028d3a7..ba74d5a72 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -70,5 +70,3 @@ def test_sampler(n=500, print("check ", coverage, length) - -test_sampler() diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 4a4d4a8a5..a8912718b 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -69,10 +69,12 @@ def test_full_targets(n=200, penalty=conv.penalty, dispersion=dispersion) - estimate, _, _, pval, intervals, _ = conv.selective_MLE(observed_target, - cov_target, - cov_target_score) - + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + pval = result['pvalue'] + estimate = result['MLE'] + intervals = np.asarray(result[['lower', 'upper']]) print("estimate, intervals", estimate, intervals) coverage = (beta[nonzero] > intervals[:, 0]) * (beta[nonzero] < intervals[:, 1]) @@ -134,9 +136,12 @@ def test_selected_targets(n=2000, nonzero, dispersion=dispersion) - estimate, _, _, pval, intervals, _ = conv.selective_MLE(observed_target, - cov_target, - cov_target_score) + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + pval = result['pvalue'] + estimate = result['MLE'] + intervals = np.asarray(result[['lower', 'upper']]) beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) diff --git a/selectinf/randomized/tests/test_selective_MLE_onedim.py b/selectinf/randomized/tests/test_selective_MLE_onedim.py index df3aea08d..9587991da 100644 --- a/selectinf/randomized/tests/test_selective_MLE_onedim.py +++ b/selectinf/randomized/tests/test_selective_MLE_onedim.py @@ -35,17 +35,27 @@ def test_onedim_lasso(n=50000, W=1.5, signal=2., sigma=1, randomizer_scale=1): conv._W, nonzero) - estimate_cur, I_cur, Z_cur, pv_cur = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[:4] + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score) + estimate_cur = float(result[0]['MLE']) + Z_cur = float(result[0]['Zvalue']) + pv_cur = float(result[0]['pvalue']) + I_cur = result[1] # this matches exactly with old code target_Z = X.T.dot(Y) / np.sqrt((X**2).sum(0)) - estimate, I, Z, pv = conv.sampler.selective_MLE(target_Z, sigma**2 * np.ones((1,1)), - -sigma**2 * np.ones((1,1)), np.ones((1,)), - solve_args={'tol':1.e-12})[:4] + result2 = conv.sampler.selective_MLE(target_Z, + sigma**2 * np.ones((1,1)), + -sigma**2 * np.ones((1,1)), + np.ones((1,)), + solve_args={'tol':1.e-12}) + estimate, I, Z, pv = (float(result2[0]['MLE']), + result2[1], + float(result2[0]['Zvalue']), + float(result2[0]['pvalue'])) target_transform = (-np.identity(1), np.zeros(1)) s = signs diff --git a/selectinf/randomized/tests/test_slope.py b/selectinf/randomized/tests/test_slope.py index 05d2ec257..5c31a848f 100644 --- a/selectinf/randomized/tests/test_slope.py +++ b/selectinf/randomized/tests/test_slope.py @@ -1,12 +1,12 @@ from ...tests.instance import gaussian_instance -import numpy as np +import numpy as np, pandas as pd from regreg.atoms.slope import slope as slope_atom import regreg.api as rr from ..slope import slope -from ..lasso import full_targets +from ..lasso import full_targets, selected_targets from ...tests.decorators import rpy_test_safe try: @@ -34,7 +34,7 @@ def slope_R(X, Y, W = 
None, normalize = True, choice_weights = "gaussian", sigma { if(choice_weights == "gaussian"){ lambda = "gaussian"} else{ - lambda = "bhq"} + lambda = "bh"} result = SLOPE(X, Y, fdr = fdr, lambda = lambda, normalize = normalize, sigma = sigma) } else{ result = SLOPE(X, Y, fdr = fdr, lambda = W, normalize = normalize, sigma = sigma) @@ -57,8 +57,8 @@ def slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian", sigma r_W = robjects.NA_Logical if choice_weights is "gaussian": r_choice_weights = robjects.StrVector('gaussian') - elif choice_weights is "bhq": - r_choice_weights = robjects.StrVector('bhq') + elif choice_weights is "bh": + r_choice_weights = robjects.StrVector('bh') else: r_W = robjects.r.matrix(W, nrow=p, ncol=1) @@ -69,12 +69,15 @@ def slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian", sigma result = r_slope(r_X, r_Y, r_W, r_normalize, r_choice_weights, r_sigma) - result = np.asarray(result.rx2('beta')), np.asarray(result.rx2('E')), \ - np.asarray(result.rx2('lambda_seq')), np.asscalar(np.array(result.rx2('sigma'))) + result = (np.asarray(result.rx2('beta')), + np.asarray(result.rx2('E')), + np.asarray(result.rx2('lambda_seq')).reshape(-1), + np.asscalar(np.array(result.rx2('sigma')))) rpy2.robjects.numpy2ri.deactivate() return result +@np.testing.dec.skipif(True, "extracting beta from SLOPE in R is troublesome here") @rpy_test_safe(libraries=['SLOPE']) def test_outputs_SLOPE_weights(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.35): @@ -97,6 +100,7 @@ def test_outputs_SLOPE_weights(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0 normalize = True, choice_weights = "gaussian", sigma = sigma_) + print("estimated sigma", sigma_, r_sigma) print("weights output by R", r_lambda_seq) print("output of est coefs R", r_beta) @@ -108,11 +112,19 @@ def test_outputs_SLOPE_weights(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0 soln = problem.solve() print("output of est coefs python", soln) + print(r_beta, 'huh') print("relative difference in solns", np.linalg.norm(soln-r_beta)/np.linalg.norm(r_beta)) @rpy_test_safe(libraries=['SLOPE']) -def test_randomized_slope(n=500, p=100, signal_fac=1.2, s=5, sigma=1., rho=0.35, randomizer_scale= np.sqrt(0.25), - target = "full", use_MLE=True): +def test_randomized_slope(n=2000, + p=100, + signal_fac=1.5, + s=10, + sigma=1., + rho=0.35, + randomizer_scale=0.7, + target = "full", + use_MLE=True): while True: inst = gaussian_instance @@ -127,16 +139,10 @@ def test_randomized_slope(n=500, p=100, signal_fac=1.2, s=5, sigma=1., rho=0.35, random_signs=True)[:3] sigma_ = np.sqrt(np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p)) - r_beta, r_E, r_lambda_seq, r_sigma = slope_R(X, - Y, - W=None, - normalize=True, - choice_weights="gaussian", #put gaussian - sigma=sigma_) conv = slope.gaussian(X, Y, - r_sigma * r_lambda_seq, + np.linspace(3, 1, p) * sigma_, randomizer_scale=randomizer_scale * sigma_) signs = conv.fit() @@ -166,27 +172,36 @@ def test_randomized_slope(n=500, p=100, signal_fac=1.2, s=5, sigma=1., rho=0.35, beta_target = beta[nonzero] if use_MLE: - estimate, _, _, pval, intervals, _ = conv.selective_MLE(observed_target, - cov_target, - cov_target_score) + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] else: - _, pval, intervals = conv.summary(observed_target, - cov_target, - cov_target_score, - alternatives, - compute_intervals=True) - coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) + result = conv.summary(observed_target, + 
cov_target, + cov_target_score, + alternatives, + compute_intervals=True, + ndraw=150000) + pval = np.asarray(result['pvalue']) + lower = np.asarray(result['lower']) + upper = np.asarray(result['upper']) + + print(pd.DataFrame({'target':beta_target, + 'lower':lower, + 'upper':upper})) + + coverage = (beta_target > lower) * (beta_target < upper) break if True: - return pval[beta_target == 0], pval[beta_target != 0], coverage, intervals + return pval[beta_target == 0], pval[beta_target != 0], coverage, lower, upper -def main(nsim=100): +def main(nsim=100, use_MLE=True): P0, PA, cover, length_int = [], [], [], [] for i in range(nsim): - p0, pA, cover_, intervals = test_randomized_slope() + p0, pA, cover_, _, _ = test_randomized_slope(use_MLE=use_MLE) cover.extend(cover_) P0.extend(p0) diff --git a/selectinf/randomized/tests/test_split_lasso.py b/selectinf/randomized/tests/test_split_lasso.py index 768903e3f..68c78cd8d 100644 --- a/selectinf/randomized/tests/test_split_lasso.py +++ b/selectinf/randomized/tests/test_split_lasso.py @@ -92,18 +92,21 @@ def test_split_lasso(n=100, penalty=conv.penalty, dispersion=sigma**2) - _, pval, intervals = conv.summary(observed_target, - cov_target, - cov_target_score, - alternatives, - ndraw=ndraw, - burnin=burnin, - compute_intervals=False) - - final_estimator, observed_info_mean = conv.selective_MLE( - observed_target, - cov_target, - cov_target_score)[:2] + result = conv.summary(observed_target, + cov_target, + cov_target_score, + alternatives, + ndraw=ndraw, + burnin=burnin, + compute_intervals=False) + + MLE_result, observed_info_mean = conv.selective_MLE( + observed_target, + cov_target, + cov_target_score) + + final_estimator = np.asarray(MLE_result['MLE']) + pval = np.asarray(result['pvalue']) if target == 'selected': true_target = np.linalg.pinv(X[:,nonzero]).dot(X.dot(beta)) diff --git a/selectinf/randomized/tests/test_topK.py b/selectinf/randomized/tests/test_topK.py index 77984d545..83c7a6ac0 100644 --- a/selectinf/randomized/tests/test_topK.py +++ b/selectinf/randomized/tests/test_topK.py @@ -57,24 +57,27 @@ def test_topK(n=500, alternatives) = topK_select.multivariate_targets(nonzero, dispersion=sigma**2) if use_MLE: - estimate, _, _, pval, intervals, _ = topK_select.selective_MLE(observed_target, - cov_target, - crosscov_target_score) + result = topK_select.selective_MLE(observed_target, + cov_target, + crosscov_target_score)[0] # run summary else: - _, pval, intervals = topK_select.summary(observed_target, - cov_target, - crosscov_target_score, - alternatives, - compute_intervals=True) - + result = topK_select.summary(observed_target, + cov_target, + crosscov_target_score, + alternatives, + compute_intervals=True) + lower = np.asarray(result['lower']) + upper = np.asarray(result['upper']) + pval = result['pvalue'] + intervals = np.asarray(result[['lower', 'upper']]) print(pval) if marginal: beta_target = true_mean[nonzero] else: beta_target = beta[nonzero] - print("beta_target and intervals", beta_target, intervals) - coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) + print("beta_target and intervals", beta_target, lower, upper) + coverage = (beta_target > lower) * (beta_target < upper) print("coverage for selected target", coverage.sum()/float(nonzero.sum())) return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals From 9854fe3cd6faf3bb0f0fc07fb1155854297d96d1 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Sat, 2 May 2020 01:35:41 -0400 Subject: [PATCH 031/187] added a local scaling for the 
Langevin sampler --- selectinf/randomized/posterior_inference.py | 22 ++++++----- selectinf/randomized/query.py | 3 +- selectinf/randomized/tests/test_posterior.py | 40 +++++++++++++++++--- 3 files changed, 49 insertions(+), 16 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index a4d0a89e6..6d9c69a36 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -2,7 +2,8 @@ import numpy as np, sys from selectinf.randomized.selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C -from scipy.stats import norm as ndist +from scipy.stats import norm as ndist, invgamma +from scipy.linalg import fractional_matrix_power class posterior_inference_lasso(): @@ -34,7 +35,6 @@ def __init__(self, self.offset = offset self.initial_estimate = initial_estimate - self.set_marginal_parameters() def set_marginal_parameters(self): @@ -57,10 +57,11 @@ def set_marginal_parameters(self): self.cov_marginal = implied_cov[self.ntarget:, self.ntarget:] - def prior(self, target_parameter, prior_var=100.): + def prior(self, target_parameter, scale=1., prior_var=100.): + + grad_prior = -target_parameter/(scale* prior_var) + log_prior = -np.linalg.norm(target_parameter)**2 /(2.* scale * prior_var) - grad_prior = -target_parameter/prior_var - log_prior = -np.linalg.norm(target_parameter)**2 /(2.*prior_var) return grad_prior, log_prior def log_posterior(self, target_parameter, solve_args={'tol':1.e-12}): @@ -87,14 +88,15 @@ def log_posterior(self, target_parameter, solve_args={'tol':1.e-12}): -self.linear_coef.T.dot(prec_marginal.dot(soln)- conjugate_marginal) grad_prior, log_prior = self.prior(target_parameter) + return grad_lik + grad_prior, log_lik + log_prior - def posterior_sampler(self, nsample= 2000, nburnin=100, step=1.): + def posterior_sampler(self, nsample= 2000, nburnin=100, local_scale = np.identity, step=1.): state = self.initial_estimate stepsize = 1. 
/ (step * self.ntarget) - sampler = langevin(state, self.log_posterior, stepsize) + sampler = langevin(state, self.log_posterior, local_scale, stepsize) samples = np.zeros((nsample, self.ntarget)) for i in range(nsample): @@ -108,6 +110,7 @@ class langevin(object): def __init__(self, initial_condition, gradient_map, + local_scale, stepsize): (self.state, @@ -115,6 +118,7 @@ def __init__(self, self.stepsize) = (np.copy(initial_condition), gradient_map, stepsize) + self.local_scale = local_scale self._shape = self.state.shape[0] self._sqrt_step = np.sqrt(self.stepsize) self._noise = ndist(loc=0,scale=1) @@ -126,8 +130,8 @@ def __iter__(self): def next(self): while True: grad_posterior = self.gradient_map(self.state) - candidate = (self.state + self.stepsize * grad_posterior[0] - + np.sqrt(2.)* self._noise.rvs(self._shape) * self._sqrt_step) + candidate = (self.state + self.stepsize * self.local_scale.dot(grad_posterior[0]) + + np.sqrt(2.)* (fractional_matrix_power(self.local_scale, 0.5).dot(self._noise.rvs(self._shape))) * self._sqrt_step) if not np.all(np.isfinite(self.gradient_map(candidate)[0])): self.stepsize *= 0.5 diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index eae063fcc..26f370409 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -1604,7 +1604,8 @@ def selective_MLE(observed_target, intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T - return final_estimator, observed_info_mean, Z_scores, pvalues, intervals, ind_unbiased_estimator + return final_estimator, observed_info_mean, Z_scores, pvalues, intervals, ind_unbiased_estimator, \ + val + conjugate_arg.T.dot(cond_cov).dot(conjugate_arg)/2. def normalizing_constant(target_parameter, diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index a0a7ffb10..b67849eee 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -3,6 +3,7 @@ from selectinf.randomized.lasso import lasso, selected_targets from selectinf.randomized.posterior_inference import posterior_inference_lasso + def test_sampler(n=500, p=100, signal_fac=1., @@ -26,18 +27,19 @@ def test_sampler(n=500, n, p = X.shape sigma_ = np.std(Y) - W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + + W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ conv = const(X, Y, W, - randomizer_scale=randomizer_scale * sigma_) + randomizer_scale=randomizer_scale * dispersion) signs = conv.fit() nonzero = signs != 0 beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) - dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) (observed_target, cov_target, @@ -47,6 +49,12 @@ def test_sampler(n=500, nonzero, dispersion=dispersion) + _, inverse_info, _, _, _, _, log_ref = conv.selective_MLE(observed_target, + cov_target, + cov_target_score) + + adaptive_ = np.linalg.inv(np.linalg.inv(inverse_info) + 1./100) + posterior_inf = posterior_inference_lasso(observed_target, cov_target, cov_target_score, @@ -58,13 +66,33 @@ def test_sampler(n=500, conv.b_scaling, observed_target) - samples = posterior_inf.posterior_sampler(nsample=2000, nburnin=200, step=1.) + samples = posterior_inf.posterior_sampler(nsample=2000, nburnin=200, local_scale = adaptive_, step=1.) 
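#   [editorial aside, not part of the committed patch] The `langevin` class
#   updated above now takes a preconditioner (`local_scale`, later renamed
#   `proposal_scale`).  A rough sketch of the unadjusted Langevin step it
#   performs, with preconditioner A (here the `adaptive_` matrix) and
#   stepsize h:
#
#       theta_new = (theta
#                    + h * A.dot(grad_log_posterior(theta))
#                    + np.sqrt(2 * h)
#                      * fractional_matrix_power(A, 0.5).dot(
#                            np.random.standard_normal(theta.shape[0])))
#
#   The 5th and 95th percentiles taken from the retained samples below then
#   give approximate 90% credible intervals for the selected coefficients.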
lci = np.percentile(samples, 5, axis=0) uci = np.percentile(samples, 95, axis=0) coverage = (lci < beta_target) * (uci > beta_target) length = uci - lci - print("check ", coverage, length) + return np.mean(coverage), np.mean(length) + + +def main(ndraw=10): + + coverage_ = 0. + length_ = 0. + for n in range(ndraw): + cov, len = test_sampler(n=400, + p=200, + signal_fac=1., + s=5, + sigma=2., + rho=0.4, + randomizer_scale=1.) + + coverage_ += cov + length_ += len + print("coverage so far ", coverage_ / (n + 1.)) + print("lengths so far ", length_ / (n + 1.)) + print("iteration completed ", n + 1) -test_sampler() +main(ndraw=10) From 5b74daf4eb46801a66bc5be6b51a078f250a68d5 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Sat, 2 May 2020 01:50:28 -0400 Subject: [PATCH 032/187] added prior var in test --- selectinf/randomized/posterior_inference.py | 10 ++++++---- selectinf/randomized/tests/test_posterior.py | 11 +++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 6d9c69a36..1e444c080 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -17,7 +17,8 @@ def __init__(self, logdens_linear, linear_part, offset, - initial_estimate): + initial_estimate, + prior_var): self.ntarget = cov_target.shape[0] self.nopt = cond_cov.shape[0] @@ -35,6 +36,7 @@ def __init__(self, self.offset = offset self.initial_estimate = initial_estimate + self.prior_var = prior_var self.set_marginal_parameters() def set_marginal_parameters(self): @@ -57,10 +59,10 @@ def set_marginal_parameters(self): self.cov_marginal = implied_cov[self.ntarget:, self.ntarget:] - def prior(self, target_parameter, scale=1., prior_var=100.): + def prior(self, target_parameter, scale=1.): - grad_prior = -target_parameter/(scale* prior_var) - log_prior = -np.linalg.norm(target_parameter)**2 /(2.* scale * prior_var) + grad_prior = -target_parameter/(scale* self.prior_var) + log_prior = -np.linalg.norm(target_parameter)**2 /(2.* scale * self.prior_var) return grad_prior, log_prior diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index b67849eee..f9e58fffc 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -10,7 +10,8 @@ def test_sampler(n=500, s=5, sigma=3., rho=0.4, - randomizer_scale=1.): + randomizer_scale=1., + prior_var = 100.): inst, const = gaussian_instance, lasso.gaussian signal = np.sqrt(signal_fac * 2 * np.log(p)) @@ -53,7 +54,7 @@ def test_sampler(n=500, cov_target, cov_target_score) - adaptive_ = np.linalg.inv(np.linalg.inv(inverse_info) + 1./100) + adaptive_ = np.linalg.inv(np.linalg.inv(inverse_info) + 1/prior_var) posterior_inf = posterior_inference_lasso(observed_target, cov_target, @@ -64,7 +65,8 @@ def test_sampler(n=500, conv.logdens_linear, conv.A_scaling, conv.b_scaling, - observed_target) + observed_target, + prior_var) samples = posterior_inf.posterior_sampler(nsample=2000, nburnin=200, local_scale = adaptive_, step=1.) lci = np.percentile(samples, 5, axis=0) @@ -86,7 +88,8 @@ def main(ndraw=10): s=5, sigma=2., rho=0.4, - randomizer_scale=1.) 
+ randomizer_scale=1., + prior_var =100) coverage_ += cov length_ += len From 58e9d85da72daa07eceeae7858f398cd78571ddd Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Sun, 3 May 2020 17:34:29 -0400 Subject: [PATCH 033/187] added both samplers to posterior inference --- selectinf/randomized/posterior_inference.py | 51 +++++++++++++++----- selectinf/randomized/tests/test_posterior.py | 23 +++++---- 2 files changed, 52 insertions(+), 22 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 1e444c080..731b9859a 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -18,6 +18,8 @@ def __init__(self, linear_part, offset, initial_estimate, + log_ref, + dispersion, prior_var): self.ntarget = cov_target.shape[0] @@ -37,6 +39,9 @@ def __init__(self, self.initial_estimate = initial_estimate self.prior_var = prior_var + self.dispersion = dispersion + self.log_ref = log_ref + self.set_marginal_parameters() def set_marginal_parameters(self): @@ -66,7 +71,7 @@ def prior(self, target_parameter, scale=1.): return grad_prior, log_prior - def log_posterior(self, target_parameter, solve_args={'tol':1.e-12}): + def log_posterior(self, target_parameter, scale=1., solve_args={'tol':1.e-12}): mean_marginal = self.linear_coef.dot(target_parameter) + self.offset_coef prec_marginal = np.linalg.inv(self.cov_marginal) @@ -91,28 +96,50 @@ def log_posterior(self, target_parameter, solve_args={'tol':1.e-12}): grad_prior, log_prior = self.prior(target_parameter) - return grad_lik + grad_prior, log_lik + log_prior + return self.dispersion * grad_lik/scale + grad_prior, self.dispersion * log_lik/scale + log_prior - (self.dispersion* self.log_ref / scale) - def posterior_sampler(self, nsample= 2000, nburnin=100, local_scale = np.identity, step=1.): + def langevin_sampler(self, nsample= 2000, nburnin=100, proposal_scale = np.identity, step=1.): state = self.initial_estimate stepsize = 1. / (step * self.ntarget) - sampler = langevin(state, self.log_posterior, local_scale, stepsize) + sampler = langevin(state, self.log_posterior, proposal_scale, stepsize) + samples = np.zeros((nsample, self.ntarget)) + + for i in range(nsample): + sampler.next(scaling_ = self.dispersion) + sys.stderr.write("sample number: " + str(i) + "sample: " + str(sampler.state.copy()) + "\n") + samples[i, :] = sampler.state.copy() + + return samples[nburnin:, :] + + def gibbs_sampler(self, nsample= 2000, nburnin=100, proposal_scale = np.identity, step=1.): + + state = self.initial_estimate + scale_state = self.dispersion + stepsize = 1. 
/step + + sampler = langevin(state, self.log_posterior, proposal_scale, stepsize) samples = np.zeros((nsample, self.ntarget)) for i in range(nsample): - sampler.next() - sys.stderr.write("sample number: " + str(i) + "sample: " + str(sampler.state.copy())+ "\n") + sampler.next(scaling_=scale_state) + scale_update = invgamma.rvs(a=(0.001 + self.ntarget), scale=0.001 - (scale_state * sampler.grad_posterior[1]), size=1) + + scale_state = scale_update samples[i, :] = sampler.state.copy() + sys.stderr.write("sample number: " + str(i) + "sample: " + str(samples[i, :]) + "\n") + sys.stderr.write("sample number: " + str(i) + "sigma: " + str(scale_state) + "\n") + return samples[nburnin:, :] + class langevin(object): def __init__(self, initial_condition, gradient_map, - local_scale, + proposal_scale, stepsize): (self.state, @@ -120,7 +147,7 @@ def __init__(self, self.stepsize) = (np.copy(initial_condition), gradient_map, stepsize) - self.local_scale = local_scale + self.proposal_scale = proposal_scale self._shape = self.state.shape[0] self._sqrt_step = np.sqrt(self.stepsize) self._noise = ndist(loc=0,scale=1) @@ -129,11 +156,11 @@ def __init__(self, def __iter__(self): return self - def next(self): + def next(self, scaling_): while True: - grad_posterior = self.gradient_map(self.state) - candidate = (self.state + self.stepsize * self.local_scale.dot(grad_posterior[0]) - + np.sqrt(2.)* (fractional_matrix_power(self.local_scale, 0.5).dot(self._noise.rvs(self._shape))) * self._sqrt_step) + self.grad_posterior = self.gradient_map(self.state, scaling_) + candidate = (self.state + self.stepsize * self.proposal_scale.dot(self.grad_posterior[0]) + + np.sqrt(2.)* (fractional_matrix_power(self.proposal_scale, 0.5).dot(self._noise.rvs(self._shape))) * self._sqrt_step) if not np.all(np.isfinite(self.gradient_map(candidate)[0])): self.stepsize *= 0.5 diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index f9e58fffc..556441b04 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -4,7 +4,7 @@ from selectinf.randomized.posterior_inference import posterior_inference_lasso -def test_sampler(n=500, +def test_Langevin(n=500, p=100, signal_fac=1., s=5, @@ -66,9 +66,11 @@ def test_sampler(n=500, conv.A_scaling, conv.b_scaling, observed_target, + log_ref, + dispersion, prior_var) - samples = posterior_inf.posterior_sampler(nsample=2000, nburnin=200, local_scale = adaptive_, step=1.) + samples = posterior_inf.langevin_sampler(nsample=2000, nburnin=200, proposal_scale=adaptive_, step=1.) lci = np.percentile(samples, 5, axis=0) uci = np.percentile(samples, 95, axis=0) coverage = (lci < beta_target) * (uci > beta_target) @@ -77,19 +79,20 @@ def test_sampler(n=500, return np.mean(coverage), np.mean(length) + def main(ndraw=10): coverage_ = 0. length_ = 0. 
for n in range(ndraw): - cov, len = test_sampler(n=400, - p=200, - signal_fac=1., - s=5, - sigma=2., - rho=0.4, - randomizer_scale=1., - prior_var =100) + cov, len = test_Langevin(n=500, + p=200, + signal_fac=1.5, + s=5, + sigma=2., + rho=0.2, + randomizer_scale=1., + prior_var =100) coverage_ += cov length_ += len From 202ee2e28ad7279c8b394fed998fc1611280ede1 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Mon, 4 May 2020 01:23:12 -0400 Subject: [PATCH 034/187] fixed subgradient in split lasso --- selectinf/randomized/lasso.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index c9a5ec466..a4be3683d 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -928,15 +928,15 @@ def _solve_randomized_problem(self, quad = rr.identity_quadratic(self.ridge_term, 0, 0, - 0,) + 0) randomized_loss = self.loglike.subsample(self._selection_idx) randomized_loss.coef *= inv_frac problem = rr.simple_problem(randomized_loss, self.penalty) initial_soln = problem.solve(quad, **solve_args) - initial_subgrad = -(self.loglike.smooth_objective(initial_soln, - 'grad') + + initial_subgrad = -(randomized_loss.smooth_objective(initial_soln, + 'grad') + quad.objective(initial_soln, 'grad')) return initial_soln, initial_subgrad From bfc82d06e0e86830f0be39cee0dd5702a10ea7d2 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Sun, 17 May 2020 14:14:50 -0400 Subject: [PATCH 035/187] posterior samplers-- some changes --- selectinf/randomized/posterior_inference.py | 4 ++-- selectinf/randomized/tests/test_posterior.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 731b9859a..2af6b12ea 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -117,14 +117,14 @@ def gibbs_sampler(self, nsample= 2000, nburnin=100, proposal_scale = np.identity state = self.initial_estimate scale_state = self.dispersion - stepsize = 1. /step + stepsize = 1. /(step* self.ntarget) sampler = langevin(state, self.log_posterior, proposal_scale, stepsize) samples = np.zeros((nsample, self.ntarget)) for i in range(nsample): sampler.next(scaling_=scale_state) - scale_update = invgamma.rvs(a=(0.001 + self.ntarget), scale=0.001 - (scale_state * sampler.grad_posterior[1]), size=1) + scale_update = invgamma.rvs(a=(0.1 + self.ntarget + self.ntarget/2), scale=0.1 - (scale_state * sampler.grad_posterior[1]), size=1) scale_state = scale_update samples[i, :] = sampler.state.copy() diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index 556441b04..f8c4973e9 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -70,7 +70,7 @@ def test_Langevin(n=500, dispersion, prior_var) - samples = posterior_inf.langevin_sampler(nsample=2000, nburnin=200, proposal_scale=adaptive_, step=1.) + samples = posterior_inf.langevin_sampler(nsample=2000, nburnin=200, proposal_scale=adaptive_, step=1) lci = np.percentile(samples, 5, axis=0) uci = np.percentile(samples, 95, axis=0) coverage = (lci < beta_target) * (uci > beta_target) @@ -79,7 +79,6 @@ def test_Langevin(n=500, return np.mean(coverage), np.mean(length) - def main(ndraw=10): coverage_ = 0. 
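The samplers added in the patches above share one preconditioned, unadjusted Langevin transition; gibbs_sampler only adds an inverse-gamma draw for the dispersion between moves. Below is a minimal numpy sketch of that transition, written to mirror the candidate computed inside langevin.next. The helper name langevin_step and the simplified gradient_map callable (assumed to return a (gradient, log-likelihood) pair and to take only the state, without the scaling_ argument threaded through for the Gibbs update) are illustrative assumptions, not part of these commits.

import numpy as np
from scipy.linalg import fractional_matrix_power

def langevin_step(state, gradient_map, proposal_scale, stepsize, rng=np.random):
    # one unadjusted Langevin move with preconditioner Gamma = proposal_scale:
    #   x_new = x + h * Gamma * grad(x) + sqrt(2 h) * Gamma^(1/2) * N(0, I)
    grad = gradient_map(state)[0]
    noise = rng.standard_normal(state.shape[0])
    sqrt_scale = fractional_matrix_power(proposal_scale, 0.5)
    return (state
            + stepsize * proposal_scale.dot(grad)
            + np.sqrt(2. * stepsize) * sqrt_scale.dot(noise))

On the fitted posterior_inference_lasso object the two entry points are invoked as in the tests above, for example posterior_inf.langevin_sampler(nsample=2000, nburnin=200, proposal_scale=adaptive_, step=1.), or posterior_inf.gibbs_sampler(...) with the same keywords when the dispersion should be resampled as well.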
From fa23e8902faf172602a32b530230212c89541ea7 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Mon, 22 Jun 2020 11:16:13 -0400 Subject: [PATCH 036/187] added test instances --- selectinf/randomized/posterior_inference.py | 6 +- selectinf/randomized/tests/test_posterior.py | 91 ++++++++++++++++--- .../tests/test_selective_MLE_high.py | 60 +++++++++++- 3 files changed, 137 insertions(+), 20 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 2af6b12ea..dbaee2faf 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -86,7 +86,7 @@ def log_posterior(self, target_parameter, scale=1., solve_args={'tol':1.e-12}): self.offset, **solve_args) - log_normalizer = -val - mean_marginal.T.dot(prec_marginal).dot(mean_marginal)/2 + log_normalizer = -val - mean_marginal.T.dot(prec_marginal).dot(mean_marginal)/2. log_lik = -((self.observed_target - target_parameter).T.dot(self.prec_target).dot(self.observed_target - target_parameter)) / 2.\ - log_normalizer @@ -96,7 +96,7 @@ def log_posterior(self, target_parameter, scale=1., solve_args={'tol':1.e-12}): grad_prior, log_prior = self.prior(target_parameter) - return self.dispersion * grad_lik/scale + grad_prior, self.dispersion * log_lik/scale + log_prior - (self.dispersion* self.log_ref / scale) + return self.dispersion * grad_lik/scale + grad_prior, self.dispersion * log_lik/scale + log_prior - (self.dispersion* self.log_ref/scale) def langevin_sampler(self, nsample= 2000, nburnin=100, proposal_scale = np.identity, step=1.): @@ -123,7 +123,7 @@ def gibbs_sampler(self, nsample= 2000, nburnin=100, proposal_scale = np.identity samples = np.zeros((nsample, self.ntarget)) for i in range(nsample): - sampler.next(scaling_=scale_state) + sampler.next(scaling_= scale_state) scale_update = invgamma.rvs(a=(0.1 + self.ntarget + self.ntarget/2), scale=0.1 - (scale_state * sampler.grad_posterior[1]), size=1) scale_state = scale_update diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index f8c4973e9..f18dc0bde 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -54,7 +54,11 @@ def test_Langevin(n=500, cov_target, cov_target_score) - adaptive_ = np.linalg.inv(np.linalg.inv(inverse_info) + 1/prior_var) + adaptive_ = np.linalg.inv(np.linalg.inv(inverse_info) + np.identity(observed_target.shape[0])/ prior_var) + + A_scaling = conv.sampler.affine_con.linear_part + b_scaling = conv.sampler.affine_con.offset + logdens_linear = conv.sampler.logdens_transform[0] posterior_inf = posterior_inference_lasso(observed_target, cov_target, @@ -62,9 +66,68 @@ def test_Langevin(n=500, conv.observed_opt_state, conv.cond_mean, conv.cond_cov, - conv.logdens_linear, - conv.A_scaling, - conv.b_scaling, + logdens_linear, + A_scaling, + b_scaling, + observed_target, + log_ref, ## extra argument introduced for Gibbs update of sigma + dispersion, ## scale back the likelihood if sigma is unknown + prior_var ## prior var for the Gaussian prior + ) + + samples = posterior_inf.langevin_sampler(nsample=2000, nburnin=200, proposal_scale=adaptive_, step=1) + lci = np.percentile(samples, 5, axis=0) + uci = np.percentile(samples, 95, axis=0) + coverage = (lci < beta_target) * (uci > beta_target) + length = uci - lci + + return np.mean(coverage), np.mean(length) + +def test_instance(): + + n, p, s = 500, 100, 5 + prior_var = 100. 
+ X = np.random.standard_normal((n, p)) + beta = np.zeros(p) + #beta[:s] = np.sqrt(2 * np.log(p) / n) + Y = X.dot(beta) + np.random.standard_normal(n) + + scale_ = np.std(Y) + # uses noise of variance n * scale_ / 4 by default + L = lasso.gaussian(X, Y, 3 * scale_ * np.sqrt(2 * np.log(p) * np.sqrt(n))) + signs = L.fit() + E = (signs != 0) + + M = E.copy() + M[-3:] = 1 + dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(L.loglike, + L._W, + M, + dispersion=dispersion) + + print("check shapes", observed_target.shape, E.sum()) + _, inverse_info, _, _, _, _, log_ref = L.selective_MLE(observed_target, + cov_target, + cov_target_score) + + adaptive_ = np.linalg.inv(np.linalg.inv(inverse_info) + np.identity(observed_target.shape[0])/ prior_var) + A_scaling = L.sampler.affine_con.linear_part + b_scaling = L.sampler.affine_con.offset + logdens_linear = L.sampler.logdens_transform[0] + + posterior_inf = posterior_inference_lasso(observed_target, + cov_target, + cov_target_score, + L.observed_opt_state, + L.cond_mean, + L.cond_cov, + logdens_linear, + A_scaling, + b_scaling, observed_target, log_ref, dispersion, @@ -73,6 +136,8 @@ def test_Langevin(n=500, samples = posterior_inf.langevin_sampler(nsample=2000, nburnin=200, proposal_scale=adaptive_, step=1) lci = np.percentile(samples, 5, axis=0) uci = np.percentile(samples, 95, axis=0) + + beta_target = np.linalg.pinv(X[:, M]).dot(X.dot(beta)) coverage = (lci < beta_target) * (uci > beta_target) length = uci - lci @@ -84,14 +149,16 @@ def main(ndraw=10): coverage_ = 0. length_ = 0. for n in range(ndraw): - cov, len = test_Langevin(n=500, - p=200, - signal_fac=1.5, - s=5, - sigma=2., - rho=0.2, - randomizer_scale=1., - prior_var =100) + # cov, len = test_Langevin(n=500, + # p=200, + # signal_fac=1.5, + # s=5, + # sigma=2., + # rho=0.2, + # randomizer_scale=1., + # prior_var =100) + + cov, len = test_instance() coverage_ += cov length_ += len diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 4a4d4a8a5..ea98a9c02 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -1,8 +1,8 @@ import numpy as np import nose.tools as nt -from ..lasso import lasso, full_targets, selected_targets, debiased_targets -from ...tests.instance import gaussian_instance +from selectinf.randomized.lasso import lasso, full_targets, selected_targets, debiased_targets +from selectinf.tests.instance import gaussian_instance def test_full_targets(n=200, p=1000, @@ -81,7 +81,7 @@ def test_full_targets(n=200, def test_selected_targets(n=2000, p=200, - signal_fac=1., + signal_fac=10., s=5, sigma=3, rho=0.4, @@ -120,6 +120,7 @@ def test_selected_targets(n=2000, signs = conv.fit() nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) if nonzero.sum() > 0: dispersion = None @@ -134,7 +135,7 @@ def test_selected_targets(n=2000, nonzero, dispersion=dispersion) - estimate, _, _, pval, intervals, _ = conv.selective_MLE(observed_target, + estimate, _, _, pval, intervals, _, _ = conv.selective_MLE(observed_target, cov_target, cov_target_score) @@ -148,7 +149,7 @@ def main(nsim=500, full=False): P0, PA, cover, length_int = [], [], [], [] from statsmodels.distributions import ECDF - n, p, s = 500, 100, 10 + n, p, s = 500, 100, 5 for i in range(nsim): if full: @@ -171,3 +172,52 @@ def main(nsim=500, 
full=False): np.array(PA) < 0.1, np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover), np.mean(avg_length), 'null pvalue + power + length') + +def test_instance(): + + n, p, s = 500, 100, 5 + X = np.random.standard_normal((n, p)) + beta = np.zeros(p) + #beta[:s] = np.sqrt(2 * np.log(p) / n) + Y = X.dot(beta) + np.random.standard_normal(n) + + scale_ = np.std(Y) + # uses noise of variance n * scale_ / 4 by default + L = lasso.gaussian(X, Y, 3 * scale_ * np.sqrt(2 * np.log(p) * np.sqrt(n))) + signs = L.fit() + E = (signs != 0) + + M = E.copy() + M[-3:] = 1 + dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(L.loglike, + L._W, + M, + dispersion=dispersion) + + print("check shapes", observed_target.shape, E.sum()) + + estimate, _, _, pval, intervals, _, _ = L.selective_MLE(observed_target, + cov_target, + cov_target_score) + + beta_target = np.linalg.pinv(X[:, M]).dot(X.dot(beta)) + + coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) + + return coverage + +def main(nsim=500): + + cover = [] + for i in range(nsim): + + cover_ = test_instance() + cover.extend(cover_) + print(np.mean(cover), 'coverage so far ') + + +main(nsim=500) \ No newline at end of file From 19bbd55777ca5c11f8a329f309f5ac68cb48029d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 23 Jun 2020 09:25:15 -0700 Subject: [PATCH 037/187] some cleanup --- selectinf/learning/utils.py | 2 ++ selectinf/randomized/posterior_inference.py | 18 +++++++++++++----- selectinf/randomized/query.py | 4 ++-- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/selectinf/learning/utils.py b/selectinf/learning/utils.py index d68bc5b6a..6a0cf897a 100644 --- a/selectinf/learning/utils.py +++ b/selectinf/learning/utils.py @@ -402,8 +402,10 @@ def naive_partial_model_inference(X, return pd.DataFrame({'naive_pivot':naive_pivots, 'naive_coverage':naive_covered, 'naive_length':naive_lengths, + 'naive_pvalue':naive_pvalues, 'nfeature':X.shape[1], 'naive_lower':naive_lower, + 'naive_upper':naive_upper, 'target':final_target, 'variable':observed_list }) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 99b8be9e2..19076431e 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -3,9 +3,10 @@ import numpy as np from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C +from .query import _solve_barrier_affine_py from scipy.stats import norm as ndist -class posterior_inference_lasso(): +class posterior_inference_lasso(object): def __init__(self, observed_target, @@ -17,7 +18,8 @@ def __init__(self, logdens_linear, linear_part, offset, - initial_estimate): + initial_estimate, + prior_var=100.): self.ntarget = cov_target.shape[0] self.nopt = cond_cov.shape[0] @@ -38,6 +40,8 @@ def __init__(self, self.set_marginal_parameters() + self.prior_var = prior_var + def set_marginal_parameters(self): target_linear = -self.logdens_linear.dot(self.cov_target_score.T.dot(self.prec_target)) @@ -58,8 +62,8 @@ def set_marginal_parameters(self): self.cov_marginal = implied_cov[self.ntarget:, self.ntarget:] - def prior(self, target_parameter, prior_var=100.): - + def prior(self, target_parameter): + prior_var = self.prior_var grad_prior = -target_parameter/prior_var log_prior = 
-np.linalg.norm(target_parameter)/(2.*prior_var) return grad_prior, log_prior @@ -70,7 +74,11 @@ def log_posterior(self, target_parameter, solve_args={'tol':1.e-12}): prec_marginal = np.linalg.inv(self.cov_marginal) conjugate_marginal = prec_marginal.dot(mean_marginal) - solver = solve_barrier_affine_C + useC = True + if useC: + solver = solve_barrier_affine_C + else: + solver = _solve_barrier_affine_py val, soln, hess = solver(conjugate_marginal, prec_marginal, diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 06396878e..73d7aeb87 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -1600,7 +1600,7 @@ def selective_MLE(observed_target, if useC: solver = solve_barrier_affine_C else: - solver = solve_barrier_affine_py + solver = _solve_barrier_affine_py val, soln, hess = solver(conjugate_arg, prec_opt, @@ -1734,7 +1734,7 @@ def normalizing_constant(target_parameter, if useC: solver = solve_barrier_affine_C else: - solver = solve_barrier_affine_py + solver = _solve_barrier_affine_py value, soln, hess = solver(-linear_term, full_Q, From 3932618e6f4e3328ecf36b7bd31273a067f7c062 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 23 Jun 2020 09:30:11 -0700 Subject: [PATCH 038/187] using data frame summary output --- selectinf/algorithms/tests/test_compareR.py | 50 +++++++++++++++------ 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/selectinf/algorithms/tests/test_compareR.py b/selectinf/algorithms/tests/test_compareR.py index 51ba177cf..3727fe548 100644 --- a/selectinf/algorithms/tests/test_compareR.py +++ b/selectinf/algorithms/tests/test_compareR.py @@ -972,21 +972,45 @@ def test_rlasso_gaussian(): nonzero, penalty=L.penalty) - _, pval, intervals = L.summary(observed_target, - cov_target, - cov_target_score, - alternatives, - opt_sample=(np.asarray(R_opt_samples),), - target_sample=np.asarray(R_target_samples), - ndraw=8000,#ndraw, - burnin=burnin, - compute_intervals=True) + result = L.summary(observed_target, + cov_target, + cov_target_score, + alternatives, + opt_sample=(np.asarray(R_opt_samples),), + target_sample=np.asarray(R_target_samples), + ndraw=8000,#ndraw, + burnin=burnin, + compute_intervals=True) + pval = np.asarray(result['pvalue']) tol = 1.e-5 - yield np.testing.assert_allclose, initial_soln, R_soln, tol, tol, False, 'checking initial rlasso solution' - yield np.testing.assert_allclose, cond_mean, R_cond_mean, tol, tol, False, 'checking conditional mean' - yield np.testing.assert_allclose, cond_cov, R_cond_cov, tol, tol, False, 'checking conditional covariance' - yield np.testing.assert_allclose, pval, R_pvalues, tol, tol, False, 'checking pvalues' + yield (np.testing.assert_allclose, + initial_soln, + R_soln, + tol, + tol, + False, 'checking initial rlasso solution') + yield (np.testing.assert_allclose, + cond_mean, + R_cond_mean, + tol, + tol, + False, + 'checking conditional mean') + yield (np.testing.assert_allclose, + cond_cov, + R_cond_cov, + tol, + tol, + False, + 'checking conditional covariance') + yield (np.testing.assert_allclose, + pval, + R_pvalues, + tol, + tol, + False, + 'checking pvalues') break From af7705a75cd4c58ea97e63c739c12826b0bbbfae Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 23 Jun 2020 09:31:15 -0700 Subject: [PATCH 039/187] drop the losers query --- selectinf/randomized/drop_losers.py | 137 ++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 selectinf/randomized/drop_losers.py diff --git a/selectinf/randomized/drop_losers.py 
b/selectinf/randomized/drop_losers.py new file mode 100644 index 000000000..be1287fc5 --- /dev/null +++ b/selectinf/randomized/drop_losers.py @@ -0,0 +1,137 @@ +from __future__ import print_function + +import numpy as np +import pandas as pd + +from .query import gaussian_query + +from .randomization import randomization + +class drop_losers(gaussian_query): + + def __init__(self, + df, # should have columns 'arm', 'stage', 'data' + K=1): # how many should we move forward? + + self.df = df + self.K = K + + grouped_arm = df.groupby('arm') + self.std = grouped_arm.std()['data'] + self.means = grouped_arm.mean()['data'] + self.stages = dict([(k, v) for k, v in df.groupby('stage')]) + stage1 = df['stage'].min() + stage2 = df['stage'].max() + + df1 = self.stages[stage1] + df2 = self.stages[stage2] + + stage1_means = df1.groupby('arm').mean().sort_values('data', ascending=False) + self._winners = sorted(list(stage1_means.index[:K])) + best_loser = stage1_means['data'].iloc[K] + + n1 = df1.groupby('arm').count() + n2 = df2.groupby('arm').count() + self._n1_win = n1_win = np.array([n1.loc[lambda df: df.index == winner]['data'].iloc[0] + for winner in self._winners]) + self._n2_win = n2_win = np.array([n2.loc[lambda df: df.index == winner]['data'].iloc[0] + for winner in self._winners]) + std_win = self.std.loc[self._winners] + + A = -np.identity(K) + b = -np.ones(K) * best_loser + linear = np.identity(K) + offset = np.zeros(K) + + # Work out the implied randomization variance + # Let X1=X[stage1].mean(), X2=X[stage2].mean() and Xf = X.mean() + # with n1=len(stage1), n2=len(stage2) + + # X1 = Xf + n2/n1 * (Xf-X2) + # = Xf + n2/(n1+n2) * (X1-X2) + # so randomization term is w=n2/(n1+n2) * (X1-X2) + # with variance + # n2**2 / (n1+n2)**2 * (1/n1 + 1/n2) + # = n2**2 / (n1+n2)**2 * (n1+n2) / (n1*n2) + # = n2 / (n1 * (n1 + n2)) + + mult = n2_win / (n1_win * (n1_win + n2_win)) + + # needed for gaussian_query api + + self.randomizer = randomization.gaussian(np.diag(std_win**2) * mult) + self.observed_opt_state = stage1_means['data'].iloc[:K] + self.observed_score_state = -self.means[self._winners] # problem is a minimization + self.selection_variable = {'winners':self._winners} + + self._setup_sampler(A, b, linear, offset) + + def selective_MLE(self, + level=0.9, + solve_args={'tol':1.e-12}): + """ + + Parameters + ---------- + + level : float, optional + Confidence level. + + solve_args : dict, optional + Arguments passed to solver. + + """ + + observed_target = self.means[self._winners] + std_win = self.std.loc[self._winners] + target_cov = np.diag(std_win**2 / (self._n1_win + self._n2_win)) + target_score_cov = -target_cov + + result = gaussian_query.selective_MLE(self, + observed_target, + target_cov, + target_score_cov, + level=level, + solve_args=solve_args) + result[0].insert(0, 'arm', self._winners) + return result + + def summary(self, + level=0.9, + ndraw=10000, + burnin=2000): + + """ + Produce p-values and confidence intervals for targets + of model including selected features + + Parameters + ---------- + + level : float + Confidence level. + + ndraw : int (optional) + Defaults to 1000. + + burnin : int (optional) + Defaults to 1000. 
+ + """ + observed_target = self.means[self._winners] + std_win = self.std.loc[self._winners] + target_cov = np.diag(std_win**2 / (self._n1_win + self._n2_win)) + target_score_cov = -target_cov + + result = gaussian_query.summary(self, + observed_target, + target_cov, + target_score_cov, + alternatives=['twosided']*self.K, + ndraw=ndraw, + level=level, + burnin=burnin, + compute_intervals=True) + result.insert(0, 'arm', self._winners) + return result + From 657a212178a2e44c21d69b299b574dd5d998c14e Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 23 Jun 2020 09:42:57 -0700 Subject: [PATCH 040/187] make sure we get no 0-sized samples --- selectinf/randomized/tests/test_drop_losers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selectinf/randomized/tests/test_drop_losers.py b/selectinf/randomized/tests/test_drop_losers.py index 4d78d8afc..6322f5e66 100644 --- a/selectinf/randomized/tests/test_drop_losers.py +++ b/selectinf/randomized/tests/test_drop_losers.py @@ -73,7 +73,7 @@ def test_compare_topK(p=20, winners = list(stage1_means.index[:K]) for winner in winners: - N = int(np.random.poisson(30, size=(1,))) + N = int(np.random.poisson(30, size=(1,))) + 5 arm.extend([winner]*N) stage.extend([2]*N) data.extend(list(np.random.standard_normal(N))) @@ -89,7 +89,7 @@ def test_compare_topK(p=20, for a in range(p): if a not in winners: - N = int(np.random.poisson(30, size=(1,))) + N = int(np.random.poisson(30, size=(1,))) + 5 arm.extend([a]*N) stage.extend([2]*N) data.extend(list(np.random.standard_normal(N))) From 0b7d8a3065d0c3d25628e8654232421784d823bf Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 23 Jun 2020 09:44:05 -0700 Subject: [PATCH 041/187] import integer division --- selectinf/randomized/drop_losers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectinf/randomized/drop_losers.py b/selectinf/randomized/drop_losers.py index be1287fc5..ffe2804ca 100644 --- a/selectinf/randomized/drop_losers.py +++ b/selectinf/randomized/drop_losers.py @@ -1,4 +1,4 @@ -from __future__ import print_function +from __future__ import print_function, division import numpy as np import pandas as pd From 5d0b12808e56f6b7d04ab842bd7562306adf49c4 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Wed, 24 Jun 2020 11:31:45 -0400 Subject: [PATCH 042/187] added code for selective_mle related outputs --- selectinf/randomized/tests/test_cv_mle.py | 644 ++++++++++++++++++++++ 1 file changed, 644 insertions(+) create mode 100644 selectinf/randomized/tests/test_cv_mle.py diff --git a/selectinf/randomized/tests/test_cv_mle.py b/selectinf/randomized/tests/test_cv_mle.py new file mode 100644 index 000000000..052824840 --- /dev/null +++ b/selectinf/randomized/tests/test_cv_mle.py @@ -0,0 +1,644 @@ +import numpy as np, os, itertools +import pandas as pd + +import rpy2.robjects as rpy +from rpy2.robjects import numpy2ri +rpy.numpy2ri.activate() + +from scipy.stats import norm as ndist +from selectinf.randomized.lasso import lasso, full_targets, selected_targets, debiased_targets +#from selection.algorithms.lasso import lasso as lasso_full + +def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1): + + rpy.r(''' + source('~/best-subset/bestsubset/R/sim.R') + sim_xy = sim.xy + ''') + + r_simulate = rpy.globalenv['sim_xy'] + sim = r_simulate(n, p, nval, rho, s, beta_type, snr) + X = np.array(sim.rx2('x')) + y = np.array(sim.rx2('y')) + X_val = np.array(sim.rx2('xval')) + y_val = np.array(sim.rx2('yval')) + Sigma = np.array(sim.rx2('Sigma')) + beta = 
np.array(sim.rx2('beta')) + sigma = np.array(sim.rx2('sigma')) + + return X, y, X_val, y_val, Sigma, beta, sigma + + +def selInf_R(X, y, beta, lam, sigma, Type, alpha=0.1): + rpy.r(''' + library("selectiveInference") + selInf = function(X, y, beta, lam, sigma, Type, alpha= 0.1){ + y = as.matrix(y) + X = as.matrix(X) + beta = as.matrix(beta) + lam = as.matrix(lam)[1,1] + sigma = as.matrix(sigma)[1,1] + Type = as.matrix(Type)[1,1] + if(Type == 1){ + type = "full"} else{ + type = "partial"} + inf = fixedLassoInf(x = X, y = y, beta = beta, lambda=lam, family = "gaussian", + intercept=FALSE, sigma=sigma, alpha=alpha, type=type) + return(list(ci = inf$ci, pvalue = inf$pv))} + ''') + + inf_R = rpy.globalenv['selInf'] + n, p = X.shape + r_X = rpy.r.matrix(X, nrow=n, ncol=p) + r_y = rpy.r.matrix(y, nrow=n, ncol=1) + r_beta = rpy.r.matrix(beta, nrow=p, ncol=1) + r_lam = rpy.r.matrix(lam, nrow=1, ncol=1) + r_sigma = rpy.r.matrix(sigma, nrow=1, ncol=1) + r_Type = rpy.r.matrix(Type, nrow=1, ncol=1) + output = inf_R(r_X, r_y, r_beta, r_lam, r_sigma, r_Type) + ci = np.array(output.rx2('ci')) + pvalue = np.array(output.rx2('pvalue')) + return ci, pvalue + + +def glmnet_lasso(X, y, lambda_val): + rpy.r(''' + library(glmnet) + glmnet_LASSO = function(X,y, lambda){ + y = as.matrix(y) + X = as.matrix(X) + lam = as.matrix(lambda)[1,1] + n = nrow(X) + + fit = glmnet(X, y, standardize=TRUE, intercept=FALSE, thresh=1.e-10) + estimate = coef(fit, s=lam, exact=TRUE, x=X, y=y)[-1] + fit.cv = cv.glmnet(X, y, standardize=TRUE, intercept=FALSE, thresh=1.e-10) + estimate.1se = coef(fit.cv, s='lambda.1se', exact=TRUE, x=X, y=y)[-1] + estimate.min = coef(fit.cv, s='lambda.min', exact=TRUE, x=X, y=y)[-1] + return(list(estimate = estimate, estimate.1se = estimate.1se, estimate.min = estimate.min, lam.min = fit.cv$lambda.min, lam.1se = fit.cv$lambda.1se)) + }''') + + lambda_R = rpy.globalenv['glmnet_LASSO'] + n, p = X.shape + r_X = rpy.r.matrix(X, nrow=n, ncol=p) + r_y = rpy.r.matrix(y, nrow=n, ncol=1) + r_lam = rpy.r.matrix(lambda_val, nrow=1, ncol=1) + + estimate = np.array(lambda_R(r_X, r_y, r_lam).rx2('estimate')) + estimate_1se = np.array(lambda_R(r_X, r_y, r_lam).rx2('estimate.1se')) + estimate_min = np.array(lambda_R(r_X, r_y, r_lam).rx2('estimate.min')) + lam_min = np.asscalar(np.array(lambda_R(r_X, r_y, r_lam).rx2('lam.min'))) + lam_1se = np.asscalar(np.array(lambda_R(r_X, r_y, r_lam).rx2('lam.1se'))) + return estimate, estimate_1se, estimate_min, lam_min, lam_1se + + +def coverage(intervals, pval, target, truth): + pval_alt = (pval[truth != 0]) < 0.1 + if pval_alt.sum() > 0: + avg_power = np.mean(pval_alt) + else: + avg_power = 0. 
+ return np.mean((target > intervals[:, 0]) * (target < intervals[:, 1])), avg_power + + +def BHfilter(pval, q=0.2): + rpy.r.assign('pval', pval) + rpy.r.assign('q', q) + rpy.r('Pval = p.adjust(pval, method="BH")') + rpy.r('S = which((Pval < q)) - 1') + S = rpy.r('S') + ind = np.zeros(pval.shape[0], np.bool) + ind[np.asarray(S, np.int)] = 1 + return ind + + +def relative_risk(est, truth, Sigma): + if (truth != 0).sum > 0: + return (est - truth).T.dot(Sigma).dot(est - truth) / truth.T.dot(Sigma).dot(truth) + else: + return (est - truth).T.dot(Sigma).dot(est - truth) + + +def comparison_cvmetrics_selected(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=1, snr=0.20, + randomizer_scale=np.sqrt(0.50), full_dispersion=True, + tuning_nonrand="lambda.min", tuning_rand="lambda.1se"): + + X, y, _, _, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr) + true_mean = X.dot(beta) + print("snr", snr) + X -= X.mean(0)[None, :] + X /= (X.std(0)[None, :] * np.sqrt(n / (n - 1.))) + y = y - y.mean() + true_set = np.asarray([u for u in range(p) if beta[u] != 0]) + + if full_dispersion: + dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p) + sigma_ = np.sqrt(dispersion) + else: + dispersion = None + sigma_ = np.std(y) + print("estimated and true sigma", sigma, sigma_) + + lam_theory = sigma_ * 1. * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) + glm_LASSO_theory, glm_LASSO_1se, glm_LASSO_min, lam_min, lam_1se = glmnet_lasso(X, y, lam_theory/float(n)) + if tuning_nonrand == "lambda.min": + lam_LASSO = lam_min + glm_LASSO = glm_LASSO_min + elif tuning_nonrand == "lambda.1se": + lam_LASSO = lam_1se + glm_LASSO = glm_LASSO_1se + else: + lam_LASSO = lam_theory/float(n) + glm_LASSO = glm_LASSO_theory + active_LASSO = (glm_LASSO != 0) + nactive_LASSO = active_LASSO.sum() + active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]]) + active_LASSO_bool = np.asarray([(np.in1d(active_set_LASSO[z], true_set).sum() > 0) for z in range(nactive_LASSO)], np.bool) + + rel_LASSO = np.zeros(p) + Lee_nreport = 0 + bias_Lee = 0. + bias_naive = 0. + + if nactive_LASSO > 0: + post_LASSO_OLS = np.linalg.pinv(X[:, active_LASSO]).dot(y) + rel_LASSO[active_LASSO] = post_LASSO_OLS + Lee_target = np.linalg.pinv(X[:, active_LASSO]).dot(X.dot(beta)) + Lee_intervals, Lee_pval = selInf_R(X, y, glm_LASSO, n * lam_LASSO, sigma_, Type=0, alpha=0.1) + + if (Lee_pval.shape[0] == Lee_target.shape[0]): + + cov_Lee, selective_Lee_power = coverage(Lee_intervals, Lee_pval, Lee_target, beta[active_LASSO]) + inf_entries_bool = np.isinf(Lee_intervals[:, 1] - Lee_intervals[:, 0]) + inf_entries = np.mean(inf_entries_bool) + if inf_entries == 1.: + length_Lee = 0. + else: + length_Lee = np.mean((Lee_intervals[:, 1] - Lee_intervals[:, 0])[~inf_entries_bool]) + power_Lee = ((active_LASSO_bool) * (np.logical_or((0. < Lee_intervals[:, 0]), (0. 
> Lee_intervals[:, 1])))) \ + .sum() / float((beta != 0).sum()) + Lee_discoveries = BHfilter(Lee_pval, q=0.1) + power_Lee_BH = (Lee_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum()) + fdr_Lee_BH = (Lee_discoveries * ~active_LASSO_bool).sum() / float(max(Lee_discoveries.sum(), 1.)) + bias_Lee = np.mean(glm_LASSO[active_LASSO] - Lee_target) + + naive_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_LASSO].T.dot(X[:, active_LASSO]))))) + naive_intervals = np.vstack([post_LASSO_OLS - 1.65 * naive_sd, + post_LASSO_OLS + 1.65 * naive_sd]).T + naive_pval = 2 * ndist.cdf(np.abs(post_LASSO_OLS) / naive_sd) + cov_naive, selective_naive_power = coverage(naive_intervals, naive_pval, Lee_target, beta[active_LASSO]) + length_naive = np.mean(naive_intervals[:, 1] - naive_intervals[:, 0]) + power_naive = ((active_LASSO_bool) * ( + np.logical_or((0. < naive_intervals[:, 0]), (0. > naive_intervals[:, 1])))).sum() / float( + (beta != 0).sum()) + naive_discoveries = BHfilter(naive_pval, q=0.1) + power_naive_BH = (naive_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum()) + fdr_naive_BH = (naive_discoveries * ~active_LASSO_bool).sum() / float(max(naive_discoveries.sum(), 1.)) + bias_naive = np.mean(rel_LASSO[active_LASSO] - Lee_target) + + partial_Lasso_risk = (glm_LASSO[active_LASSO]-Lee_target).T.dot(glm_LASSO[active_LASSO]-Lee_target) + partial_relLasso_risk = (post_LASSO_OLS - Lee_target).T.dot(post_LASSO_OLS - Lee_target) + + else: + Lee_nreport = 1 + cov_Lee, length_Lee, inf_entries, power_Lee, power_Lee_BH, fdr_Lee_BH, selective_Lee_power = [0., 0., 0., 0., 0., 0., 0.] + cov_naive, length_naive, power_naive, power_naive_BH, fdr_naive_BH, selective_naive_power = [0., 0., 0., 0., 0., 0.] + naive_discoveries = np.zeros(1) + Lee_discoveries = np.zeros(1) + partial_Lasso_risk, partial_relLasso_risk = [0., 0.] + elif nactive_LASSO == 0: + Lee_nreport = 1 + cov_Lee, length_Lee, inf_entries, power_Lee, power_Lee_BH, fdr_Lee_BH, selective_Lee_power = [0., 0., 0., 0., 0., 0., 0.] + cov_naive, length_naive, power_naive, power_naive_BH, fdr_naive_BH, selective_naive_power = [0., 0., 0., 0., 0., 0.] + naive_discoveries = np.zeros(1) + Lee_discoveries = np.zeros(1) + partial_Lasso_risk, partial_relLasso_risk = [0., 0.] 
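# Aside, illustrative only (not part of this commit): glmnet fits
#     minimize (1/(2*n)) * ||y - X b||^2_2 + lam * ||b||_1,
# whereas lasso.gaussian penalizes the unscaled (1/2) * ||y - X b||^2_2 loss,
# so a glmnet lambda is put on the feature_weights scale by multiplying by n.
# That is why lam_theory is divided by n before being handed to glmnet_lasso
# above, and why the randomized fits below use n * lam_min and n * lam_1se.
def glmnet_lam_to_feature_weights(lam_glmnet, n, p):
    # hypothetical helper spelling out the conversion used in this file
    return n * lam_glmnet * np.ones(p)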
+ + if tuning_rand == "lambda.min": + randomized_lasso = lasso.gaussian(X, + y, + feature_weights=n * lam_min * np.ones(p), + randomizer_scale= np.sqrt(n) * randomizer_scale * sigma_) + elif tuning_rand == "lambda.1se": + randomized_lasso = lasso.gaussian(X, + y, + feature_weights=n * lam_1se * np.ones(p), + randomizer_scale= np.sqrt(n) * randomizer_scale * sigma_) + else: + randomized_lasso = lasso.gaussian(X, + y, + feature_weights= lam_theory * np.ones(p), + randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) + signs = randomized_lasso.fit() + nonzero = signs != 0 + active_set_rand = np.asarray([t for t in range(p) if nonzero[t]]) + active_rand_bool = np.asarray([(np.in1d(active_set_rand[x], true_set).sum() > 0) for x in range(nonzero.sum())], np.bool) + sel_MLE = np.zeros(p) + ind_est = np.zeros(p) + randomized_lasso_est = np.zeros(p) + randomized_rel_lasso_est = np.zeros(p) + MLE_nreport = 0 + + if nonzero.sum() > 0: + target_randomized = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(randomized_lasso.loglike, + randomized_lasso._W, + nonzero, + dispersion=dispersion) + + result = randomized_lasso.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + + MLE_estimate = result['MLE'] + ind_unbiased_estimator = result['unbiased'] + + sel_MLE[nonzero] = MLE_estimate + ind_est[nonzero] = ind_unbiased_estimator + MLE_intervals = np.asarray(result[['lower', 'upper']]) + MLE_pval = np.asarray(result['pvalue']) + + randomized_lasso_est = randomized_lasso.initial_soln + randomized_rel_lasso_est = randomized_lasso._beta_full + + cov_MLE, selective_MLE_power = coverage(MLE_intervals, MLE_pval, target_randomized, beta[nonzero]) + length_MLE = np.mean(MLE_intervals[:, 1] - MLE_intervals[:, 0]) + power_MLE = ((active_rand_bool) * ( + np.logical_or((0. < MLE_intervals[:, 0]), (0. > MLE_intervals[:, 1])))).sum() / float((beta != 0).sum()) + MLE_discoveries = BHfilter(MLE_pval, q=0.1) + power_MLE_BH = (MLE_discoveries * active_rand_bool).sum() / float((beta != 0).sum()) + fdr_MLE_BH = (MLE_discoveries * ~active_rand_bool).sum() / float(max(MLE_discoveries.sum(), 1.)) + bias_MLE = np.mean(MLE_estimate - target_randomized) + + partial_MLE_risk = (MLE_estimate - target_randomized).T.dot(MLE_estimate - target_randomized) + partial_ind_risk = (ind_unbiased_estimator - target_randomized).T.dot(ind_unbiased_estimator - target_randomized) + partial_randLasso_risk = (randomized_lasso_est[nonzero] - target_randomized).T.dot(randomized_lasso_est[nonzero] - target_randomized) + partial_relrandLasso_risk = (randomized_rel_lasso_est[nonzero] - target_randomized).T.dot(randomized_rel_lasso_est[nonzero] - target_randomized) + + else: + MLE_nreport = 1 + cov_MLE, length_MLE, power_MLE, power_MLE_BH, fdr_MLE_BH, bias_MLE, selective_MLE_power = [0., 0., 0., 0., 0., 0., 0.] + MLE_discoveries = np.zeros(1) + partial_MLE_risk, partial_ind_risk, partial_randLasso_risk, partial_relrandLasso_risk = [0., 0., 0., 0.] 
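# Aside, illustrative only (not part of this commit): BHfilter above delegates
# to R's p.adjust through rpy2; a numpy-only sketch of the same
# Benjamini-Hochberg selection (the name bh_filter_np is hypothetical) is:
def bh_filter_np(pval, q=0.2):
    m = pval.shape[0]
    order = np.argsort(pval)
    scaled = pval[order] * m / np.arange(1, m + 1)
    # BH-adjusted p-values: running minimum from the largest rank downward
    adjusted = np.minimum(np.minimum.accumulate(scaled[::-1])[::-1], 1.)
    keep = np.zeros(m, np.bool_)
    keep[order] = adjusted < q
    return keep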
+ + risks = np.vstack((relative_risk(sel_MLE, beta, Sigma), + relative_risk(ind_est, beta, Sigma), + relative_risk(randomized_lasso_est, beta, Sigma), + relative_risk(randomized_rel_lasso_est, beta, Sigma), + relative_risk(rel_LASSO, beta, Sigma), + relative_risk(glm_LASSO, beta, Sigma))) + + partial_risks = np.vstack((partial_MLE_risk, + partial_ind_risk, + partial_randLasso_risk, + partial_relrandLasso_risk, + partial_relLasso_risk, + partial_Lasso_risk)) + + naive_inf = np.vstack((cov_naive, length_naive, 0., nactive_LASSO, bias_naive, selective_naive_power, power_naive, power_naive_BH, fdr_naive_BH, + naive_discoveries.sum())) + Lee_inf = np.vstack((cov_Lee, length_Lee, inf_entries, nactive_LASSO, bias_Lee, selective_Lee_power, power_Lee, power_Lee_BH, fdr_Lee_BH, + Lee_discoveries.sum())) + Liu_inf = np.zeros((10, 1)) + MLE_inf = np.vstack((cov_MLE, length_MLE, 0., nonzero.sum(), bias_MLE, selective_MLE_power, power_MLE, power_MLE_BH, fdr_MLE_BH, + MLE_discoveries.sum())) + nreport = np.vstack((Lee_nreport, 0., MLE_nreport)) + + return np.vstack((risks, naive_inf, Lee_inf, Liu_inf, MLE_inf, partial_risks, nreport)) + + +def comparison_cvmetrics_full(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=1, snr=0.20, + randomizer_scale=np.sqrt(0.25), full_dispersion=True, + tuning_nonrand="lambda.min", tuning_rand="lambda.1se"): + + X, y, _, _, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr) + print("snr", snr) + X -= X.mean(0)[None, :] + X /= (X.std(0)[None, :] * np.sqrt(n / (n - 1.))) + y = y - y.mean() + true_set = np.asarray([u for u in range(p) if beta[u] != 0]) + + if full_dispersion: + dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p) + sigma_ = np.sqrt(dispersion) + else: + dispersion = None + sigma_ = np.std(y) + print("estimated and true sigma", sigma, sigma_) + + lam_theory = sigma_ * 1. * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) + glm_LASSO_theory, glm_LASSO_1se, glm_LASSO_min, lam_min, lam_1se = glmnet_lasso(X, y, lam_theory/float(n)) + if tuning_nonrand == "lambda.min": + lam_LASSO = lam_min + glm_LASSO = glm_LASSO_min + elif tuning_nonrand == "lambda.1se": + lam_LASSO = lam_1se + glm_LASSO = glm_LASSO_1se + else: + lam_LASSO = lam_theory/float(n) + glm_LASSO = glm_LASSO_theory + + active_LASSO = (glm_LASSO != 0) + nactive_LASSO = active_LASSO.sum() + active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]]) + active_LASSO_bool = np.asarray([(np.in1d(active_set_LASSO[z], true_set).sum() > 0) for z in range(nactive_LASSO)], + np.bool) + + rel_LASSO = np.zeros(p) + Lee_nreport = 0 + bias_Lee = 0. + bias_naive = 0. + + if nactive_LASSO > 0: + rel_LASSO[active_LASSO] = np.linalg.pinv(X[:, active_LASSO]).dot(y) + Lee_target = beta[active_LASSO] + Lee_intervals, Lee_pval = selInf_R(X, y, glm_LASSO, n * lam_LASSO, sigma_, Type=1, alpha=0.1) + + if (Lee_pval.shape[0] == Lee_target.shape[0]): + + cov_Lee, selective_Lee_power = coverage(Lee_intervals, Lee_pval, Lee_target, beta[active_LASSO]) + inf_entries_bool = np.isinf(Lee_intervals[:, 1] - Lee_intervals[:, 0]) + inf_entries = np.mean(inf_entries_bool) + if inf_entries == 1.: + length_Lee = 0. + else: + length_Lee = np.mean((Lee_intervals[:, 1] - Lee_intervals[:, 0])[~inf_entries_bool]) + power_Lee = ((active_LASSO_bool) * ( + np.logical_or((0. < Lee_intervals[:, 0]), (0. 
> Lee_intervals[:, 1])))).sum() / float((beta != 0).sum()) + Lee_discoveries = BHfilter(Lee_pval, q=0.1) + power_Lee_BH = (Lee_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum()) + fdr_Lee_BH = (Lee_discoveries * ~active_LASSO_bool).sum() / float(max(Lee_discoveries.sum(), 1.)) + bias_Lee = np.mean(glm_LASSO[active_LASSO] - Lee_target) + + post_LASSO_OLS = np.linalg.pinv(X[:, active_LASSO]).dot(y) + naive_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_LASSO].T.dot(X[:, active_LASSO]))))) + naive_intervals = np.vstack([post_LASSO_OLS - 1.65 * naive_sd, + post_LASSO_OLS + 1.65 * naive_sd]).T + naive_pval = 2 * ndist.cdf(np.abs(post_LASSO_OLS) / naive_sd) + cov_naive, selective_naive_power = coverage(naive_intervals, naive_pval, Lee_target, beta[active_LASSO]) + length_naive = np.mean(naive_intervals[:, 1] - naive_intervals[:, 0]) + power_naive = ((active_LASSO_bool) * ( + np.logical_or((0. < naive_intervals[:, 0]), (0. > naive_intervals[:, 1])))).sum() / float( + (beta != 0).sum()) + naive_discoveries = BHfilter(naive_pval, q=0.1) + power_naive_BH = (naive_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum()) + fdr_naive_BH = (naive_discoveries * ~active_LASSO_bool).sum() / float(max(naive_discoveries.sum(), 1.)) + bias_naive = np.mean(rel_LASSO[active_LASSO] - Lee_target) + + partial_Lasso_risk = (glm_LASSO[active_LASSO] - Lee_target).T.dot(glm_LASSO[active_LASSO] - Lee_target) + partial_relLasso_risk = (post_LASSO_OLS - Lee_target).T.dot(post_LASSO_OLS - Lee_target) + else: + Lee_nreport = 1 + cov_Lee, length_Lee, inf_entries, power_Lee, power_Lee_BH, fdr_Lee_BH, selective_Lee_power = [0., 0., 0., 0., 0., 0., 0.] + cov_naive, length_naive, power_naive, power_naive_BH, fdr_naive_BH, selective_naive_power = [0., 0., 0., 0., 0., 0.] + naive_discoveries = np.zeros(1) + Lee_discoveries = np.zeros(1) + partial_Lasso_risk, partial_relLasso_risk = [0., 0.] + + elif nactive_LASSO == 0: + Lee_nreport = 1 + cov_Lee, length_Lee, inf_entries, power_Lee, power_Lee_BH, fdr_Lee_BH, selective_Lee_power = [0., 0., 0., 0., 0., 0., 0.] + cov_naive, length_naive, power_naive, power_naive_BH, fdr_naive_BH, selective_naive_power = [0., 0., 0., 0., 0., 0.] + naive_discoveries = np.zeros(1) + Lee_discoveries = np.zeros(1) + partial_Lasso_risk, partial_relLasso_risk = [0., 0.] + + lasso_Liu = lasso_full.gaussian(X, y, n * lam_LASSO) + Lasso_soln_Liu = lasso_Liu.fit() + active_set_Liu = np.nonzero(Lasso_soln_Liu != 0)[0] + nactive_Liu = active_set_Liu.shape[0] + active_Liu_bool = np.asarray([(np.in1d(active_set_Liu[a], true_set).sum() > 0) for a in range(nactive_Liu)], np.bool) + Liu_nreport = 0 + + if nactive_Liu > 0: + Liu_target = beta[Lasso_soln_Liu != 0] + df = lasso_Liu.summary(level=0.90, compute_intervals=True, dispersion=dispersion) + Liu_lower, Liu_upper, Liu_pval = np.asarray(df['lower_confidence']), \ + np.asarray(df['upper_confidence']), \ + np.asarray(df['pval']) + Liu_intervals = np.vstack((Liu_lower, Liu_upper)).T + cov_Liu, selective_Liu_power = coverage(Liu_intervals, Liu_pval, Liu_target, beta[Lasso_soln_Liu != 0]) + length_Liu = np.mean(Liu_intervals[:, 1] - Liu_intervals[:, 0]) + power_Liu = ((active_Liu_bool) * (np.logical_or((0. < Liu_intervals[:, 0]), + (0. 
> Liu_intervals[:, 1])))).sum() / float((beta != 0).sum()) + Liu_discoveries = BHfilter(Liu_pval, q=0.1) + power_Liu_BH = (Liu_discoveries * active_Liu_bool).sum() / float((beta != 0).sum()) + fdr_Liu_BH = (Liu_discoveries * ~active_Liu_bool).sum() / float(max(Liu_discoveries.sum(), 1.)) + + else: + Liu_nreport = 1 + cov_Liu, length_Liu, power_Liu, power_Liu_BH, fdr_Liu_BH, selective_Liu_power = [0., 0., 0., 0., 0., 0.] + Liu_discoveries = np.zeros(1) + + if tuning_rand == "lambda.min": + randomized_lasso = lasso.gaussian(X, + y, + feature_weights= n * lam_min * np.ones(p), + randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) + elif tuning_rand == "lambda.1se": + randomized_lasso = lasso.gaussian(X, + y, + feature_weights= n * lam_1se * np.ones(p), + randomizer_scale= np.sqrt(n) * randomizer_scale * sigma_) + else: + randomized_lasso = lasso.gaussian(X, + y, + feature_weights= lam_theory * np.ones(p), + randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) + signs = randomized_lasso.fit() + nonzero = signs != 0 + active_set_rand = np.asarray([t for t in range(p) if nonzero[t]]) + active_rand_bool = np.asarray([(np.in1d(active_set_rand[x], true_set).sum() > 0) for x in range(nonzero.sum())], np.bool) + sel_MLE = np.zeros(p) + ind_est = np.zeros(p) + randomized_lasso_est = np.zeros(p) + randomized_rel_lasso_est = np.zeros(p) + MLE_nreport = 0 + + if nonzero.sum() > 0: + target_randomized = beta[nonzero] + (observed_target, + cov_target, + cov_target_score, + alternatives) = full_targets(randomized_lasso.loglike, + randomized_lasso._W, + nonzero, + dispersion=dispersion) + + result = randomized_lasso.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + + MLE_estimate = result['MLE'] + ind_unbiased_estimator = result['unbiased'] + + sel_MLE[nonzero] = MLE_estimate + ind_est[nonzero] = ind_unbiased_estimator + MLE_intervals = np.asarray(result[['lower', 'upper']]) + MLE_pval = np.asarray(result['pvalue']) + + randomized_lasso_est = randomized_lasso.initial_soln + randomized_rel_lasso_est = randomized_lasso._beta_full + + cov_MLE, selective_MLE_power = coverage(MLE_intervals, MLE_pval, target_randomized, beta[nonzero]) + length_MLE = np.mean(MLE_intervals[:, 1] - MLE_intervals[:, 0]) + power_MLE = ((active_rand_bool) * (np.logical_or((0. < MLE_intervals[:, 0]), (0. > MLE_intervals[:, 1])))).sum() / float((beta != 0).sum()) + MLE_discoveries = BHfilter(MLE_pval, q=0.1) + power_MLE_BH = (MLE_discoveries * active_rand_bool).sum() / float((beta != 0).sum()) + fdr_MLE_BH = (MLE_discoveries * ~active_rand_bool).sum() / float(max(MLE_discoveries.sum(), 1.)) + bias_MLE = np.mean(MLE_estimate - target_randomized) + + partial_MLE_risk = (MLE_estimate - target_randomized).T.dot(MLE_estimate - target_randomized) + partial_ind_risk = (ind_unbiased_estimator - target_randomized).T.dot(ind_unbiased_estimator - target_randomized) + partial_randLasso_risk = (randomized_lasso_est[nonzero] - target_randomized).T.dot(randomized_lasso_est[nonzero] - target_randomized) + partial_relrandLasso_risk = (randomized_rel_lasso_est[nonzero] - target_randomized).T.dot(randomized_rel_lasso_est[nonzero] - target_randomized) + else: + MLE_nreport = 1 + cov_MLE, length_MLE, power_MLE, power_MLE_BH, fdr_MLE_BH, bias_MLE, selective_MLE_power = [0., 0., 0., 0., 0., 0., 0.] + MLE_discoveries = np.zeros(1) + partial_MLE_risk, partial_ind_risk, partial_randLasso_risk, partial_relrandLasso_risk = [0., 0., 0., 0.] 
+ + risks = np.vstack((relative_risk(sel_MLE, beta, Sigma), + relative_risk(ind_est, beta, Sigma), + relative_risk(randomized_lasso_est, beta, Sigma), + relative_risk(randomized_rel_lasso_est, beta, Sigma), + relative_risk(rel_LASSO, beta, Sigma), + relative_risk(glm_LASSO, beta, Sigma))) + + partial_risks = np.vstack((partial_MLE_risk, + partial_ind_risk, + partial_randLasso_risk, + partial_relrandLasso_risk, + partial_relLasso_risk, + partial_Lasso_risk)) + + naive_inf = np.vstack((cov_naive, length_naive, 0., nactive_LASSO, bias_naive, selective_naive_power, + power_naive, power_naive_BH, fdr_naive_BH, naive_discoveries.sum())) + Lee_inf = np.vstack((cov_Lee, length_Lee, inf_entries, nactive_LASSO, bias_Lee, selective_Lee_power, + power_Lee, power_Lee_BH, fdr_Lee_BH, Lee_discoveries.sum())) + Liu_inf = np.vstack((cov_Liu, length_Liu, 0., nactive_Liu, bias_Lee, selective_Liu_power, + power_Liu, power_Liu_BH, fdr_Liu_BH, Liu_discoveries.sum())) + MLE_inf = np.vstack((cov_MLE, length_MLE, 0., nonzero.sum(), bias_MLE, selective_MLE_power, + power_MLE, power_MLE_BH, fdr_MLE_BH, MLE_discoveries.sum())) + nreport = np.vstack((Lee_nreport, Liu_nreport, MLE_nreport)) + + return np.vstack((risks, naive_inf, Lee_inf, Liu_inf, MLE_inf, partial_risks, nreport)) + + + +def main(n=500, p=100, rho=0.35, s=5, beta_type=1, snr_values=np.array([0.15, 0.20, 0.31]), + target="selected", tuning_nonrand="lambda.1se", tuning_rand="lambda.1se", + randomizing_scale = np.sqrt(0.50), ndraw = 50, outpath = None): + + df_selective_inference = pd.DataFrame() + df_risk = pd.DataFrame() + + if n > p: + full_dispersion = True + else: + full_dispersion = False + + snr_list = [] + snr_list_0 = [] + for snr in snr_values: + snr_list.append(snr*np.ones(4)) + snr_list_0.append(snr*np.ones(2)) + output_overall = np.zeros(55) + if target == "selected": + for i in range(ndraw): + output_overall += np.squeeze(comparison_cvmetrics_selected(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type, snr=snr, + randomizer_scale=randomizing_scale, full_dispersion=full_dispersion, + tuning_nonrand =tuning_nonrand, tuning_rand=tuning_rand)) + elif target == "full": + for i in range(ndraw): + output_overall += np.squeeze(comparison_cvmetrics_full(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type, snr=snr, + randomizer_scale=randomizing_scale, full_dispersion=full_dispersion, + tuning_nonrand =tuning_nonrand, tuning_rand=tuning_rand)) + + nLee = output_overall[52] + nLiu = output_overall[53] + nMLE = output_overall[54] + + relative_risk = (output_overall[0:6] / float(ndraw)).reshape((1, 6)) + partial_risk = np.hstack(((output_overall[46:50] / float(ndraw-nMLE)).reshape((1, 4)), + (output_overall[50:52] / float(ndraw - nLee)).reshape((1, 2)))) + + nonrandomized_naive_inf = np.hstack(((output_overall[6:12] / float(ndraw - nLee)).reshape((1, 6)), + (output_overall[12:16] / float(ndraw)).reshape((1, 4)))) + nonrandomized_Lee_inf = np.hstack(((output_overall[16:22] / float(ndraw - nLee)).reshape((1, 6)), + (output_overall[22:26] / float(ndraw)).reshape((1, 4)))) + nonrandomized_Liu_inf = np.hstack(((output_overall[26:32] / float(ndraw - nLiu)).reshape((1, 6)), + (output_overall[32:36] / float(ndraw)).reshape((1, 4)))) + randomized_MLE_inf = np.hstack(((output_overall[36:42] / float(ndraw - nMLE)).reshape((1, 6)), + (output_overall[42:46] / float(ndraw)).reshape((1, 4)))) + + if target=="selected": + nonrandomized_Liu_inf[nonrandomized_Liu_inf==0] = 'NaN' + if target == "debiased": + nonrandomized_Liu_inf[nonrandomized_Liu_inf == 0] = 'NaN' + 
nonrandomized_Lee_inf[nonrandomized_Lee_inf == 0] = 'NaN' + + df_naive = pd.DataFrame(data=nonrandomized_naive_inf,columns=['coverage', 'length', 'prop-infty', 'tot-active', 'bias', 'sel-power', + 'power', 'power-BH', 'fdr-BH','tot-discoveries']) + df_naive['method'] = "Naive" + df_Lee = pd.DataFrame(data=nonrandomized_Lee_inf, columns=['coverage', 'length', 'prop-infty','tot-active','bias', 'sel-power', + 'power', 'power-BH', 'fdr-BH','tot-discoveries']) + df_Lee['method'] = "Lee" + + df_Liu = pd.DataFrame(data=nonrandomized_Liu_inf,columns=['coverage', 'length', 'prop-infty', 'tot-active','bias', 'sel-power', + 'power', 'power-BH', 'fdr-BH', 'tot-discoveries']) + df_Liu['method'] = "Liu" + + df_MLE = pd.DataFrame(data=randomized_MLE_inf, columns=['coverage', 'length', 'prop-infty', 'tot-active','bias', 'sel-power', + 'power', 'power-BH', 'fdr-BH', 'tot-discoveries']) + df_MLE['method'] = "MLE" + + df_risk_metrics = pd.DataFrame(data=relative_risk, columns=['sel-MLE', 'ind-est', 'rand-LASSO','rel-rand-LASSO', 'rel-LASSO', 'LASSO']) + df_risk_metrics['metric'] = "Full" + df_prisk_metrics = pd.DataFrame(data=partial_risk,columns=['sel-MLE', 'ind-est', 'rand-LASSO', 'rel-rand-LASSO', 'rel-LASSO','LASSO']) + df_prisk_metrics['metric'] = "Partial" + + df_selective_inference = df_selective_inference.append(df_naive, ignore_index=True) + df_selective_inference = df_selective_inference.append(df_Lee, ignore_index=True) + df_selective_inference = df_selective_inference.append(df_Liu, ignore_index=True) + df_selective_inference = df_selective_inference.append(df_MLE, ignore_index=True) + + df_risk = df_risk.append(df_risk_metrics, ignore_index=True) + df_risk = df_risk.append(df_prisk_metrics, ignore_index=True) + + snr_list = list(itertools.chain.from_iterable(snr_list)) + df_selective_inference['n'] = n + df_selective_inference['p'] = p + df_selective_inference['s'] = s + df_selective_inference['rho'] = rho + df_selective_inference['beta-type'] = beta_type + df_selective_inference['snr'] = pd.Series(np.asarray(snr_list)) + df_selective_inference['target'] = target + + snr_list_0 = list(itertools.chain.from_iterable(snr_list_0)) + df_risk['n'] = n + df_risk['p'] = p + df_risk['s'] = s + df_risk['rho'] = rho + df_risk['beta-type'] = beta_type + df_risk['snr'] = pd.Series(np.asarray(snr_list_0)) + df_risk['target'] = target + + if outpath is None: + outpath = os.path.dirname(__file__) + + outfile_inf_csv = os.path.join(outpath, "dims_" + str(n) + "_" + str(p) + "_inference_betatype" + str(beta_type) + target + "_rho_" + str(rho) + ".csv") + outfile_risk_csv = os.path.join(outpath, "dims_" + str(n) + "_" + str(p) + "_risk_betatype" + str(beta_type) + target + "_rho_" + str(rho) + ".csv") + outfile_inf_html = os.path.join(outpath, "dims_" + str(n) + "_" + str(p) + "_inference_betatype" + str(beta_type) + target + "_rho_" + str(rho) + ".html") + outfile_risk_html = os.path.join(outpath, "dims_" + str(n) + "_" + str(p) + "_risk_betatype" + str(beta_type) + target + "_rho_" + str(rho) + ".html") + df_selective_inference.to_csv(outfile_inf_csv, index=False) + df_risk.to_csv(outfile_risk_csv, index=False) + df_selective_inference.to_html(outfile_inf_html) + df_risk.to_html(outfile_risk_html) + +if __name__ == "__main__": + main() From 2cf0e52a4c13befe944b0d2ecfd6c0ecdc7fd793 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Wed, 24 Jun 2020 13:49:44 -0400 Subject: [PATCH 043/187] added plots to the examples-MLE file --- selectinf/randomized/tests/test_cv_mle.py | 180 +++++++++++++++++++++- 1 file changed, 
179 insertions(+), 1 deletion(-) diff --git a/selectinf/randomized/tests/test_cv_mle.py b/selectinf/randomized/tests/test_cv_mle.py index 052824840..bd356ef6b 100644 --- a/selectinf/randomized/tests/test_cv_mle.py +++ b/selectinf/randomized/tests/test_cv_mle.py @@ -118,10 +118,183 @@ def relative_risk(est, truth, Sigma): else: return (est - truth).T.dot(Sigma).dot(est - truth) +from rpy2 import robjects + +def plotRisk(df_risk): + robjects.r(""" + library("ggplot2") + library("magrittr") + library("tidyr") + library("dplyr") + + plot_risk <- function(df_risk, outpath="/Users/psnigdha/adjusted_MLE/plots/", resolution=300, height= 7.5, width=15) + { + date = 1:length(unique(df_risk$snr)) + df_risk = filter(df_risk, metric == "Full") + df = cbind(df_risk, date) + risk = df %>% + gather(key, value, sel.MLE, rand.LASSO, LASSO) %>% + ggplot(aes(x=date, y=value, colour=key, shape=key, linetype=key)) + + geom_point(size=3) + + geom_line(aes(linetype=key), size=1) + + ylim(0.01,1.2)+ + labs(y="relative risk", x = "Signal regimes: snr") + + scale_x_continuous(breaks=1:length(unique(df_risk$snr)), label = sapply(df_risk$snr, toString)) + + theme(legend.position="top", legend.title = element_blank()) + indices = sort(c("sel.MLE", "rand.LASSO", "LASSO"), index.return= TRUE)$ix + names = c("sel-MLE", "rand-LASSO", "LASSO") + risk = risk + scale_color_manual(labels = names[indices], values=c("#008B8B", "#104E8B","#B22222")[indices]) + + scale_shape_manual(labels = names[indices], values=c(15, 17, 16)[indices]) + + scale_linetype_manual(labels = names[indices], values = c(1,1,2)[indices]) + outfile = paste(outpath, 'risk.png', sep="") + outfile = paste(outpath, 'risk.png', sep="") + ggsave(outfile, plot = risk, dpi=resolution, dev='png', height=height, width=width, units="cm")} + """) + + robjects.pandas2ri.activate() + r_df_risk = robjects.conversion.py2ri(df_risk) + R_plot = robjects.globalenv['plot_risk'] + R_plot(r_df_risk) + + +def plotCoveragePower(df_inference): + robjects.r(""" + library("ggplot2") + library("magrittr") + library("tidyr") + library("reshape") + library("cowplot") + library("dplyr") + + plot_coverage_lengths <- function(df_inference, outpath="/Users/psnigdha/adjusted_MLE/plots/", + resolution=200, height_plot1= 6.5, width_plot1=12, + height_plot2=13, width_plot2=13) + { + snr.len = length(unique(df_inference$snr)) + df_inference = arrange(df_inference, method) + target = toString(df_inference$target[1]) + df = data.frame(snr = sapply(unique(df_inference$snr), toString), + MLE = 100*df_inference$coverage[((2*snr.len)+1):(3*snr.len)], + Lee = 100*df_inference$coverage[1:snr.len], + Naive = 100*df_inference$coverage[((3*snr.len)+1):(4*snr.len)]) + if(target== "selected"){ + data.m <- melt(df, id.vars='snr') + coverage = ggplot(data.m, aes(snr, value)) + + geom_bar(aes(fill = variable), width = 0.4, position = position_dodge(width=0.5), stat="identity") + + geom_hline(yintercept = 90, linetype="dotted") + + labs(y="coverage: partial", x = "Signal regimes: snr") + + theme(legend.position="top", + legend.title = element_blank()) + coverage = coverage + + scale_fill_manual(labels = c("MLE-based","Lee", "Naive"), values=c("#008B8B", "#B22222", "#FF6347"))} else{ + df = cbind(df, Liu = 100*df_inference$coverage[((snr.len)+1):(2*snr.len)]) + df <- df[c("snr", "MLE", "Liu", "Lee", "Naive")] + data.m <- melt(df, id.vars='snr') + coverage = ggplot(data.m, aes(snr, value)) + + geom_bar(aes(fill = variable), width = 0.4, position = position_dodge(width=0.5), stat="identity") + + 
geom_hline(yintercept = 90, linetype="dotted") + + labs(y="coverage: full", x = "Signal regimes: snr") + + theme(legend.position="top", legend.title = element_blank()) + coverage = coverage + + scale_fill_manual(labels = c("MLE-based", "Liu", "Lee", "Naive"), values=c("#008B8B", "#104E8B", "#B22222", "#FF6347"))} + + outfile = paste(outpath, 'coverage.png', sep="") + ggsave(outfile, plot = coverage, dpi=resolution, dev='png', height=height_plot1, width=width_plot1, units="cm") + + df = data.frame(snr = sapply(unique(df_inference$snr), toString), + MLE = 100*df_inference$sel.power[((2*snr.len)+1):(3*snr.len)], + Lee = 100*df_inference$sel.power[1:snr.len]) + if(target== "selected"){ + data.m <- melt(df, id.vars='snr') + sel_power = ggplot(data.m, aes(snr, value)) + + geom_bar(aes(fill = variable), width = 0.4, position = position_dodge(width=0.5), stat="identity") + + labs(y="power: partial", x = "Signal regimes: snr") + + theme(legend.position="top", legend.title = element_blank()) + sel_power = sel_power + scale_fill_manual(labels = c("MLE-based","Lee"), values=c("#008B8B", "#B22222"))} else{ + df = cbind(df, Liu = 100*df_inference$sel.power[((snr.len)+1):(2*snr.len)]) + df <- df[,c("snr", "MLE", "Liu", "Lee")] + data.m <- melt(df, id.vars='snr') + sel_power = ggplot(data.m, aes(snr, value)) + + geom_bar(aes(fill = variable), width = 0.4, position = position_dodge(width=0.5), stat="identity") + + labs(y="power: full", x = "Signal regimes: snr") + + theme(legend.position="top", legend.title = element_blank()) + sel_power = sel_power + scale_fill_manual(labels = c("MLE-based","Liu","Lee"), values=c("#008B8B", "#104E8B", "#B22222"))} + + outfile = paste(outpath, 'selective_power.png', sep="") + ggsave(outfile, plot = sel_power, dpi=resolution, dev='png', height=height_plot1, width=width_plot1, units="cm") + + if(target== "selected"){ + test_data <-data.frame(MLE = filter(df_inference, method == "MLE")$length, + Lee = filter(df_inference, method == "Lee")$length, + Naive = filter(df_inference, method == "Naive")$length, + date = 1:length(unique(df_inference$snr))) + lengths = test_data %>% + gather(key, value, MLE, Lee, Naive) %>% + ggplot(aes(x=date, y=value, colour=key, shape=key, linetype=key)) + + geom_point(size=3) + + geom_line(aes(linetype=key), size=1) + + ylim(0.,max(test_data$MLE, test_data$Lee, test_data$Naive) + 0.2)+ + labs(y="lengths:partial", x = "Signal regimes: snr") + + scale_x_continuous(breaks=1:length(unique(df_inference$snr)), label = sapply(unique(df_inference$snr), toString))+ + theme(legend.position="top", legend.title = element_blank()) + + indices = sort(c("MLE", "Lee", "Naive"), index.return= TRUE)$ix + names = c("MLE-based", "Lee", "Naive") + lengths = lengths + scale_color_manual(labels = names[indices], values=c("#008B8B","#B22222", "#FF6347")[indices]) + + scale_shape_manual(labels = names[indices], values=c(15, 17, 16)[indices]) + + scale_linetype_manual(labels = names[indices], values = c(1,1,2)[indices])} else{ + test_data <-data.frame(MLE = filter(df_inference, method == "MLE")$length, + Lee = filter(df_inference, method == "Lee")$length, + Naive = filter(df_inference, method == "Naive")$length, + Liu = filter(df_inference, method == "Liu")$length, + date = 1:length(unique(df_inference$snr))) + lengths= test_data %>% + gather(key, value, MLE, Lee, Naive, Liu) %>% + ggplot(aes(x=date, y=value, colour=key, shape=key, linetype=key)) + + geom_point(size=3) + + geom_line(aes(linetype=key), size=1) + + ylim(0.,max(test_data$MLE, test_data$Lee, test_data$Naive, 
test_data$Liu) + 0.2)+ + labs(y="lengths: full", x = "Signal regimes: snr") + + scale_x_continuous(breaks=1:length(unique(df_inference$snr)), label = sapply(unique(df_inference$snr), toString))+ + theme(legend.position="top", legend.title = element_blank()) + + indices = sort(c("MLE", "Liu", "Lee", "Naive"), index.return= TRUE)$ix + names = c("MLE-based", "Lee", "Naive", "Liu") + lengths = lengths + scale_color_manual(labels = names[indices], values=c("#008B8B","#B22222", "#FF6347", "#104E8B")[indices]) + + scale_shape_manual(labels = names[indices], values=c(15, 17, 16, 15)[indices]) + + scale_linetype_manual(labels = names[indices], values = c(1,1,2,1)[indices])} + + prop = filter(df_inference, method == "Lee")$prop.infty + df = data.frame(snr = sapply(unique(df_inference$snr), toString), + infinite = 100*prop) + data.prop <- melt(df, id.vars='snr') + pL = ggplot(data.prop, aes(snr, value)) + + geom_bar(aes(fill = variable), width = 0.4, position = position_dodge(width=0.5), stat="identity") + + labs(y="infinite intervals (%)", x = "Signal regimes: snr") + + theme(legend.position="top", + legend.title = element_blank()) + pL = pL + scale_fill_manual(labels = c("Lee"), values=c("#B22222")) + prow <- plot_grid( pL + theme(legend.position="none"), + lengths + theme(legend.position="none"), + align = 'vh', + hjust = -1, + ncol = 1) + + legend <- get_legend(lengths+ theme(legend.direction = "horizontal",legend.justification="center" ,legend.box.just = "bottom")) + p <- plot_grid(prow, ncol=1, legend, rel_heights = c(2., .2)) + outfile = paste(outpath, 'length.png', sep="") + ggsave(outfile, plot = p, dpi=resolution, dev='png', height=height_plot2, width=width_plot2, units="cm")} + """) + + robjects.pandas2ri.activate() + r_df_inference = robjects.conversion.py2ri(df_inference) + R_plot = robjects.globalenv['plot_coverage_lengths'] + R_plot(r_df_inference) def comparison_cvmetrics_selected(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=1, snr=0.20, randomizer_scale=np.sqrt(0.50), full_dispersion=True, - tuning_nonrand="lambda.min", tuning_rand="lambda.1se"): + tuning_nonrand="lambda.min", tuning_rand="lambda.1se", + plot=False): X, y, _, _, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr) true_mean = X.dot(beta) @@ -640,5 +813,10 @@ def main(n=500, p=100, rho=0.35, s=5, beta_type=1, snr_values=np.array([0.15, 0. 
df_selective_inference.to_html(outfile_inf_html) df_risk.to_html(outfile_risk_html) + if plot is True: + plotRisk(df_risk) + plotCoveragePower(df_selective_inference) + + if __name__ == "__main__": main() From c91670c8a2bd2c703742e4109d5f82b1f5b0c96b Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 24 Jun 2020 10:46:38 -0700 Subject: [PATCH 044/187] including sim_xy directly in script --- selectinf/randomized/tests/test_cv_mle.py | 116 ++++++++++++++++++++-- 1 file changed, 107 insertions(+), 9 deletions(-) diff --git a/selectinf/randomized/tests/test_cv_mle.py b/selectinf/randomized/tests/test_cv_mle.py index bd356ef6b..7c8e16c64 100644 --- a/selectinf/randomized/tests/test_cv_mle.py +++ b/selectinf/randomized/tests/test_cv_mle.py @@ -7,14 +7,107 @@ from scipy.stats import norm as ndist from selectinf.randomized.lasso import lasso, full_targets, selected_targets, debiased_targets -#from selection.algorithms.lasso import lasso as lasso_full +from selectinf.algorithms.lasso import ROSI def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1): rpy.r(''' - source('~/best-subset/bestsubset/R/sim.R') - sim_xy = sim.xy - ''') + + #' Predictors and responses generation. + #' + #' Generate a predictor matrix x, and response vector y, following a specified + #' setup. Actually, two pairs of predictors and responses are generated: + #' one for training, and one for validation. + #' + #' @param n,p The number of training observations, and the number of predictors. + #' @param nval The number of validation observations. + #' @param rho Parameter that drives pairwise correlations of the predictor + #' variables; specifically, predictors i and j have population correlation + #' rho^abs(i-j). Default is 0. + #' @param s number of nonzero coefficients in the underlying regression model. + #' Default is 5. (Ignored if beta.type is 4, in which case the number of + #' nonzero coefficients is 6; and if beta.type is 5, it is interpreted as a + #' the number of strongly nonzero coefficients in a weak sparsity model.) + #' @param beta.type Integer taking values in between 1 and 5, used to specify + #' the pattern of nonzero coefficients in the underlying regression model; see + #' details below. Default is 1. + #' @param snr Desired signal-to-noise ratio (SNR), i.e., var(mu)/sigma^2 where + #' mu is mean and sigma^2 is the error variance. The error variance is set so + #' that the given SNR is achieved. Default is 1. + #' @return A list with the following components: x, y, xval, yval, Sigma, beta, + #' and sigma. + #' + #' @details The data model is: \eqn{Y \sim N(X\beta, \sigma^2 I)}. + #' The predictor variables have covariance matrix Sigma, with (i,j)th entry + #' rho^abs(i-j). The error variance sigma^2 is set according to the desired + #' signal-to-noise ratio. 
The first 4 options for the nonzero pattern + #' of the underlying regression coefficients beta follow the simulation setup + #' in Bertsimas, King, and Mazumder (2016), and the 5th is a weak sparsity + #' option: + #' \itemize{ + #' \item 1: beta has s components of 1, occurring at (roughly) equally-spaced + #' indices in between 1 and p + #' \item 2: beta has its first s components equal to 1 + #' \item 3: beta has its first s components taking nonzero values, where the + #' decay in a linear fashion from 10 to 0.5 + #' \item 4: beta has its first 6 components taking the nonzero values -10,-6, + #' -2,2,6,10 + #' \item 5: beta has its first s components equal to 1, and the rest decaying + #' to zero at an exponential rate + #' } + #' + #' @author Trevor Hastie, Rob Tibshirani, Ryan Tibshirani + #' @references Simulation setup based on "Best subset selection via a modern + #' optimization lens" by Dimitris Bertsimas, Angela King, and Rahul Mazumder, + #' Annals of Statistics, 44(2), 813-852, 2016. + #' @example examples/ex.fs.R + #' @export sim.xy + + sim.xy = function(n, p, nval, rho=0, s=5, beta.type=1, snr=1) { + # Generate predictors + x = matrix(rnorm(n*p),n,p) + xval = matrix(rnorm(nval*p),nval,p) + + # Introduce autocorrelation, if needed + if (rho != 0) { + inds = 1:p + Sigma = rho^abs(outer(inds, inds, "-")) + obj = svd(Sigma) + Sigma.half = obj$u %*% (sqrt(diag(obj$d))) %*% t(obj$v) + x = x %*% Sigma.half + xval = xval %*% Sigma.half + } + else Sigma = diag(1,p) + + # Generate underlying coefficients + s = min(s,p) + beta = rep(0,p) + if (beta.type==1) { + beta[round(seq(1,p,length=s))] = 1 + } else if (beta.type==2) { + beta[1:s] = 1 + } else if (beta.type==3) { + beta[1:s] = seq(10,0.5,length=s) + } else if (beta.type==4) { + beta[1:6] = c(-10,-6,-2,2,6,10) + } else { + beta[1:s] = 1 + beta[(s+1):p] = 0.5^(1:(p-s)) + } + + # Set snr based on sample variance on infinitely large test set + vmu = as.numeric(t(beta) %*% Sigma %*% beta) + sigma = sqrt(vmu/snr) + + # Generate responses + y = as.numeric(x %*% beta + rnorm(n)*sigma) + yval = as.numeric(xval %*% beta + rnorm(nval)*sigma) + + list(x=x,y=y,xval=xval,yval=yval,Sigma=Sigma,beta=beta,sigma=sigma) + } + + sim_xy = sim.xy + ''') r_simulate = rpy.globalenv['sim_xy'] sim = r_simulate(n, p, nval, rho, s, beta_type, snr) @@ -113,7 +206,7 @@ def BHfilter(pval, q=0.2): def relative_risk(est, truth, Sigma): - if (truth != 0).sum > 0: + if (truth != 0).sum() > 0: return (est - truth).T.dot(Sigma).dot(est - truth) / truth.T.dot(Sigma).dot(truth) else: return (est - truth).T.dot(Sigma).dot(est - truth) @@ -337,8 +430,11 @@ def comparison_cvmetrics_selected(n=500, p=100, nval=500, rho=0.35, s=5, beta_ty post_LASSO_OLS = np.linalg.pinv(X[:, active_LASSO]).dot(y) rel_LASSO[active_LASSO] = post_LASSO_OLS Lee_target = np.linalg.pinv(X[:, active_LASSO]).dot(X.dot(beta)) - Lee_intervals, Lee_pval = selInf_R(X, y, glm_LASSO, n * lam_LASSO, sigma_, Type=0, alpha=0.1) - + try: + Lee_intervals, Lee_pval = selInf_R(X, y, glm_LASSO, n * lam_LASSO, sigma_, Type=0, alpha=0.1) + except: + Lee_intervals, Lee_pval = np.array([]), np.array([]) + if (Lee_pval.shape[0] == Lee_target.shape[0]): cov_Lee, selective_Lee_power = coverage(Lee_intervals, Lee_pval, Lee_target, beta[active_LASSO]) @@ -580,7 +676,8 @@ def comparison_cvmetrics_full(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=1 Lee_discoveries = np.zeros(1) partial_Lasso_risk, partial_relLasso_risk = [0., 0.] 
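    # Illustrative note: `ROSI` in the replacement below stands in for the
    # previously commented-out `lasso_full` import (see the import change at the
    # top of this diff); the fitted object is then used further down via
    # `lasso_Liu.fit()` and
    # `lasso_Liu.summary(level=0.90, compute_intervals=True, dispersion=dispersion)`.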
- lasso_Liu = lasso_full.gaussian(X, y, n * lam_LASSO) + lasso_Liu = ROSI.gaussian(X, y, n * lam_LASSO) + print(type(lasso_Liu)) Lasso_soln_Liu = lasso_Liu.fit() active_set_Liu = np.nonzero(Lasso_soln_Liu != 0)[0] nactive_Liu = active_set_Liu.shape[0] @@ -705,7 +802,7 @@ def comparison_cvmetrics_full(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=1 def main(n=500, p=100, rho=0.35, s=5, beta_type=1, snr_values=np.array([0.15, 0.20, 0.31]), target="selected", tuning_nonrand="lambda.1se", tuning_rand="lambda.1se", - randomizing_scale = np.sqrt(0.50), ndraw = 50, outpath = None): + randomizing_scale = np.sqrt(0.50), ndraw=2, outpath = None): df_selective_inference = pd.DataFrame() df_risk = pd.DataFrame() @@ -820,3 +917,4 @@ def main(n=500, p=100, rho=0.35, s=5, beta_type=1, snr_values=np.array([0.15, 0. if __name__ == "__main__": main() + main(target="full") From 7aab2a944f78f4cf0fca31233526d1117aa7350c Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 24 Jun 2020 14:54:52 -0700 Subject: [PATCH 045/187] renaming columns in output --- selectinf/algorithms/lasso.py | 14 +++++++------- selectinf/randomized/query.py | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/selectinf/algorithms/lasso.py b/selectinf/algorithms/lasso.py index f885eb964..674174510 100644 --- a/selectinf/algorithms/lasso.py +++ b/selectinf/algorithms/lasso.py @@ -323,7 +323,7 @@ def summary(self, df = pd.DataFrame(index=self.active, data=dict([(n, d) for n, d in zip(['variable', - 'pval', + 'pvalue', 'lasso', 'onestep', 'lower_confidence', @@ -1487,7 +1487,7 @@ def _data_carving_deprec(X, y, splitting_pvalues, splitting_intervals), L else: - pvalues = [p for _, p in L.summary("twosided")['pval']] + pvalues = [p for _, p in L.summary("twosided")['pvalue']] intervals = np.array([L.intervals['lower'], L.intervals['upper']]).T if splitting: splitting_pvalues = np.random.sample(len(pvalues)) @@ -1957,14 +1957,14 @@ def summary(self, Estimate of dispersion. Defaults to a Pearson's X^2 estimate in the relaxed model. truth : np.array - True values of each beta for selected variables. If not None, a column 'pval' are p-values + True values of each beta for selected variables. If not None, a column 'pvalue' are p-values computed under these corresponding null hypotheses. Returns ------- pval_summary : np.recarray Array with one entry per active variable. - Columns are 'variable', 'pval', 'lasso', 'onestep', 'lower_trunc', 'upper_trunc', 'sd'. + Columns are 'variable', 'pvalue', 'lasso', 'onestep', 'lower_trunc', 'upper_trunc', 'sd'. """ if len(self.active) > 0: @@ -2009,7 +2009,7 @@ def summary(self, df = pd.DataFrame(index=self.active, data=dict([(n, d) for n, d in zip(['variable', - 'pval', + 'pvalue', 'lasso', 'onestep', 'sd', @@ -2328,7 +2328,7 @@ def summary(self, level=0.05, ------- pval_summary : np.recarray Array with one entry per active variable. - Columns are 'variable', 'pval', 'lasso', 'onestep', 'lower_trunc', 'upper_trunc', 'sd'. + Columns are 'variable', 'pvalue', 'lasso', 'onestep', 'lower_trunc', 'upper_trunc', 'sd'. 
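        Examples
        --------
        A minimal, illustrative sketch of the call pattern this summary
        supports, based on the usage in test_cv_mle.py from this patch series;
        ``X``, ``y``, ``lam`` and ``dispersion`` below are placeholders, and the
        confidence columns assume ``compute_intervals=True``::

            L = ROSI.gaussian(X, y, lam)
            L.fit()
            df = L.summary(level=0.90, compute_intervals=True,
                           dispersion=dispersion)
            pvalues = np.asarray(df['pvalue'])
            intervals = np.asarray(df[['lower_confidence', 'upper_confidence']])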
""" if len(self.active) > 0: @@ -2366,7 +2366,7 @@ def summary(self, level=0.05, df = pd.DataFrame(index=self.active, data=dict([(n, d) for n, d in zip(['variable', - 'pval', + 'pvalue', 'lasso', 'onestep', 'sd', diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index c703afa8c..19fb677bb 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -194,7 +194,7 @@ def summary(self, observed_target, target_cov, target_score_cov)[0] - MLE_intervals = np.asarray(MLE[['lower', 'upper']]) + MLE_intervals = np.asarray(MLE[['lower_confidence', 'upper_confidence']]) intervals = self.sampler.confidence_intervals( observed_target, @@ -205,8 +205,8 @@ def summary(self, initial_guess=MLE_intervals, level=level) - result.insert(2, 'lower', intervals[:,0]) - result.insert(3, 'upper', intervals[:,1]) + result.insert(2, 'lower_confidence', intervals[:,0]) + result.insert(3, 'upper_confidence', intervals[:,1]) if not np.all(parameter == 0): result.insert(4, 'pivot', pivots) @@ -518,8 +518,8 @@ def summary(self, result = pd.DataFrame({'target':observed_target, 'pvalue':pvalues, - 'lower':intervals[:,0], - 'upper':intervals[:,1]}) + 'lower_confidence':intervals[:,0], + 'upper_confidence':intervals[:,1]}) if not np.all(parameter == 0): result.insert(4, 'pivot', pivots) @@ -1684,8 +1684,8 @@ def selective_MLE(observed_target, 'SE':np.sqrt(np.diag(observed_info_mean)), 'Zvalue':Z_scores, 'pvalue':pvalues, - 'lower':intervals[:,0], - 'upper':intervals[:,1], + 'lower_confidence':intervals[:,0], + 'upper_confidence':intervals[:,1], 'unbiased':ind_unbiased_estimator}) return result, observed_info_mean, log_ref From 0d3b6a55f61f3e93a94ee793be79d5ab394f631f Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 24 Jun 2020 14:55:44 -0700 Subject: [PATCH 046/187] updates on mle script --- selectinf/randomized/tests/test_cv_mle.py | 160 +++++++++++++++++----- 1 file changed, 127 insertions(+), 33 deletions(-) diff --git a/selectinf/randomized/tests/test_cv_mle.py b/selectinf/randomized/tests/test_cv_mle.py index 7c8e16c64..f7a1bbc19 100644 --- a/selectinf/randomized/tests/test_cv_mle.py +++ b/selectinf/randomized/tests/test_cv_mle.py @@ -1,9 +1,13 @@ +from __future__ import division + import numpy as np, os, itertools import pandas as pd import rpy2.robjects as rpy from rpy2.robjects import numpy2ri rpy.numpy2ri.activate() +from rpy2.robjects import pandas2ri +from rpy2.robjects.conversion import localconverter from scipy.stats import norm as ndist from selectinf.randomized.lasso import lasso, full_targets, selected_targets, debiased_targets @@ -220,7 +224,7 @@ def plotRisk(df_risk): library("tidyr") library("dplyr") - plot_risk <- function(df_risk, outpath="/Users/psnigdha/adjusted_MLE/plots/", resolution=300, height= 7.5, width=15) + plot_risk <- function(df_risk, outpath="plots/", resolution=300, height= 7.5, width=15) { date = 1:length(unique(df_risk$snr)) df_risk = filter(df_risk, metric == "Full") @@ -244,8 +248,9 @@ def plotRisk(df_risk): ggsave(outfile, plot = risk, dpi=resolution, dev='png', height=height, width=width, units="cm")} """) - robjects.pandas2ri.activate() - r_df_risk = robjects.conversion.py2ri(df_risk) + #pandas2ri.activate() + with localconverter(robjects.default_converter + pandas2ri.converter): + r_df_risk = robjects.conversion.py2rpy(df_risk) R_plot = robjects.globalenv['plot_risk'] R_plot(r_df_risk) @@ -259,7 +264,7 @@ def plotCoveragePower(df_inference): library("cowplot") library("dplyr") - plot_coverage_lengths <- function(df_inference, 
outpath="/Users/psnigdha/adjusted_MLE/plots/", + plot_coverage_lengths <- function(df_inference, outpath="plots/", resolution=200, height_plot1= 6.5, width_plot1=12, height_plot2=13, width_plot2=13) { @@ -379,21 +384,41 @@ def plotCoveragePower(df_inference): ggsave(outfile, plot = p, dpi=resolution, dev='png', height=height_plot2, width=width_plot2, units="cm")} """) - robjects.pandas2ri.activate() - r_df_inference = robjects.conversion.py2ri(df_inference) + #pandas2ri.activate() + with localconverter(robjects.default_converter + pandas2ri.converter): + r_df_inference = robjects.conversion.py2rpy(df_inference) R_plot = robjects.globalenv['plot_coverage_lengths'] R_plot(r_df_inference) -def comparison_cvmetrics_selected(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=1, snr=0.20, - randomizer_scale=np.sqrt(0.50), full_dispersion=True, - tuning_nonrand="lambda.min", tuning_rand="lambda.1se", - plot=False): - - X, y, _, _, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr) +def comparison_cvmetrics_selected(n=500, + p=100, + nval=500, + rho=0.35, + s=5, + beta_type=1, + snr=0.20, + randomizer_scale=np.sqrt(0.50), + full_dispersion=True, + tuning_nonrand="lambda.min", + tuning_rand="lambda.1se"): + + (X, + y, + _, + _, + Sigma, + beta, + sigma) = sim_xy(n=n, + p=p, + nval=nval, + rho=rho, + s=s, + beta_type=beta_type, + snr=snr) true_mean = X.dot(beta) - print("snr", snr) + X -= X.mean(0)[None, :] - X /= (X.std(0)[None, :] * np.sqrt(n / (n - 1.))) + X /= (X.std(0)[None, :] * np.sqrt(n / (n - 1))) y = y - y.mean() true_set = np.asarray([u for u in range(p) if beta[u] != 0]) @@ -405,8 +430,17 @@ def comparison_cvmetrics_selected(n=500, p=100, nval=500, rho=0.35, s=5, beta_ty sigma_ = np.std(y) print("estimated and true sigma", sigma, sigma_) - lam_theory = sigma_ * 1. * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) - glm_LASSO_theory, glm_LASSO_1se, glm_LASSO_min, lam_min, lam_1se = glmnet_lasso(X, y, lam_theory/float(n)) + lam_theory = sigma_ * 1. * np.mean(np.fabs(np.dot(X.T, + np.random.standard_normal((n, + 2000)))).max(0)) + (glm_LASSO_theory, + glm_LASSO_1se, + glm_LASSO_min, + lam_min, + lam_1se) = glmnet_lasso(X, + y, + lam_theory/float(n)) + if tuning_nonrand == "lambda.min": lam_LASSO = lam_min glm_LASSO = glm_LASSO_min @@ -416,6 +450,7 @@ def comparison_cvmetrics_selected(n=500, p=100, nval=500, rho=0.35, s=5, beta_ty else: lam_LASSO = lam_theory/float(n) glm_LASSO = glm_LASSO_theory + active_LASSO = (glm_LASSO != 0) nactive_LASSO = active_LASSO.sum() active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]]) @@ -802,7 +837,7 @@ def comparison_cvmetrics_full(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=1 def main(n=500, p=100, rho=0.35, s=5, beta_type=1, snr_values=np.array([0.15, 0.20, 0.31]), target="selected", tuning_nonrand="lambda.1se", tuning_rand="lambda.1se", - randomizing_scale = np.sqrt(0.50), ndraw=2, outpath = None): + randomizing_scale = np.sqrt(0.50), ndraw=4, outpath = None, plot=True): df_selective_inference = pd.DataFrame() df_risk = pd.DataFrame() @@ -820,14 +855,30 @@ def main(n=500, p=100, rho=0.35, s=5, beta_type=1, snr_values=np.array([0.15, 0. 
output_overall = np.zeros(55) if target == "selected": for i in range(ndraw): - output_overall += np.squeeze(comparison_cvmetrics_selected(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type, snr=snr, - randomizer_scale=randomizing_scale, full_dispersion=full_dispersion, - tuning_nonrand =tuning_nonrand, tuning_rand=tuning_rand)) + output_overall += np.squeeze(comparison_cvmetrics_selected(n=n, + p=p, + nval=n, + rho=rho, + s=s, + beta_type=beta_type, + snr=snr, + randomizer_scale=randomizing_scale, + full_dispersion=full_dispersion, + tuning_nonrand =tuning_nonrand, + tuning_rand=tuning_rand)) elif target == "full": for i in range(ndraw): - output_overall += np.squeeze(comparison_cvmetrics_full(n=n, p=p, nval=n, rho=rho, s=s, beta_type=beta_type, snr=snr, - randomizer_scale=randomizing_scale, full_dispersion=full_dispersion, - tuning_nonrand =tuning_nonrand, tuning_rand=tuning_rand)) + output_overall += np.squeeze(comparison_cvmetrics_full(n=n, + p=p, + nval=n, + rho=rho, + s=s, + beta_type=beta_type, + snr=snr, + randomizer_scale=randomizing_scale, + full_dispersion=full_dispersion, + tuning_nonrand =tuning_nonrand, + tuning_rand=tuning_rand)) nLee = output_overall[52] nLiu = output_overall[53] @@ -852,24 +903,66 @@ def main(n=500, p=100, rho=0.35, s=5, beta_type=1, snr_values=np.array([0.15, 0. nonrandomized_Liu_inf[nonrandomized_Liu_inf == 0] = 'NaN' nonrandomized_Lee_inf[nonrandomized_Lee_inf == 0] = 'NaN' - df_naive = pd.DataFrame(data=nonrandomized_naive_inf,columns=['coverage', 'length', 'prop-infty', 'tot-active', 'bias', 'sel-power', - 'power', 'power-BH', 'fdr-BH','tot-discoveries']) + df_naive = pd.DataFrame(data=nonrandomized_naive_inf,columns=['coverage', + 'length', + 'prop-infty', + 'tot-active', + 'bias', + 'sel-power', + 'power', + 'power-BH', + 'fdr-BH', + 'tot-discoveries']) df_naive['method'] = "Naive" - df_Lee = pd.DataFrame(data=nonrandomized_Lee_inf, columns=['coverage', 'length', 'prop-infty','tot-active','bias', 'sel-power', - 'power', 'power-BH', 'fdr-BH','tot-discoveries']) + df_Lee = pd.DataFrame(data=nonrandomized_Lee_inf, columns=['coverage', + 'length', + 'prop-infty', + 'tot-active', + 'bias', + 'sel-power', + 'power', + 'power-BH', + 'fdr-BH', + 'tot-discoveries']) df_Lee['method'] = "Lee" - df_Liu = pd.DataFrame(data=nonrandomized_Liu_inf,columns=['coverage', 'length', 'prop-infty', 'tot-active','bias', 'sel-power', - 'power', 'power-BH', 'fdr-BH', 'tot-discoveries']) + df_Liu = pd.DataFrame(data=nonrandomized_Liu_inf,columns=['coverage', + 'length', + 'prop-infty', + 'tot-active', + 'bias', + 'sel-power', + 'power', + 'power-BH', + 'fdr-BH', + 'tot-discoveries']) df_Liu['method'] = "Liu" - df_MLE = pd.DataFrame(data=randomized_MLE_inf, columns=['coverage', 'length', 'prop-infty', 'tot-active','bias', 'sel-power', - 'power', 'power-BH', 'fdr-BH', 'tot-discoveries']) + df_MLE = pd.DataFrame(data=randomized_MLE_inf, columns=['coverage', + 'length', + 'prop-infty', + 'tot-active', + 'bias', + 'sel-power', + 'power', + 'power-BH', + 'fdr-BH', + 'tot-discoveries']) df_MLE['method'] = "MLE" - df_risk_metrics = pd.DataFrame(data=relative_risk, columns=['sel-MLE', 'ind-est', 'rand-LASSO','rel-rand-LASSO', 'rel-LASSO', 'LASSO']) + df_risk_metrics = pd.DataFrame(data=relative_risk, columns=['sel-MLE', + 'ind-est', + 'rand-LASSO', + 'rel-rand-LASSO', + 'rel-LASSO', + 'LASSO']) df_risk_metrics['metric'] = "Full" - df_prisk_metrics = pd.DataFrame(data=partial_risk,columns=['sel-MLE', 'ind-est', 'rand-LASSO', 'rel-rand-LASSO', 'rel-LASSO','LASSO']) + df_prisk_metrics = 
pd.DataFrame(data=partial_risk,columns=['sel-MLE', + 'ind-est', + 'rand-LASSO', + 'rel-rand-LASSO', + 'rel-LASSO', + 'LASSO']) df_prisk_metrics['metric'] = "Partial" df_selective_inference = df_selective_inference.append(df_naive, ignore_index=True) @@ -910,6 +1003,7 @@ def main(n=500, p=100, rho=0.35, s=5, beta_type=1, snr_values=np.array([0.15, 0. df_selective_inference.to_html(outfile_inf_html) df_risk.to_html(outfile_risk_html) + stop if plot is True: plotRisk(df_risk) plotCoveragePower(df_selective_inference) From e77d9477cc188fd1a4fd47df1a3875a640e6a489 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 24 Jun 2020 23:06:12 -0700 Subject: [PATCH 047/187] adding a dispersion option to Lee et al lasso --- selectinf/algorithms/lasso.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/selectinf/algorithms/lasso.py b/selectinf/algorithms/lasso.py index 674174510..26c8cb41b 100644 --- a/selectinf/algorithms/lasso.py +++ b/selectinf/algorithms/lasso.py @@ -242,6 +242,7 @@ def summary(self, alternative='twosided', level=0.95, compute_intervals=False, + dispersion=None, truth=None): """ Summary table for inference adjusted for selection. @@ -258,6 +259,9 @@ def summary(self, compute_intervals : bool Should we compute confidence intervals? + dispersion : float + Scalar to multiply `self.constraints.covaraince` + truth : np.array True values of each beta for selected variables. If not None, a column 'pval' are p-values computed under these corresponding null hypotheses. @@ -275,9 +279,14 @@ def summary(self, if truth is None: truth = np.zeros_like(self.active_signs) + if dispersion is None: + dispersion = 1. + result = [] - C = self._constraints + C = self.constraints if C is not None: + _cov = C.covariance.copy() + C.covariance = _cov * dispersion one_step = self.onestep_estimator for i in range(one_step.shape[0]): eta = np.zeros_like(one_step) @@ -296,7 +305,8 @@ def summary(self, if compute_intervals: if C.linear_part.shape[0] > 0: # there were some constraints try: - _interval = C.interval(eta, one_step, + _interval = C.interval(eta, + one_step, alpha=alpha) except OverflowError: _interval = (-np.inf, np.inf) @@ -320,7 +330,8 @@ def summary(self, lower_trunc, upper_trunc, sd)) - + C.covariance = _cov + df = pd.DataFrame(index=self.active, data=dict([(n, d) for n, d in zip(['variable', 'pvalue', @@ -2311,7 +2322,8 @@ def fit(self, self.inactive = np.arange(lasso_solution.shape[0]) return self.lasso_solution - def summary(self, level=0.05, + def summary(self, + level=0.95, compute_intervals=False, dispersion=None): """ From c3f38ab13c1a0eeb0f737c38bad574c4b84d195a Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 24 Jun 2020 23:06:37 -0700 Subject: [PATCH 048/187] a few minor changes to test_cv_mle script -- to be replaced by an example in compare-selection --- selectinf/randomized/tests/test_cv_mle.py | 30 +++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/selectinf/randomized/tests/test_cv_mle.py b/selectinf/randomized/tests/test_cv_mle.py index f7a1bbc19..30b8da4c5 100644 --- a/selectinf/randomized/tests/test_cv_mle.py +++ b/selectinf/randomized/tests/test_cv_mle.py @@ -167,11 +167,11 @@ def glmnet_lasso(X, y, lambda_val): lam = as.matrix(lambda)[1,1] n = nrow(X) - fit = glmnet(X, y, standardize=TRUE, intercept=FALSE, thresh=1.e-10) + fit = glmnet(X, y, standardize=FALSE, intercept=FALSE, thresh=1.e-10) estimate = coef(fit, s=lam, exact=TRUE, x=X, y=y)[-1] - fit.cv = cv.glmnet(X, y, standardize=TRUE, 
intercept=FALSE, thresh=1.e-10) - estimate.1se = coef(fit.cv, s='lambda.1se', exact=TRUE, x=X, y=y)[-1] - estimate.min = coef(fit.cv, s='lambda.min', exact=TRUE, x=X, y=y)[-1] + fit.cv = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE, thresh=1.e-10) + estimate.1se = coef(fit, s=fit.cv$lambda.1se, exact=TRUE, x=X, y=y)[-1] + estimate.min = coef(fit, s=fit.cv$lambda.min, exact=TRUE, x=X, y=y)[-1] return(list(estimate = estimate, estimate.1se = estimate.1se, estimate.min = estimate.min, lam.min = fit.cv$lambda.min, lam.1se = fit.cv$lambda.1se)) }''') @@ -181,11 +181,12 @@ def glmnet_lasso(X, y, lambda_val): r_y = rpy.r.matrix(y, nrow=n, ncol=1) r_lam = rpy.r.matrix(lambda_val, nrow=1, ncol=1) - estimate = np.array(lambda_R(r_X, r_y, r_lam).rx2('estimate')) - estimate_1se = np.array(lambda_R(r_X, r_y, r_lam).rx2('estimate.1se')) - estimate_min = np.array(lambda_R(r_X, r_y, r_lam).rx2('estimate.min')) - lam_min = np.asscalar(np.array(lambda_R(r_X, r_y, r_lam).rx2('lam.min'))) - lam_1se = np.asscalar(np.array(lambda_R(r_X, r_y, r_lam).rx2('lam.1se'))) + val = lambda_R(r_X, r_y, r_lam) + estimate = np.array(val.rx2('estimate')) + estimate_1se = np.array(val.rx2('estimate.1se')) + estimate_min = np.array(val.rx2('estimate.min')) + lam_min = np.asscalar(np.array(val.rx2('lam.min'))) + lam_1se = np.asscalar(np.array(val.rx2('lam.1se'))) return estimate, estimate_1se, estimate_min, lam_min, lam_1se @@ -563,7 +564,7 @@ def comparison_cvmetrics_selected(n=500, sel_MLE[nonzero] = MLE_estimate ind_est[nonzero] = ind_unbiased_estimator - MLE_intervals = np.asarray(result[['lower', 'upper']]) + MLE_intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) MLE_pval = np.asarray(result['pvalue']) randomized_lasso_est = randomized_lasso.initial_soln @@ -724,7 +725,7 @@ def comparison_cvmetrics_full(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=1 df = lasso_Liu.summary(level=0.90, compute_intervals=True, dispersion=dispersion) Liu_lower, Liu_upper, Liu_pval = np.asarray(df['lower_confidence']), \ np.asarray(df['upper_confidence']), \ - np.asarray(df['pval']) + np.asarray(df['pvalue']) Liu_intervals = np.vstack((Liu_lower, Liu_upper)).T cov_Liu, selective_Liu_power = coverage(Liu_intervals, Liu_pval, Liu_target, beta[Lasso_soln_Liu != 0]) length_Liu = np.mean(Liu_intervals[:, 1] - Liu_intervals[:, 0]) @@ -783,7 +784,7 @@ def comparison_cvmetrics_full(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=1 sel_MLE[nonzero] = MLE_estimate ind_est[nonzero] = ind_unbiased_estimator - MLE_intervals = np.asarray(result[['lower', 'upper']]) + MLE_intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) MLE_pval = np.asarray(result['pvalue']) randomized_lasso_est = randomized_lasso.initial_soln @@ -837,7 +838,7 @@ def comparison_cvmetrics_full(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=1 def main(n=500, p=100, rho=0.35, s=5, beta_type=1, snr_values=np.array([0.15, 0.20, 0.31]), target="selected", tuning_nonrand="lambda.1se", tuning_rand="lambda.1se", - randomizing_scale = np.sqrt(0.50), ndraw=4, outpath = None, plot=True): + randomizing_scale = np.sqrt(0.50), ndraw=20, outpath = None, plot=True): df_selective_inference = pd.DataFrame() df_risk = pd.DataFrame() @@ -1003,7 +1004,6 @@ def main(n=500, p=100, rho=0.35, s=5, beta_type=1, snr_values=np.array([0.15, 0. 
df_selective_inference.to_html(outfile_inf_html) df_risk.to_html(outfile_risk_html) - stop if plot is True: plotRisk(df_risk) plotCoveragePower(df_selective_inference) @@ -1011,4 +1011,4 @@ def main(n=500, p=100, rho=0.35, s=5, beta_type=1, snr_values=np.array([0.15, 0. if __name__ == "__main__": main() - main(target="full") + From 7fd336a94086b23e86a18dc8ab4865af95e86b43 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 24 Jun 2020 23:08:02 -0700 Subject: [PATCH 049/187] script replaced by a notebook in compare-selection --- selectinf/randomized/tests/test_cv_mle.py | 1014 --------------------- 1 file changed, 1014 deletions(-) delete mode 100644 selectinf/randomized/tests/test_cv_mle.py diff --git a/selectinf/randomized/tests/test_cv_mle.py b/selectinf/randomized/tests/test_cv_mle.py deleted file mode 100644 index 30b8da4c5..000000000 --- a/selectinf/randomized/tests/test_cv_mle.py +++ /dev/null @@ -1,1014 +0,0 @@ -from __future__ import division - -import numpy as np, os, itertools -import pandas as pd - -import rpy2.robjects as rpy -from rpy2.robjects import numpy2ri -rpy.numpy2ri.activate() -from rpy2.robjects import pandas2ri -from rpy2.robjects.conversion import localconverter - -from scipy.stats import norm as ndist -from selectinf.randomized.lasso import lasso, full_targets, selected_targets, debiased_targets -from selectinf.algorithms.lasso import ROSI - -def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1): - - rpy.r(''' - - #' Predictors and responses generation. - #' - #' Generate a predictor matrix x, and response vector y, following a specified - #' setup. Actually, two pairs of predictors and responses are generated: - #' one for training, and one for validation. - #' - #' @param n,p The number of training observations, and the number of predictors. - #' @param nval The number of validation observations. - #' @param rho Parameter that drives pairwise correlations of the predictor - #' variables; specifically, predictors i and j have population correlation - #' rho^abs(i-j). Default is 0. - #' @param s number of nonzero coefficients in the underlying regression model. - #' Default is 5. (Ignored if beta.type is 4, in which case the number of - #' nonzero coefficients is 6; and if beta.type is 5, it is interpreted as a - #' the number of strongly nonzero coefficients in a weak sparsity model.) - #' @param beta.type Integer taking values in between 1 and 5, used to specify - #' the pattern of nonzero coefficients in the underlying regression model; see - #' details below. Default is 1. - #' @param snr Desired signal-to-noise ratio (SNR), i.e., var(mu)/sigma^2 where - #' mu is mean and sigma^2 is the error variance. The error variance is set so - #' that the given SNR is achieved. Default is 1. - #' @return A list with the following components: x, y, xval, yval, Sigma, beta, - #' and sigma. - #' - #' @details The data model is: \eqn{Y \sim N(X\beta, \sigma^2 I)}. - #' The predictor variables have covariance matrix Sigma, with (i,j)th entry - #' rho^abs(i-j). The error variance sigma^2 is set according to the desired - #' signal-to-noise ratio. 
The first 4 options for the nonzero pattern - #' of the underlying regression coefficients beta follow the simulation setup - #' in Bertsimas, King, and Mazumder (2016), and the 5th is a weak sparsity - #' option: - #' \itemize{ - #' \item 1: beta has s components of 1, occurring at (roughly) equally-spaced - #' indices in between 1 and p - #' \item 2: beta has its first s components equal to 1 - #' \item 3: beta has its first s components taking nonzero values, where the - #' decay in a linear fashion from 10 to 0.5 - #' \item 4: beta has its first 6 components taking the nonzero values -10,-6, - #' -2,2,6,10 - #' \item 5: beta has its first s components equal to 1, and the rest decaying - #' to zero at an exponential rate - #' } - #' - #' @author Trevor Hastie, Rob Tibshirani, Ryan Tibshirani - #' @references Simulation setup based on "Best subset selection via a modern - #' optimization lens" by Dimitris Bertsimas, Angela King, and Rahul Mazumder, - #' Annals of Statistics, 44(2), 813-852, 2016. - #' @example examples/ex.fs.R - #' @export sim.xy - - sim.xy = function(n, p, nval, rho=0, s=5, beta.type=1, snr=1) { - # Generate predictors - x = matrix(rnorm(n*p),n,p) - xval = matrix(rnorm(nval*p),nval,p) - - # Introduce autocorrelation, if needed - if (rho != 0) { - inds = 1:p - Sigma = rho^abs(outer(inds, inds, "-")) - obj = svd(Sigma) - Sigma.half = obj$u %*% (sqrt(diag(obj$d))) %*% t(obj$v) - x = x %*% Sigma.half - xval = xval %*% Sigma.half - } - else Sigma = diag(1,p) - - # Generate underlying coefficients - s = min(s,p) - beta = rep(0,p) - if (beta.type==1) { - beta[round(seq(1,p,length=s))] = 1 - } else if (beta.type==2) { - beta[1:s] = 1 - } else if (beta.type==3) { - beta[1:s] = seq(10,0.5,length=s) - } else if (beta.type==4) { - beta[1:6] = c(-10,-6,-2,2,6,10) - } else { - beta[1:s] = 1 - beta[(s+1):p] = 0.5^(1:(p-s)) - } - - # Set snr based on sample variance on infinitely large test set - vmu = as.numeric(t(beta) %*% Sigma %*% beta) - sigma = sqrt(vmu/snr) - - # Generate responses - y = as.numeric(x %*% beta + rnorm(n)*sigma) - yval = as.numeric(xval %*% beta + rnorm(nval)*sigma) - - list(x=x,y=y,xval=xval,yval=yval,Sigma=Sigma,beta=beta,sigma=sigma) - } - - sim_xy = sim.xy - ''') - - r_simulate = rpy.globalenv['sim_xy'] - sim = r_simulate(n, p, nval, rho, s, beta_type, snr) - X = np.array(sim.rx2('x')) - y = np.array(sim.rx2('y')) - X_val = np.array(sim.rx2('xval')) - y_val = np.array(sim.rx2('yval')) - Sigma = np.array(sim.rx2('Sigma')) - beta = np.array(sim.rx2('beta')) - sigma = np.array(sim.rx2('sigma')) - - return X, y, X_val, y_val, Sigma, beta, sigma - - -def selInf_R(X, y, beta, lam, sigma, Type, alpha=0.1): - rpy.r(''' - library("selectiveInference") - selInf = function(X, y, beta, lam, sigma, Type, alpha= 0.1){ - y = as.matrix(y) - X = as.matrix(X) - beta = as.matrix(beta) - lam = as.matrix(lam)[1,1] - sigma = as.matrix(sigma)[1,1] - Type = as.matrix(Type)[1,1] - if(Type == 1){ - type = "full"} else{ - type = "partial"} - inf = fixedLassoInf(x = X, y = y, beta = beta, lambda=lam, family = "gaussian", - intercept=FALSE, sigma=sigma, alpha=alpha, type=type) - return(list(ci = inf$ci, pvalue = inf$pv))} - ''') - - inf_R = rpy.globalenv['selInf'] - n, p = X.shape - r_X = rpy.r.matrix(X, nrow=n, ncol=p) - r_y = rpy.r.matrix(y, nrow=n, ncol=1) - r_beta = rpy.r.matrix(beta, nrow=p, ncol=1) - r_lam = rpy.r.matrix(lam, nrow=1, ncol=1) - r_sigma = rpy.r.matrix(sigma, nrow=1, ncol=1) - r_Type = rpy.r.matrix(Type, nrow=1, ncol=1) - output = inf_R(r_X, r_y, r_beta, r_lam, 
r_sigma, r_Type) - ci = np.array(output.rx2('ci')) - pvalue = np.array(output.rx2('pvalue')) - return ci, pvalue - - -def glmnet_lasso(X, y, lambda_val): - rpy.r(''' - library(glmnet) - glmnet_LASSO = function(X,y, lambda){ - y = as.matrix(y) - X = as.matrix(X) - lam = as.matrix(lambda)[1,1] - n = nrow(X) - - fit = glmnet(X, y, standardize=FALSE, intercept=FALSE, thresh=1.e-10) - estimate = coef(fit, s=lam, exact=TRUE, x=X, y=y)[-1] - fit.cv = cv.glmnet(X, y, standardize=FALSE, intercept=FALSE, thresh=1.e-10) - estimate.1se = coef(fit, s=fit.cv$lambda.1se, exact=TRUE, x=X, y=y)[-1] - estimate.min = coef(fit, s=fit.cv$lambda.min, exact=TRUE, x=X, y=y)[-1] - return(list(estimate = estimate, estimate.1se = estimate.1se, estimate.min = estimate.min, lam.min = fit.cv$lambda.min, lam.1se = fit.cv$lambda.1se)) - }''') - - lambda_R = rpy.globalenv['glmnet_LASSO'] - n, p = X.shape - r_X = rpy.r.matrix(X, nrow=n, ncol=p) - r_y = rpy.r.matrix(y, nrow=n, ncol=1) - r_lam = rpy.r.matrix(lambda_val, nrow=1, ncol=1) - - val = lambda_R(r_X, r_y, r_lam) - estimate = np.array(val.rx2('estimate')) - estimate_1se = np.array(val.rx2('estimate.1se')) - estimate_min = np.array(val.rx2('estimate.min')) - lam_min = np.asscalar(np.array(val.rx2('lam.min'))) - lam_1se = np.asscalar(np.array(val.rx2('lam.1se'))) - return estimate, estimate_1se, estimate_min, lam_min, lam_1se - - -def coverage(intervals, pval, target, truth): - pval_alt = (pval[truth != 0]) < 0.1 - if pval_alt.sum() > 0: - avg_power = np.mean(pval_alt) - else: - avg_power = 0. - return np.mean((target > intervals[:, 0]) * (target < intervals[:, 1])), avg_power - - -def BHfilter(pval, q=0.2): - rpy.r.assign('pval', pval) - rpy.r.assign('q', q) - rpy.r('Pval = p.adjust(pval, method="BH")') - rpy.r('S = which((Pval < q)) - 1') - S = rpy.r('S') - ind = np.zeros(pval.shape[0], np.bool) - ind[np.asarray(S, np.int)] = 1 - return ind - - -def relative_risk(est, truth, Sigma): - if (truth != 0).sum() > 0: - return (est - truth).T.dot(Sigma).dot(est - truth) / truth.T.dot(Sigma).dot(truth) - else: - return (est - truth).T.dot(Sigma).dot(est - truth) - -from rpy2 import robjects - -def plotRisk(df_risk): - robjects.r(""" - library("ggplot2") - library("magrittr") - library("tidyr") - library("dplyr") - - plot_risk <- function(df_risk, outpath="plots/", resolution=300, height= 7.5, width=15) - { - date = 1:length(unique(df_risk$snr)) - df_risk = filter(df_risk, metric == "Full") - df = cbind(df_risk, date) - risk = df %>% - gather(key, value, sel.MLE, rand.LASSO, LASSO) %>% - ggplot(aes(x=date, y=value, colour=key, shape=key, linetype=key)) + - geom_point(size=3) + - geom_line(aes(linetype=key), size=1) + - ylim(0.01,1.2)+ - labs(y="relative risk", x = "Signal regimes: snr") + - scale_x_continuous(breaks=1:length(unique(df_risk$snr)), label = sapply(df_risk$snr, toString)) + - theme(legend.position="top", legend.title = element_blank()) - indices = sort(c("sel.MLE", "rand.LASSO", "LASSO"), index.return= TRUE)$ix - names = c("sel-MLE", "rand-LASSO", "LASSO") - risk = risk + scale_color_manual(labels = names[indices], values=c("#008B8B", "#104E8B","#B22222")[indices]) + - scale_shape_manual(labels = names[indices], values=c(15, 17, 16)[indices]) + - scale_linetype_manual(labels = names[indices], values = c(1,1,2)[indices]) - outfile = paste(outpath, 'risk.png', sep="") - outfile = paste(outpath, 'risk.png', sep="") - ggsave(outfile, plot = risk, dpi=resolution, dev='png', height=height, width=width, units="cm")} - """) - - #pandas2ri.activate() - with 
localconverter(robjects.default_converter + pandas2ri.converter): - r_df_risk = robjects.conversion.py2rpy(df_risk) - R_plot = robjects.globalenv['plot_risk'] - R_plot(r_df_risk) - - -def plotCoveragePower(df_inference): - robjects.r(""" - library("ggplot2") - library("magrittr") - library("tidyr") - library("reshape") - library("cowplot") - library("dplyr") - - plot_coverage_lengths <- function(df_inference, outpath="plots/", - resolution=200, height_plot1= 6.5, width_plot1=12, - height_plot2=13, width_plot2=13) - { - snr.len = length(unique(df_inference$snr)) - df_inference = arrange(df_inference, method) - target = toString(df_inference$target[1]) - df = data.frame(snr = sapply(unique(df_inference$snr), toString), - MLE = 100*df_inference$coverage[((2*snr.len)+1):(3*snr.len)], - Lee = 100*df_inference$coverage[1:snr.len], - Naive = 100*df_inference$coverage[((3*snr.len)+1):(4*snr.len)]) - if(target== "selected"){ - data.m <- melt(df, id.vars='snr') - coverage = ggplot(data.m, aes(snr, value)) + - geom_bar(aes(fill = variable), width = 0.4, position = position_dodge(width=0.5), stat="identity") + - geom_hline(yintercept = 90, linetype="dotted") + - labs(y="coverage: partial", x = "Signal regimes: snr") + - theme(legend.position="top", - legend.title = element_blank()) - coverage = coverage + - scale_fill_manual(labels = c("MLE-based","Lee", "Naive"), values=c("#008B8B", "#B22222", "#FF6347"))} else{ - df = cbind(df, Liu = 100*df_inference$coverage[((snr.len)+1):(2*snr.len)]) - df <- df[c("snr", "MLE", "Liu", "Lee", "Naive")] - data.m <- melt(df, id.vars='snr') - coverage = ggplot(data.m, aes(snr, value)) + - geom_bar(aes(fill = variable), width = 0.4, position = position_dodge(width=0.5), stat="identity") + - geom_hline(yintercept = 90, linetype="dotted") + - labs(y="coverage: full", x = "Signal regimes: snr") + - theme(legend.position="top", legend.title = element_blank()) - coverage = coverage + - scale_fill_manual(labels = c("MLE-based", "Liu", "Lee", "Naive"), values=c("#008B8B", "#104E8B", "#B22222", "#FF6347"))} - - outfile = paste(outpath, 'coverage.png', sep="") - ggsave(outfile, plot = coverage, dpi=resolution, dev='png', height=height_plot1, width=width_plot1, units="cm") - - df = data.frame(snr = sapply(unique(df_inference$snr), toString), - MLE = 100*df_inference$sel.power[((2*snr.len)+1):(3*snr.len)], - Lee = 100*df_inference$sel.power[1:snr.len]) - if(target== "selected"){ - data.m <- melt(df, id.vars='snr') - sel_power = ggplot(data.m, aes(snr, value)) + - geom_bar(aes(fill = variable), width = 0.4, position = position_dodge(width=0.5), stat="identity") + - labs(y="power: partial", x = "Signal regimes: snr") + - theme(legend.position="top", legend.title = element_blank()) - sel_power = sel_power + scale_fill_manual(labels = c("MLE-based","Lee"), values=c("#008B8B", "#B22222"))} else{ - df = cbind(df, Liu = 100*df_inference$sel.power[((snr.len)+1):(2*snr.len)]) - df <- df[,c("snr", "MLE", "Liu", "Lee")] - data.m <- melt(df, id.vars='snr') - sel_power = ggplot(data.m, aes(snr, value)) + - geom_bar(aes(fill = variable), width = 0.4, position = position_dodge(width=0.5), stat="identity") + - labs(y="power: full", x = "Signal regimes: snr") + - theme(legend.position="top", legend.title = element_blank()) - sel_power = sel_power + scale_fill_manual(labels = c("MLE-based","Liu","Lee"), values=c("#008B8B", "#104E8B", "#B22222"))} - - outfile = paste(outpath, 'selective_power.png', sep="") - ggsave(outfile, plot = sel_power, dpi=resolution, dev='png', height=height_plot1, 
width=width_plot1, units="cm") - - if(target== "selected"){ - test_data <-data.frame(MLE = filter(df_inference, method == "MLE")$length, - Lee = filter(df_inference, method == "Lee")$length, - Naive = filter(df_inference, method == "Naive")$length, - date = 1:length(unique(df_inference$snr))) - lengths = test_data %>% - gather(key, value, MLE, Lee, Naive) %>% - ggplot(aes(x=date, y=value, colour=key, shape=key, linetype=key)) + - geom_point(size=3) + - geom_line(aes(linetype=key), size=1) + - ylim(0.,max(test_data$MLE, test_data$Lee, test_data$Naive) + 0.2)+ - labs(y="lengths:partial", x = "Signal regimes: snr") + - scale_x_continuous(breaks=1:length(unique(df_inference$snr)), label = sapply(unique(df_inference$snr), toString))+ - theme(legend.position="top", legend.title = element_blank()) - - indices = sort(c("MLE", "Lee", "Naive"), index.return= TRUE)$ix - names = c("MLE-based", "Lee", "Naive") - lengths = lengths + scale_color_manual(labels = names[indices], values=c("#008B8B","#B22222", "#FF6347")[indices]) + - scale_shape_manual(labels = names[indices], values=c(15, 17, 16)[indices]) + - scale_linetype_manual(labels = names[indices], values = c(1,1,2)[indices])} else{ - test_data <-data.frame(MLE = filter(df_inference, method == "MLE")$length, - Lee = filter(df_inference, method == "Lee")$length, - Naive = filter(df_inference, method == "Naive")$length, - Liu = filter(df_inference, method == "Liu")$length, - date = 1:length(unique(df_inference$snr))) - lengths= test_data %>% - gather(key, value, MLE, Lee, Naive, Liu) %>% - ggplot(aes(x=date, y=value, colour=key, shape=key, linetype=key)) + - geom_point(size=3) + - geom_line(aes(linetype=key), size=1) + - ylim(0.,max(test_data$MLE, test_data$Lee, test_data$Naive, test_data$Liu) + 0.2)+ - labs(y="lengths: full", x = "Signal regimes: snr") + - scale_x_continuous(breaks=1:length(unique(df_inference$snr)), label = sapply(unique(df_inference$snr), toString))+ - theme(legend.position="top", legend.title = element_blank()) - - indices = sort(c("MLE", "Liu", "Lee", "Naive"), index.return= TRUE)$ix - names = c("MLE-based", "Lee", "Naive", "Liu") - lengths = lengths + scale_color_manual(labels = names[indices], values=c("#008B8B","#B22222", "#FF6347", "#104E8B")[indices]) + - scale_shape_manual(labels = names[indices], values=c(15, 17, 16, 15)[indices]) + - scale_linetype_manual(labels = names[indices], values = c(1,1,2,1)[indices])} - - prop = filter(df_inference, method == "Lee")$prop.infty - df = data.frame(snr = sapply(unique(df_inference$snr), toString), - infinite = 100*prop) - data.prop <- melt(df, id.vars='snr') - pL = ggplot(data.prop, aes(snr, value)) + - geom_bar(aes(fill = variable), width = 0.4, position = position_dodge(width=0.5), stat="identity") + - labs(y="infinite intervals (%)", x = "Signal regimes: snr") + - theme(legend.position="top", - legend.title = element_blank()) - pL = pL + scale_fill_manual(labels = c("Lee"), values=c("#B22222")) - prow <- plot_grid( pL + theme(legend.position="none"), - lengths + theme(legend.position="none"), - align = 'vh', - hjust = -1, - ncol = 1) - - legend <- get_legend(lengths+ theme(legend.direction = "horizontal",legend.justification="center" ,legend.box.just = "bottom")) - p <- plot_grid(prow, ncol=1, legend, rel_heights = c(2., .2)) - outfile = paste(outpath, 'length.png', sep="") - ggsave(outfile, plot = p, dpi=resolution, dev='png', height=height_plot2, width=width_plot2, units="cm")} - """) - - #pandas2ri.activate() - with localconverter(robjects.default_converter + 
pandas2ri.converter): - r_df_inference = robjects.conversion.py2rpy(df_inference) - R_plot = robjects.globalenv['plot_coverage_lengths'] - R_plot(r_df_inference) - -def comparison_cvmetrics_selected(n=500, - p=100, - nval=500, - rho=0.35, - s=5, - beta_type=1, - snr=0.20, - randomizer_scale=np.sqrt(0.50), - full_dispersion=True, - tuning_nonrand="lambda.min", - tuning_rand="lambda.1se"): - - (X, - y, - _, - _, - Sigma, - beta, - sigma) = sim_xy(n=n, - p=p, - nval=nval, - rho=rho, - s=s, - beta_type=beta_type, - snr=snr) - true_mean = X.dot(beta) - - X -= X.mean(0)[None, :] - X /= (X.std(0)[None, :] * np.sqrt(n / (n - 1))) - y = y - y.mean() - true_set = np.asarray([u for u in range(p) if beta[u] != 0]) - - if full_dispersion: - dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p) - sigma_ = np.sqrt(dispersion) - else: - dispersion = None - sigma_ = np.std(y) - print("estimated and true sigma", sigma, sigma_) - - lam_theory = sigma_ * 1. * np.mean(np.fabs(np.dot(X.T, - np.random.standard_normal((n, - 2000)))).max(0)) - (glm_LASSO_theory, - glm_LASSO_1se, - glm_LASSO_min, - lam_min, - lam_1se) = glmnet_lasso(X, - y, - lam_theory/float(n)) - - if tuning_nonrand == "lambda.min": - lam_LASSO = lam_min - glm_LASSO = glm_LASSO_min - elif tuning_nonrand == "lambda.1se": - lam_LASSO = lam_1se - glm_LASSO = glm_LASSO_1se - else: - lam_LASSO = lam_theory/float(n) - glm_LASSO = glm_LASSO_theory - - active_LASSO = (glm_LASSO != 0) - nactive_LASSO = active_LASSO.sum() - active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]]) - active_LASSO_bool = np.asarray([(np.in1d(active_set_LASSO[z], true_set).sum() > 0) for z in range(nactive_LASSO)], np.bool) - - rel_LASSO = np.zeros(p) - Lee_nreport = 0 - bias_Lee = 0. - bias_naive = 0. - - if nactive_LASSO > 0: - post_LASSO_OLS = np.linalg.pinv(X[:, active_LASSO]).dot(y) - rel_LASSO[active_LASSO] = post_LASSO_OLS - Lee_target = np.linalg.pinv(X[:, active_LASSO]).dot(X.dot(beta)) - try: - Lee_intervals, Lee_pval = selInf_R(X, y, glm_LASSO, n * lam_LASSO, sigma_, Type=0, alpha=0.1) - except: - Lee_intervals, Lee_pval = np.array([]), np.array([]) - - if (Lee_pval.shape[0] == Lee_target.shape[0]): - - cov_Lee, selective_Lee_power = coverage(Lee_intervals, Lee_pval, Lee_target, beta[active_LASSO]) - inf_entries_bool = np.isinf(Lee_intervals[:, 1] - Lee_intervals[:, 0]) - inf_entries = np.mean(inf_entries_bool) - if inf_entries == 1.: - length_Lee = 0. - else: - length_Lee = np.mean((Lee_intervals[:, 1] - Lee_intervals[:, 0])[~inf_entries_bool]) - power_Lee = ((active_LASSO_bool) * (np.logical_or((0. < Lee_intervals[:, 0]), (0. > Lee_intervals[:, 1])))) \ - .sum() / float((beta != 0).sum()) - Lee_discoveries = BHfilter(Lee_pval, q=0.1) - power_Lee_BH = (Lee_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum()) - fdr_Lee_BH = (Lee_discoveries * ~active_LASSO_bool).sum() / float(max(Lee_discoveries.sum(), 1.)) - bias_Lee = np.mean(glm_LASSO[active_LASSO] - Lee_target) - - naive_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_LASSO].T.dot(X[:, active_LASSO]))))) - naive_intervals = np.vstack([post_LASSO_OLS - 1.65 * naive_sd, - post_LASSO_OLS + 1.65 * naive_sd]).T - naive_pval = 2 * ndist.cdf(np.abs(post_LASSO_OLS) / naive_sd) - cov_naive, selective_naive_power = coverage(naive_intervals, naive_pval, Lee_target, beta[active_LASSO]) - length_naive = np.mean(naive_intervals[:, 1] - naive_intervals[:, 0]) - power_naive = ((active_LASSO_bool) * ( - np.logical_or((0. < naive_intervals[:, 0]), (0. 
> naive_intervals[:, 1])))).sum() / float( - (beta != 0).sum()) - naive_discoveries = BHfilter(naive_pval, q=0.1) - power_naive_BH = (naive_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum()) - fdr_naive_BH = (naive_discoveries * ~active_LASSO_bool).sum() / float(max(naive_discoveries.sum(), 1.)) - bias_naive = np.mean(rel_LASSO[active_LASSO] - Lee_target) - - partial_Lasso_risk = (glm_LASSO[active_LASSO]-Lee_target).T.dot(glm_LASSO[active_LASSO]-Lee_target) - partial_relLasso_risk = (post_LASSO_OLS - Lee_target).T.dot(post_LASSO_OLS - Lee_target) - - else: - Lee_nreport = 1 - cov_Lee, length_Lee, inf_entries, power_Lee, power_Lee_BH, fdr_Lee_BH, selective_Lee_power = [0., 0., 0., 0., 0., 0., 0.] - cov_naive, length_naive, power_naive, power_naive_BH, fdr_naive_BH, selective_naive_power = [0., 0., 0., 0., 0., 0.] - naive_discoveries = np.zeros(1) - Lee_discoveries = np.zeros(1) - partial_Lasso_risk, partial_relLasso_risk = [0., 0.] - elif nactive_LASSO == 0: - Lee_nreport = 1 - cov_Lee, length_Lee, inf_entries, power_Lee, power_Lee_BH, fdr_Lee_BH, selective_Lee_power = [0., 0., 0., 0., 0., 0., 0.] - cov_naive, length_naive, power_naive, power_naive_BH, fdr_naive_BH, selective_naive_power = [0., 0., 0., 0., 0., 0.] - naive_discoveries = np.zeros(1) - Lee_discoveries = np.zeros(1) - partial_Lasso_risk, partial_relLasso_risk = [0., 0.] - - if tuning_rand == "lambda.min": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights=n * lam_min * np.ones(p), - randomizer_scale= np.sqrt(n) * randomizer_scale * sigma_) - elif tuning_rand == "lambda.1se": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights=n * lam_1se * np.ones(p), - randomizer_scale= np.sqrt(n) * randomizer_scale * sigma_) - else: - randomized_lasso = lasso.gaussian(X, - y, - feature_weights= lam_theory * np.ones(p), - randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) - signs = randomized_lasso.fit() - nonzero = signs != 0 - active_set_rand = np.asarray([t for t in range(p) if nonzero[t]]) - active_rand_bool = np.asarray([(np.in1d(active_set_rand[x], true_set).sum() > 0) for x in range(nonzero.sum())], np.bool) - sel_MLE = np.zeros(p) - ind_est = np.zeros(p) - randomized_lasso_est = np.zeros(p) - randomized_rel_lasso_est = np.zeros(p) - MLE_nreport = 0 - - if nonzero.sum() > 0: - target_randomized = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) - - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(randomized_lasso.loglike, - randomized_lasso._W, - nonzero, - dispersion=dispersion) - - result = randomized_lasso.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] - - MLE_estimate = result['MLE'] - ind_unbiased_estimator = result['unbiased'] - - sel_MLE[nonzero] = MLE_estimate - ind_est[nonzero] = ind_unbiased_estimator - MLE_intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) - MLE_pval = np.asarray(result['pvalue']) - - randomized_lasso_est = randomized_lasso.initial_soln - randomized_rel_lasso_est = randomized_lasso._beta_full - - cov_MLE, selective_MLE_power = coverage(MLE_intervals, MLE_pval, target_randomized, beta[nonzero]) - length_MLE = np.mean(MLE_intervals[:, 1] - MLE_intervals[:, 0]) - power_MLE = ((active_rand_bool) * ( - np.logical_or((0. < MLE_intervals[:, 0]), (0. 
> MLE_intervals[:, 1])))).sum() / float((beta != 0).sum()) - MLE_discoveries = BHfilter(MLE_pval, q=0.1) - power_MLE_BH = (MLE_discoveries * active_rand_bool).sum() / float((beta != 0).sum()) - fdr_MLE_BH = (MLE_discoveries * ~active_rand_bool).sum() / float(max(MLE_discoveries.sum(), 1.)) - bias_MLE = np.mean(MLE_estimate - target_randomized) - - partial_MLE_risk = (MLE_estimate - target_randomized).T.dot(MLE_estimate - target_randomized) - partial_ind_risk = (ind_unbiased_estimator - target_randomized).T.dot(ind_unbiased_estimator - target_randomized) - partial_randLasso_risk = (randomized_lasso_est[nonzero] - target_randomized).T.dot(randomized_lasso_est[nonzero] - target_randomized) - partial_relrandLasso_risk = (randomized_rel_lasso_est[nonzero] - target_randomized).T.dot(randomized_rel_lasso_est[nonzero] - target_randomized) - - else: - MLE_nreport = 1 - cov_MLE, length_MLE, power_MLE, power_MLE_BH, fdr_MLE_BH, bias_MLE, selective_MLE_power = [0., 0., 0., 0., 0., 0., 0.] - MLE_discoveries = np.zeros(1) - partial_MLE_risk, partial_ind_risk, partial_randLasso_risk, partial_relrandLasso_risk = [0., 0., 0., 0.] - - risks = np.vstack((relative_risk(sel_MLE, beta, Sigma), - relative_risk(ind_est, beta, Sigma), - relative_risk(randomized_lasso_est, beta, Sigma), - relative_risk(randomized_rel_lasso_est, beta, Sigma), - relative_risk(rel_LASSO, beta, Sigma), - relative_risk(glm_LASSO, beta, Sigma))) - - partial_risks = np.vstack((partial_MLE_risk, - partial_ind_risk, - partial_randLasso_risk, - partial_relrandLasso_risk, - partial_relLasso_risk, - partial_Lasso_risk)) - - naive_inf = np.vstack((cov_naive, length_naive, 0., nactive_LASSO, bias_naive, selective_naive_power, power_naive, power_naive_BH, fdr_naive_BH, - naive_discoveries.sum())) - Lee_inf = np.vstack((cov_Lee, length_Lee, inf_entries, nactive_LASSO, bias_Lee, selective_Lee_power, power_Lee, power_Lee_BH, fdr_Lee_BH, - Lee_discoveries.sum())) - Liu_inf = np.zeros((10, 1)) - MLE_inf = np.vstack((cov_MLE, length_MLE, 0., nonzero.sum(), bias_MLE, selective_MLE_power, power_MLE, power_MLE_BH, fdr_MLE_BH, - MLE_discoveries.sum())) - nreport = np.vstack((Lee_nreport, 0., MLE_nreport)) - - return np.vstack((risks, naive_inf, Lee_inf, Liu_inf, MLE_inf, partial_risks, nreport)) - - -def comparison_cvmetrics_full(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=1, snr=0.20, - randomizer_scale=np.sqrt(0.25), full_dispersion=True, - tuning_nonrand="lambda.min", tuning_rand="lambda.1se"): - - X, y, _, _, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr) - print("snr", snr) - X -= X.mean(0)[None, :] - X /= (X.std(0)[None, :] * np.sqrt(n / (n - 1.))) - y = y - y.mean() - true_set = np.asarray([u for u in range(p) if beta[u] != 0]) - - if full_dispersion: - dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p) - sigma_ = np.sqrt(dispersion) - else: - dispersion = None - sigma_ = np.std(y) - print("estimated and true sigma", sigma, sigma_) - - lam_theory = sigma_ * 1. 
* np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) - glm_LASSO_theory, glm_LASSO_1se, glm_LASSO_min, lam_min, lam_1se = glmnet_lasso(X, y, lam_theory/float(n)) - if tuning_nonrand == "lambda.min": - lam_LASSO = lam_min - glm_LASSO = glm_LASSO_min - elif tuning_nonrand == "lambda.1se": - lam_LASSO = lam_1se - glm_LASSO = glm_LASSO_1se - else: - lam_LASSO = lam_theory/float(n) - glm_LASSO = glm_LASSO_theory - - active_LASSO = (glm_LASSO != 0) - nactive_LASSO = active_LASSO.sum() - active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]]) - active_LASSO_bool = np.asarray([(np.in1d(active_set_LASSO[z], true_set).sum() > 0) for z in range(nactive_LASSO)], - np.bool) - - rel_LASSO = np.zeros(p) - Lee_nreport = 0 - bias_Lee = 0. - bias_naive = 0. - - if nactive_LASSO > 0: - rel_LASSO[active_LASSO] = np.linalg.pinv(X[:, active_LASSO]).dot(y) - Lee_target = beta[active_LASSO] - Lee_intervals, Lee_pval = selInf_R(X, y, glm_LASSO, n * lam_LASSO, sigma_, Type=1, alpha=0.1) - - if (Lee_pval.shape[0] == Lee_target.shape[0]): - - cov_Lee, selective_Lee_power = coverage(Lee_intervals, Lee_pval, Lee_target, beta[active_LASSO]) - inf_entries_bool = np.isinf(Lee_intervals[:, 1] - Lee_intervals[:, 0]) - inf_entries = np.mean(inf_entries_bool) - if inf_entries == 1.: - length_Lee = 0. - else: - length_Lee = np.mean((Lee_intervals[:, 1] - Lee_intervals[:, 0])[~inf_entries_bool]) - power_Lee = ((active_LASSO_bool) * ( - np.logical_or((0. < Lee_intervals[:, 0]), (0. > Lee_intervals[:, 1])))).sum() / float((beta != 0).sum()) - Lee_discoveries = BHfilter(Lee_pval, q=0.1) - power_Lee_BH = (Lee_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum()) - fdr_Lee_BH = (Lee_discoveries * ~active_LASSO_bool).sum() / float(max(Lee_discoveries.sum(), 1.)) - bias_Lee = np.mean(glm_LASSO[active_LASSO] - Lee_target) - - post_LASSO_OLS = np.linalg.pinv(X[:, active_LASSO]).dot(y) - naive_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_LASSO].T.dot(X[:, active_LASSO]))))) - naive_intervals = np.vstack([post_LASSO_OLS - 1.65 * naive_sd, - post_LASSO_OLS + 1.65 * naive_sd]).T - naive_pval = 2 * ndist.cdf(np.abs(post_LASSO_OLS) / naive_sd) - cov_naive, selective_naive_power = coverage(naive_intervals, naive_pval, Lee_target, beta[active_LASSO]) - length_naive = np.mean(naive_intervals[:, 1] - naive_intervals[:, 0]) - power_naive = ((active_LASSO_bool) * ( - np.logical_or((0. < naive_intervals[:, 0]), (0. > naive_intervals[:, 1])))).sum() / float( - (beta != 0).sum()) - naive_discoveries = BHfilter(naive_pval, q=0.1) - power_naive_BH = (naive_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum()) - fdr_naive_BH = (naive_discoveries * ~active_LASSO_bool).sum() / float(max(naive_discoveries.sum(), 1.)) - bias_naive = np.mean(rel_LASSO[active_LASSO] - Lee_target) - - partial_Lasso_risk = (glm_LASSO[active_LASSO] - Lee_target).T.dot(glm_LASSO[active_LASSO] - Lee_target) - partial_relLasso_risk = (post_LASSO_OLS - Lee_target).T.dot(post_LASSO_OLS - Lee_target) - else: - Lee_nreport = 1 - cov_Lee, length_Lee, inf_entries, power_Lee, power_Lee_BH, fdr_Lee_BH, selective_Lee_power = [0., 0., 0., 0., 0., 0., 0.] - cov_naive, length_naive, power_naive, power_naive_BH, fdr_naive_BH, selective_naive_power = [0., 0., 0., 0., 0., 0.] - naive_discoveries = np.zeros(1) - Lee_discoveries = np.zeros(1) - partial_Lasso_risk, partial_relLasso_risk = [0., 0.] 
- - elif nactive_LASSO == 0: - Lee_nreport = 1 - cov_Lee, length_Lee, inf_entries, power_Lee, power_Lee_BH, fdr_Lee_BH, selective_Lee_power = [0., 0., 0., 0., 0., 0., 0.] - cov_naive, length_naive, power_naive, power_naive_BH, fdr_naive_BH, selective_naive_power = [0., 0., 0., 0., 0., 0.] - naive_discoveries = np.zeros(1) - Lee_discoveries = np.zeros(1) - partial_Lasso_risk, partial_relLasso_risk = [0., 0.] - - lasso_Liu = ROSI.gaussian(X, y, n * lam_LASSO) - print(type(lasso_Liu)) - Lasso_soln_Liu = lasso_Liu.fit() - active_set_Liu = np.nonzero(Lasso_soln_Liu != 0)[0] - nactive_Liu = active_set_Liu.shape[0] - active_Liu_bool = np.asarray([(np.in1d(active_set_Liu[a], true_set).sum() > 0) for a in range(nactive_Liu)], np.bool) - Liu_nreport = 0 - - if nactive_Liu > 0: - Liu_target = beta[Lasso_soln_Liu != 0] - df = lasso_Liu.summary(level=0.90, compute_intervals=True, dispersion=dispersion) - Liu_lower, Liu_upper, Liu_pval = np.asarray(df['lower_confidence']), \ - np.asarray(df['upper_confidence']), \ - np.asarray(df['pvalue']) - Liu_intervals = np.vstack((Liu_lower, Liu_upper)).T - cov_Liu, selective_Liu_power = coverage(Liu_intervals, Liu_pval, Liu_target, beta[Lasso_soln_Liu != 0]) - length_Liu = np.mean(Liu_intervals[:, 1] - Liu_intervals[:, 0]) - power_Liu = ((active_Liu_bool) * (np.logical_or((0. < Liu_intervals[:, 0]), - (0. > Liu_intervals[:, 1])))).sum() / float((beta != 0).sum()) - Liu_discoveries = BHfilter(Liu_pval, q=0.1) - power_Liu_BH = (Liu_discoveries * active_Liu_bool).sum() / float((beta != 0).sum()) - fdr_Liu_BH = (Liu_discoveries * ~active_Liu_bool).sum() / float(max(Liu_discoveries.sum(), 1.)) - - else: - Liu_nreport = 1 - cov_Liu, length_Liu, power_Liu, power_Liu_BH, fdr_Liu_BH, selective_Liu_power = [0., 0., 0., 0., 0., 0.] - Liu_discoveries = np.zeros(1) - - if tuning_rand == "lambda.min": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights= n * lam_min * np.ones(p), - randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) - elif tuning_rand == "lambda.1se": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights= n * lam_1se * np.ones(p), - randomizer_scale= np.sqrt(n) * randomizer_scale * sigma_) - else: - randomized_lasso = lasso.gaussian(X, - y, - feature_weights= lam_theory * np.ones(p), - randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) - signs = randomized_lasso.fit() - nonzero = signs != 0 - active_set_rand = np.asarray([t for t in range(p) if nonzero[t]]) - active_rand_bool = np.asarray([(np.in1d(active_set_rand[x], true_set).sum() > 0) for x in range(nonzero.sum())], np.bool) - sel_MLE = np.zeros(p) - ind_est = np.zeros(p) - randomized_lasso_est = np.zeros(p) - randomized_rel_lasso_est = np.zeros(p) - MLE_nreport = 0 - - if nonzero.sum() > 0: - target_randomized = beta[nonzero] - (observed_target, - cov_target, - cov_target_score, - alternatives) = full_targets(randomized_lasso.loglike, - randomized_lasso._W, - nonzero, - dispersion=dispersion) - - result = randomized_lasso.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] - - MLE_estimate = result['MLE'] - ind_unbiased_estimator = result['unbiased'] - - sel_MLE[nonzero] = MLE_estimate - ind_est[nonzero] = ind_unbiased_estimator - MLE_intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) - MLE_pval = np.asarray(result['pvalue']) - - randomized_lasso_est = randomized_lasso.initial_soln - randomized_rel_lasso_est = randomized_lasso._beta_full - - cov_MLE, selective_MLE_power = coverage(MLE_intervals, MLE_pval, target_randomized, beta[nonzero]) - 
length_MLE = np.mean(MLE_intervals[:, 1] - MLE_intervals[:, 0]) - power_MLE = ((active_rand_bool) * (np.logical_or((0. < MLE_intervals[:, 0]), (0. > MLE_intervals[:, 1])))).sum() / float((beta != 0).sum()) - MLE_discoveries = BHfilter(MLE_pval, q=0.1) - power_MLE_BH = (MLE_discoveries * active_rand_bool).sum() / float((beta != 0).sum()) - fdr_MLE_BH = (MLE_discoveries * ~active_rand_bool).sum() / float(max(MLE_discoveries.sum(), 1.)) - bias_MLE = np.mean(MLE_estimate - target_randomized) - - partial_MLE_risk = (MLE_estimate - target_randomized).T.dot(MLE_estimate - target_randomized) - partial_ind_risk = (ind_unbiased_estimator - target_randomized).T.dot(ind_unbiased_estimator - target_randomized) - partial_randLasso_risk = (randomized_lasso_est[nonzero] - target_randomized).T.dot(randomized_lasso_est[nonzero] - target_randomized) - partial_relrandLasso_risk = (randomized_rel_lasso_est[nonzero] - target_randomized).T.dot(randomized_rel_lasso_est[nonzero] - target_randomized) - else: - MLE_nreport = 1 - cov_MLE, length_MLE, power_MLE, power_MLE_BH, fdr_MLE_BH, bias_MLE, selective_MLE_power = [0., 0., 0., 0., 0., 0., 0.] - MLE_discoveries = np.zeros(1) - partial_MLE_risk, partial_ind_risk, partial_randLasso_risk, partial_relrandLasso_risk = [0., 0., 0., 0.] - - risks = np.vstack((relative_risk(sel_MLE, beta, Sigma), - relative_risk(ind_est, beta, Sigma), - relative_risk(randomized_lasso_est, beta, Sigma), - relative_risk(randomized_rel_lasso_est, beta, Sigma), - relative_risk(rel_LASSO, beta, Sigma), - relative_risk(glm_LASSO, beta, Sigma))) - - partial_risks = np.vstack((partial_MLE_risk, - partial_ind_risk, - partial_randLasso_risk, - partial_relrandLasso_risk, - partial_relLasso_risk, - partial_Lasso_risk)) - - naive_inf = np.vstack((cov_naive, length_naive, 0., nactive_LASSO, bias_naive, selective_naive_power, - power_naive, power_naive_BH, fdr_naive_BH, naive_discoveries.sum())) - Lee_inf = np.vstack((cov_Lee, length_Lee, inf_entries, nactive_LASSO, bias_Lee, selective_Lee_power, - power_Lee, power_Lee_BH, fdr_Lee_BH, Lee_discoveries.sum())) - Liu_inf = np.vstack((cov_Liu, length_Liu, 0., nactive_Liu, bias_Lee, selective_Liu_power, - power_Liu, power_Liu_BH, fdr_Liu_BH, Liu_discoveries.sum())) - MLE_inf = np.vstack((cov_MLE, length_MLE, 0., nonzero.sum(), bias_MLE, selective_MLE_power, - power_MLE, power_MLE_BH, fdr_MLE_BH, MLE_discoveries.sum())) - nreport = np.vstack((Lee_nreport, Liu_nreport, MLE_nreport)) - - return np.vstack((risks, naive_inf, Lee_inf, Liu_inf, MLE_inf, partial_risks, nreport)) - - - -def main(n=500, p=100, rho=0.35, s=5, beta_type=1, snr_values=np.array([0.15, 0.20, 0.31]), - target="selected", tuning_nonrand="lambda.1se", tuning_rand="lambda.1se", - randomizing_scale = np.sqrt(0.50), ndraw=20, outpath = None, plot=True): - - df_selective_inference = pd.DataFrame() - df_risk = pd.DataFrame() - - if n > p: - full_dispersion = True - else: - full_dispersion = False - - snr_list = [] - snr_list_0 = [] - for snr in snr_values: - snr_list.append(snr*np.ones(4)) - snr_list_0.append(snr*np.ones(2)) - output_overall = np.zeros(55) - if target == "selected": - for i in range(ndraw): - output_overall += np.squeeze(comparison_cvmetrics_selected(n=n, - p=p, - nval=n, - rho=rho, - s=s, - beta_type=beta_type, - snr=snr, - randomizer_scale=randomizing_scale, - full_dispersion=full_dispersion, - tuning_nonrand =tuning_nonrand, - tuning_rand=tuning_rand)) - elif target == "full": - for i in range(ndraw): - output_overall += np.squeeze(comparison_cvmetrics_full(n=n, - p=p, - 
nval=n, - rho=rho, - s=s, - beta_type=beta_type, - snr=snr, - randomizer_scale=randomizing_scale, - full_dispersion=full_dispersion, - tuning_nonrand =tuning_nonrand, - tuning_rand=tuning_rand)) - - nLee = output_overall[52] - nLiu = output_overall[53] - nMLE = output_overall[54] - - relative_risk = (output_overall[0:6] / float(ndraw)).reshape((1, 6)) - partial_risk = np.hstack(((output_overall[46:50] / float(ndraw-nMLE)).reshape((1, 4)), - (output_overall[50:52] / float(ndraw - nLee)).reshape((1, 2)))) - - nonrandomized_naive_inf = np.hstack(((output_overall[6:12] / float(ndraw - nLee)).reshape((1, 6)), - (output_overall[12:16] / float(ndraw)).reshape((1, 4)))) - nonrandomized_Lee_inf = np.hstack(((output_overall[16:22] / float(ndraw - nLee)).reshape((1, 6)), - (output_overall[22:26] / float(ndraw)).reshape((1, 4)))) - nonrandomized_Liu_inf = np.hstack(((output_overall[26:32] / float(ndraw - nLiu)).reshape((1, 6)), - (output_overall[32:36] / float(ndraw)).reshape((1, 4)))) - randomized_MLE_inf = np.hstack(((output_overall[36:42] / float(ndraw - nMLE)).reshape((1, 6)), - (output_overall[42:46] / float(ndraw)).reshape((1, 4)))) - - if target=="selected": - nonrandomized_Liu_inf[nonrandomized_Liu_inf==0] = 'NaN' - if target == "debiased": - nonrandomized_Liu_inf[nonrandomized_Liu_inf == 0] = 'NaN' - nonrandomized_Lee_inf[nonrandomized_Lee_inf == 0] = 'NaN' - - df_naive = pd.DataFrame(data=nonrandomized_naive_inf,columns=['coverage', - 'length', - 'prop-infty', - 'tot-active', - 'bias', - 'sel-power', - 'power', - 'power-BH', - 'fdr-BH', - 'tot-discoveries']) - df_naive['method'] = "Naive" - df_Lee = pd.DataFrame(data=nonrandomized_Lee_inf, columns=['coverage', - 'length', - 'prop-infty', - 'tot-active', - 'bias', - 'sel-power', - 'power', - 'power-BH', - 'fdr-BH', - 'tot-discoveries']) - df_Lee['method'] = "Lee" - - df_Liu = pd.DataFrame(data=nonrandomized_Liu_inf,columns=['coverage', - 'length', - 'prop-infty', - 'tot-active', - 'bias', - 'sel-power', - 'power', - 'power-BH', - 'fdr-BH', - 'tot-discoveries']) - df_Liu['method'] = "Liu" - - df_MLE = pd.DataFrame(data=randomized_MLE_inf, columns=['coverage', - 'length', - 'prop-infty', - 'tot-active', - 'bias', - 'sel-power', - 'power', - 'power-BH', - 'fdr-BH', - 'tot-discoveries']) - df_MLE['method'] = "MLE" - - df_risk_metrics = pd.DataFrame(data=relative_risk, columns=['sel-MLE', - 'ind-est', - 'rand-LASSO', - 'rel-rand-LASSO', - 'rel-LASSO', - 'LASSO']) - df_risk_metrics['metric'] = "Full" - df_prisk_metrics = pd.DataFrame(data=partial_risk,columns=['sel-MLE', - 'ind-est', - 'rand-LASSO', - 'rel-rand-LASSO', - 'rel-LASSO', - 'LASSO']) - df_prisk_metrics['metric'] = "Partial" - - df_selective_inference = df_selective_inference.append(df_naive, ignore_index=True) - df_selective_inference = df_selective_inference.append(df_Lee, ignore_index=True) - df_selective_inference = df_selective_inference.append(df_Liu, ignore_index=True) - df_selective_inference = df_selective_inference.append(df_MLE, ignore_index=True) - - df_risk = df_risk.append(df_risk_metrics, ignore_index=True) - df_risk = df_risk.append(df_prisk_metrics, ignore_index=True) - - snr_list = list(itertools.chain.from_iterable(snr_list)) - df_selective_inference['n'] = n - df_selective_inference['p'] = p - df_selective_inference['s'] = s - df_selective_inference['rho'] = rho - df_selective_inference['beta-type'] = beta_type - df_selective_inference['snr'] = pd.Series(np.asarray(snr_list)) - df_selective_inference['target'] = target - - snr_list_0 = 
list(itertools.chain.from_iterable(snr_list_0)) - df_risk['n'] = n - df_risk['p'] = p - df_risk['s'] = s - df_risk['rho'] = rho - df_risk['beta-type'] = beta_type - df_risk['snr'] = pd.Series(np.asarray(snr_list_0)) - df_risk['target'] = target - - if outpath is None: - outpath = os.path.dirname(__file__) - - outfile_inf_csv = os.path.join(outpath, "dims_" + str(n) + "_" + str(p) + "_inference_betatype" + str(beta_type) + target + "_rho_" + str(rho) + ".csv") - outfile_risk_csv = os.path.join(outpath, "dims_" + str(n) + "_" + str(p) + "_risk_betatype" + str(beta_type) + target + "_rho_" + str(rho) + ".csv") - outfile_inf_html = os.path.join(outpath, "dims_" + str(n) + "_" + str(p) + "_inference_betatype" + str(beta_type) + target + "_rho_" + str(rho) + ".html") - outfile_risk_html = os.path.join(outpath, "dims_" + str(n) + "_" + str(p) + "_risk_betatype" + str(beta_type) + target + "_rho_" + str(rho) + ".html") - df_selective_inference.to_csv(outfile_inf_csv, index=False) - df_risk.to_csv(outfile_risk_csv, index=False) - df_selective_inference.to_html(outfile_inf_html) - df_risk.to_html(outfile_risk_html) - - if plot is True: - plotRisk(df_risk) - plotCoveragePower(df_selective_inference) - - -if __name__ == "__main__": - main() - From 7835ac8ae1393b2dedd18dfaeef57d7545f74684 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 24 Jun 2020 23:15:50 -0700 Subject: [PATCH 050/187] fixinf change of pval to pvalue --- selectinf/algorithms/tests/test_ROSI.py | 2 +- selectinf/algorithms/tests/test_compareR.py | 30 ++++++++++----------- selectinf/algorithms/tests/test_lasso.py | 18 ++++++------- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/selectinf/algorithms/tests/test_ROSI.py b/selectinf/algorithms/tests/test_ROSI.py index 886648221..9629de691 100644 --- a/selectinf/algorithms/tests/test_ROSI.py +++ b/selectinf/algorithms/tests/test_ROSI.py @@ -103,7 +103,7 @@ def test_modelQ(): LX.fit() SX = LX.summary(dispersion=1) - np.testing.assert_allclose(S['pval'], SX['pval'], rtol=1.e-5, atol=1.e-4) + np.testing.assert_allclose(S['pvalue'], SX['pvalue'], rtol=1.e-5, atol=1.e-4) diff --git a/selectinf/algorithms/tests/test_compareR.py b/selectinf/algorithms/tests/test_compareR.py index 3727fe548..58ac797cb 100644 --- a/selectinf/algorithms/tests/test_compareR.py +++ b/selectinf/algorithms/tests/test_compareR.py @@ -87,7 +87,7 @@ def test_fixed_lambda(): yield np.testing.assert_allclose, L.fit()[1:], beta_hat, 1.e-2, 1.e-2, False, 'fixed lambda, sigma=%f coef' % s yield np.testing.assert_equal, L.active, selected_vars - yield np.testing.assert_allclose, S['pval'], R_pvals, tol, tol, False, 'fixed lambda, sigma=%f pval' % s + yield np.testing.assert_allclose, S['pvalue'], R_pvals, tol, tol, False, 'fixed lambda, sigma=%f pval' % s yield np.testing.assert_allclose, S['sd'], sdvar, tol, tol, False, 'fixed lambda, sigma=%f sd ' % s yield np.testing.assert_allclose, S['onestep'], coef, tol, tol, False, 'fixed lambda, sigma=%f estimator' % s @@ -252,7 +252,7 @@ def test_coxph(): yield np.testing.assert_equal, np.array(L.active) + 1, selected_vars yield np.testing.assert_allclose, beta2, beta_hat, tol, tol, False, 'cox coeff' - yield np.testing.assert_allclose, L.summary('onesided')['pval'], R_pvals, tol, tol, False, 'cox pvalues' + yield np.testing.assert_allclose, L.summary('onesided')['pvalue'], R_pvals, tol, tol, False, 'cox pvalues' @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test") def test_logistic(): @@ -311,7 +311,7 @@ def test_logistic(): yield 
np.testing.assert_equal, L.active[1:], selected_vars yield np.testing.assert_allclose, beta2, beta_hat, tol, tol, False, 'logistic coef' - yield np.testing.assert_allclose, L.summary('onesided')['pval'][1:], R_pvals, tol, tol, False, 'logistic pvalues' + yield np.testing.assert_allclose, L.summary('onesided')['pvalue'][1:], R_pvals, tol, tol, False, 'logistic pvalues' @np.testing.dec.skipif(not rpy2_available, msg="rpy2 not available, skipping test") @@ -554,8 +554,8 @@ def test_liu_gaussian(): active_set = rpy.r('active_vars') print(pvalues) - print(S['pval']) - nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999) + print(S['pvalue']) + nt.assert_true(np.corrcoef(pvalues, S['pvalue'])[0,1] > 0.999) numpy2ri.deactivate() break @@ -610,8 +610,8 @@ def test_liu_logistic(): pvalues = pvalues[~np.isnan(pvalues)] active_set = rpy.r('active_vars') print(pvalues) - print(S['pval']) - nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999) + print(S['pvalue']) + nt.assert_true(np.corrcoef(pvalues, S['pvalue'])[0,1] > 0.999) numpy2ri.deactivate() break @@ -669,9 +669,9 @@ def test_ROSI_gaussian_JM(): active_set = rpy.r('active_vars') print(pvalues) - print(np.asarray(S['pval'])) + print(np.asarray(S['pvalue'])) - nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999) + nt.assert_true(np.corrcoef(pvalues, S['pvalue'])[0,1] > 0.999) numpy2ri.deactivate() break @@ -724,9 +724,9 @@ def test_ROSI_logistic_JM(): active_set = rpy.r('active_vars') print(pvalues) - print(np.asarray(S['pval'])) + print(np.asarray(S['pvalue'])) - nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999) + nt.assert_true(np.corrcoef(pvalues, S['pvalue'])[0,1] > 0.999) numpy2ri.deactivate() break @@ -790,9 +790,9 @@ def test_ROSI_gaussian_BN(): active_set = rpy.r('active_vars') print(pvalues) - print(np.asarray(S['pval'])) + print(np.asarray(S['pvalue'])) - nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999) + nt.assert_true(np.corrcoef(pvalues, S['pvalue'])[0,1] > 0.999) numpy2ri.deactivate() break @@ -846,9 +846,9 @@ def test_ROSI_logistic_BN(): active_set = rpy.r('active_vars') print(pvalues) - print(np.asarray(S['pval'])) + print(np.asarray(S['pvalue'])) - nt.assert_true(np.corrcoef(pvalues, S['pval'])[0,1] > 0.999) + nt.assert_true(np.corrcoef(pvalues, S['pvalue'])[0,1] > 0.999) numpy2ri.deactivate() break diff --git a/selectinf/algorithms/tests/test_lasso.py b/selectinf/algorithms/tests/test_lasso.py index 3b1a3186e..a64bd869d 100644 --- a/selectinf/algorithms/tests/test_lasso.py +++ b/selectinf/algorithms/tests/test_lasso.py @@ -115,7 +115,7 @@ def test_logistic(): np.dot(L.constraints.linear_part, L.onestep_estimator), L.constraints.offset) - P = L.summary()['pval'] + P = L.summary()['pvalue'] return L, C, P @@ -137,7 +137,7 @@ def test_poisson(): np.dot(L.constraints.linear_part, L.onestep_estimator), L.constraints.offset) - P = L.summary()['pval'] + P = L.summary()['pvalue'] return L, C, P @@ -162,7 +162,7 @@ def test_coxph(): np.dot(L.constraints.linear_part, L.onestep_estimator), L.constraints.offset) - P = L.summary()['pval'] + P = L.summary()['pvalue'] return L, C, P @@ -543,7 +543,7 @@ def test_gaussian_pvals(n=100, if set(true_active).issubset(L.active): S = L.summary('onesided') S = L.summary('twosided') - return S['pval'], [v in true_active for v in S['variable']] + return S['pvalue'], [v in true_active for v in S['variable']] @wait_for_return_value() def test_sqrt_lasso_pvals(n=100, @@ -572,7 +572,7 @@ def test_sqrt_lasso_pvals(n=100, if set(true_active).issubset(L.active): S = 
L.summary('onesided') S = L.summary('twosided') - return S['pval'], [v in true_active for v in S['variable']] + return S['pvalue'], [v in true_active for v in S['variable']] @wait_for_return_value() @@ -604,7 +604,7 @@ def test_sqrt_lasso_sandwich_pvals(n=200, if set(true_active).issubset(L_SQ.active): S = L_SQ.summary('twosided') - return S['pval'], [v in true_active for v in S['variable']] + return S['pvalue'], [v in true_active for v in S['variable']] @wait_for_return_value() def test_gaussian_sandwich_pvals(n=200, @@ -654,13 +654,13 @@ def test_gaussian_sandwich_pvals(n=200, if set(true_active).issubset(L_P.active): S = L_P.summary('twosided') - P_P = [p for p, v in zip(S['pval'], S['variable']) if v not in true_active] + P_P = [p for p, v in zip(S['pvalue'], S['variable']) if v not in true_active] L_S = lasso.gaussian(X, y, feature_weights, covariance_estimator=sandwich) L_S.fit() S = L_S.summary('twosided') - P_S = [p for p, v in zip(S['pval'], S['variable']) if v not in true_active] + P_S = [p for p, v in zip(S['pvalue'], S['variable']) if v not in true_active] return P_P, P_S, [v in true_active for v in S['variable']] @@ -693,7 +693,7 @@ def test_logistic_pvals(n=500, print(true_active, L.active) if set(true_active).issubset(L.active): - return S['pval'], [v in true_active for v in S['variable']] + return S['pvalue'], [v in true_active for v in S['variable']] @set_seed_iftrue(True) def test_adding_quadratic_lasso(): From 408f26efb020fcf2df0afac2f6d158e0582bc06d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 24 Jun 2020 23:21:45 -0700 Subject: [PATCH 051/187] change of lower/upper to lower_confidence/upper_confidence --- selectinf/randomized/tests/test_BH.py | 10 ++++++---- selectinf/randomized/tests/test_drop_losers.py | 12 ++++++------ .../randomized/tests/test_marginal_screening.py | 4 ++-- .../randomized/tests/test_selective_MLE_high.py | 6 +++--- selectinf/randomized/tests/test_slope.py | 4 ++-- selectinf/randomized/tests/test_topK.py | 6 +++--- 6 files changed, 22 insertions(+), 20 deletions(-) diff --git a/selectinf/randomized/tests/test_BH.py b/selectinf/randomized/tests/test_BH.py index e581c6350..34c26ac5f 100644 --- a/selectinf/randomized/tests/test_BH.py +++ b/selectinf/randomized/tests/test_BH.py @@ -158,13 +158,15 @@ def test_BH(n=500, parameter=beta_target) pivots = np.asarray(result['pivot']) pval = np.asarray(result['pvalue']) - lower = np.asarray(result['lower']) - upper = np.asarray(result['upper']) + lower = np.asarray(result['lower_confidence']) + upper = np.asarray(result['upper_confidence']) print(pval) - print("beta_target and intervals", beta_target, result[['lower', 'upper']]) + print("beta_target and intervals", beta_target, result[['lower_confidence', + 'upper_confidence']]) coverage = (beta_target > lower) * (beta_target < upper) print("coverage for selected target", coverage.sum()/float(nonzero.sum())) - return pivots[beta_target == 0], pivots[beta_target != 0], coverage, result[['lower', 'upper']], pivots + return (pivots[beta_target == 0], pivots[beta_target != 0], coverage, + result[['lower_confidence', 'upper_confidence']], pivots) else: return [], [], [], [], [] diff --git a/selectinf/randomized/tests/test_drop_losers.py b/selectinf/randomized/tests/test_drop_losers.py index 6322f5e66..46f4b8395 100644 --- a/selectinf/randomized/tests/test_drop_losers.py +++ b/selectinf/randomized/tests/test_drop_losers.py @@ -45,8 +45,8 @@ def test_drop_losers(p=50, else: result = dtl.selective_MLE()[0] pvalue = np.asarray(result['pvalue']) - lower = 
np.asarray(result['lower']) - upper = np.asarray(result['upper']) + lower = np.asarray(result['lower_confidence']) + upper = np.asarray(result['upper_confidence']) cover = (lower < 0) * (upper > 0) return pvalue, cover @@ -155,8 +155,8 @@ def test_compare_topK(p=20, np.testing.assert_allclose(summary1['pvalue'], summary2['pvalue'], rtol=1.e-3) np.testing.assert_allclose(summary1['target'], summary2['target'], rtol=1.e-3) - np.testing.assert_allclose(summary1['lower'], summary2['lower'], rtol=1.e-3) - np.testing.assert_allclose(summary1['upper'], summary2['upper'], rtol=1.e-3) + np.testing.assert_allclose(summary1['lower_confidence'], summary2['lower_confidence'], rtol=1.e-3) + np.testing.assert_allclose(summary1['upper_confidence'], summary2['upper_confidence'], rtol=1.e-3) np.random.seed(0) degenerate_topK.fit(perturb=perturb2) @@ -170,8 +170,8 @@ def test_compare_topK(p=20, np.testing.assert_allclose(summary1['pvalue'], summary3['pvalue'], rtol=1.e-3) np.testing.assert_allclose(summary1['target'], summary3['target'], rtol=1.e-3) - np.testing.assert_allclose(summary1['lower'], summary3['lower'], rtol=1.e-3) - np.testing.assert_allclose(summary1['upper'], summary3['upper'], rtol=1.e-3) + np.testing.assert_allclose(summary1['lower_confidence'], summary3['lower_confidence'], rtol=1.e-3) + np.testing.assert_allclose(summary1['upper_confidence'], summary3['upper_confidence'], rtol=1.e-3) def main(nsim=100, use_MLE=True): diff --git a/selectinf/randomized/tests/test_marginal_screening.py b/selectinf/randomized/tests/test_marginal_screening.py index e416cdade..6db0fbdf2 100644 --- a/selectinf/randomized/tests/test_marginal_screening.py +++ b/selectinf/randomized/tests/test_marginal_screening.py @@ -68,7 +68,7 @@ def test_marginal(n=500, alternatives, compute_intervals=True) - intervals = np.asarray(result[['lower', 'upper']]) + intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) pval = result['pvalue'] print(pval) if marginal: @@ -152,7 +152,7 @@ def test_simple(n=100, compute_intervals=True) pval = result['pvalue'] - intervals = np.asarray(result[['lower', 'upper']]) + intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) print(pval) beta_target = cov_target.dot(true_mean[nonzero]) print("beta_target and intervals", beta_target, intervals) diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 01df0630e..578ae66ec 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -74,7 +74,7 @@ def test_full_targets(n=200, cov_target_score)[0] pval = result['pvalue'] estimate = result['MLE'] - intervals = np.asarray(result[['lower', 'upper']]) + intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) print("estimate, intervals", estimate, intervals) coverage = (beta[nonzero] > intervals[:, 0]) * (beta[nonzero] < intervals[:, 1]) @@ -142,7 +142,7 @@ def test_selected_targets(n=2000, cov_target_score)[0] estimate = result['MLE'] pval = result['pvalue'] - intervals = np.asarray(result[['lower', 'upper']]) + intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) @@ -210,7 +210,7 @@ def test_instance(): cov_target_score)[0] estimate = result['MLE'] pval = result['pvalue'] - intervals = np.asarray(result[['lower', 'upper']]) + intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) beta_target = np.linalg.pinv(X[:, 
M]).dot(X.dot(beta)) diff --git a/selectinf/randomized/tests/test_slope.py b/selectinf/randomized/tests/test_slope.py index 5c31a848f..bc3a475a7 100644 --- a/selectinf/randomized/tests/test_slope.py +++ b/selectinf/randomized/tests/test_slope.py @@ -183,8 +183,8 @@ def test_randomized_slope(n=2000, compute_intervals=True, ndraw=150000) pval = np.asarray(result['pvalue']) - lower = np.asarray(result['lower']) - upper = np.asarray(result['upper']) + lower = np.asarray(result['lower_confidence']) + upper = np.asarray(result['upper_confidence']) print(pd.DataFrame({'target':beta_target, 'lower':lower, diff --git a/selectinf/randomized/tests/test_topK.py b/selectinf/randomized/tests/test_topK.py index 83c7a6ac0..000c45aba 100644 --- a/selectinf/randomized/tests/test_topK.py +++ b/selectinf/randomized/tests/test_topK.py @@ -67,10 +67,10 @@ def test_topK(n=500, crosscov_target_score, alternatives, compute_intervals=True) - lower = np.asarray(result['lower']) - upper = np.asarray(result['upper']) + lower = np.asarray(result['lower_confidence']) + upper = np.asarray(result['upper_confidence']) pval = result['pvalue'] - intervals = np.asarray(result[['lower', 'upper']]) + intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) print(pval) if marginal: beta_target = true_mean[nonzero] From 915904378afdb93de6e2460b9cfe77d754341bc6 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Thu, 25 Jun 2020 13:44:49 -0400 Subject: [PATCH 052/187] commit changes to test_mle --- doc/adjusted_MLE/tests/comparison_metrics.py | 20 ++++++++++++++++++++ selectinf/randomized/tests/test_lasso.py | 2 ++ 2 files changed, 22 insertions(+) diff --git a/doc/adjusted_MLE/tests/comparison_metrics.py b/doc/adjusted_MLE/tests/comparison_metrics.py index c902ec879..15a003d0e 100644 --- a/doc/adjusted_MLE/tests/comparison_metrics.py +++ b/doc/adjusted_MLE/tests/comparison_metrics.py @@ -1,6 +1,26 @@ +<<<<<<< HEAD:doc/adjusted_MLE/tests/comparison_metrics.py from __future__ import division, print_function import numpy as np, sys, time from scipy.stats import norm as ndist +======= +import numpy as np, os, itertools +import pandas as pd + +import rpy2.robjects as rpy +from rpy2.robjects import numpy2ri +rpy.numpy2ri.activate() +from scipy.stats import norm as ndist + +from ..lasso import lasso, full_targets, selected_targets, debiased_targets +from ...algorithms.lasso import lasso as lasso_full + +def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1): + + rpy.r(''' + source('~/best-subset/bestsubset/R/sim.R') + sim_xy = sim.xy + ''') +>>>>>>> commit changes to test_mle:selectinf/randomized/tests/test_cv_mle.py from rpy2 import robjects import rpy2.robjects.numpy2ri diff --git a/selectinf/randomized/tests/test_lasso.py b/selectinf/randomized/tests/test_lasso.py index 01b5b110a..507a80d63 100644 --- a/selectinf/randomized/tests/test_lasso.py +++ b/selectinf/randomized/tests/test_lasso.py @@ -394,3 +394,5 @@ def main(nsim=500, n=500, p=200, sqrt=False, target='full', sigma=3, AR=True): plt.show() +if __name__ == "__main__": + main() \ No newline at end of file From 6f546e96f897d1dc46a763dc6ee4e19a600cf61f Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Mon, 29 Jun 2020 02:21:17 -0400 Subject: [PATCH 053/187] add approx log reference --- selectinf/randomized/approx_reference.py | 139 +++++++++++++++++++++++ selectinf/randomized/query.py | 22 +++- 2 files changed, 160 insertions(+), 1 deletion(-) create mode 100644 selectinf/randomized/approx_reference.py diff --git a/selectinf/randomized/approx_reference.py 
b/selectinf/randomized/approx_reference.py new file mode 100644 index 000000000..79096b0c4 --- /dev/null +++ b/selectinf/randomized/approx_reference.py @@ -0,0 +1,139 @@ +from __future__ import division, print_function + +import numpy as np, sys +from scipy.stats import norm as ndist + +from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C + + +class approximate_grid_inference(): + + def __init__(self, + query, + observed_target, + cov_target, + cov_target_score, + grid, + dispersion=1, + level=0.9, + solve_args={'tol':1.e-12}): + + self.solve_args = solve_args + + self.linear_part = query.sampler.affine_con.linear_part + self.offset = query.sampler.affine_con.offset + + self.logdens_linear = query.sampler.logdens_transform[0] + self.cond_mean = query.cond_mean + self.prec_opt = np.linalg.inv(query.cond_cov) + self.cond_cov = query.cond_cov + + self.observed_target = observed_target + self.cov_target_score = cov_target_score + self.cov_target = cov_target + + self.init_soln = query.observed_opt_state + self.grid = grid + + self.ntarget = cov_target.shape[0] + self.level = level + + def approx_log_reference(self, + observed_target, + cov_target, + cov_target_score): + + + if np.asarray(observed_target).shape in [(), (0,)]: + raise ValueError('no target specified') + + + observed_target = np.atleast_1d(observed_target) + prec_target = np.linalg.inv(cov_target) + target_lin = - self.logdens_linear.dot(cov_target_score.T.dot(prec_target)) + + ref_hat = [] + solver = solve_barrier_affine_C + for k in range(self.grid.shape[0]): + cond_mean_grid = target_lin.dot(np.asarray([self.grid[k]])) + ( + self.cond_mean - target_lin.dot(observed_target)) + conjugate_arg = self.prec_opt.dot(cond_mean_grid) + + val, _, _ = solver(conjugate_arg, + self.prec_opt, + self.init_soln, + self.linear_part, + self.offset, + self.solve_args) + + ref_hat.append(-val - (conjugate_arg.T.dot(self.cond_cov).dot(conjugate_arg) / 2.)) + + return np.asarray(ref_hat) + + + def approx_density(self, + mean_parameter, + cov_target, + approx_log_ref): + + + _approx_density = [] + for k in range(self.grid.shape[0]): + _approx_density.append(np.exp( + -np.true_divide((self.grid[k] - mean_parameter) ** 2, 2 * cov_target) + approx_log_ref[k])) + + _approx_density_ = np.asarray(_approx_density) / (np.asarray(_approx_density).sum()) + + return np.cumsum(_approx_density_) + + + def approx_ci(self, + param_grid, + cov_target, + approx_log_ref, + indx_obsv): + + area = np.zeros(param_grid.shape[0]) + + for k in range(param_grid.shape[0]): + area_vec = approx_density(param_grid[k], + cov_target, + approx_log_ref) + + area[k] = area_vec[indx_obsv] + + alpha = 1 - self.level + region = param_grid[(area >= alpha / 2.) & (area <= (1 - alpha / 2.))] + + if region.size > 0: + return np.nanmin(region), np.nanmax(region) + else: + return 0., 0. + + def approx_pivot(self, + mean_parameter): + + pivot = [] + + for m in range(self.ntarget): + observed_target_uni = (self.observed_target[m]).reshape((1,)) + cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) + cov_target_score_uni = self.cov_target_score[m, :].reshape((1, p)) + grid_indx_obs = np.argmin(np.abs(self.grid - observed_target_uni)) + + approx_log_ref = self.approx_log_reference(self.grid, + observed_target_uni, + cov_target_uni, + cov_target_score_uni) + + area_cum = approx_density(self.grid, + mean_parameter, + cov_target_uni, + approx_log_ref) + + pivot.append(2 * np.minimum(area_cum[grid_indx_obs], 1. 
- area_cum[grid_indx_obs])) + + sys.stderr.write("variable completed " + str(m + 1)+ "\n") + + return pivot + diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 19fb677bb..557d6216b 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -14,7 +14,7 @@ constraints) from .posterior_inference import posterior from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C - +from .approx_reference import approximate_grid_inference class query(object): @@ -301,6 +301,25 @@ def prior(target_parameter): dispersion, solve_args=solve_args) + def approximate_grid_inference(self, + observed_target, + target_cov, + target_score_cov, + dispersion=None, + solve_args={'tol': 1.e-12}): + + if dispersion is None: + dispersion = 1 + print('Using dispersion parameter 1...') + + + return approximate_grid_inference(self, + observed_target, + target_cov, + target_score_cov, + dispersion, + solve_args=solve_args) + class gaussian_query(query): @@ -1570,6 +1589,7 @@ def _solve_barrier_nonneg(conjugate_arg, hess = np.linalg.inv(precision + np.diag(barrier_hessian(current))) return current_value, current, hess + def selective_MLE(observed_target, target_cov, target_score_cov, From 309306c74342a2cf0f154bb5163f8ae5082a9434 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Mon, 29 Jun 2020 02:52:37 -0400 Subject: [PATCH 054/187] added test for pivot b.o. approx reference --- selectinf/randomized/approx_reference.py | 138 +++++++++--------- selectinf/randomized/query.py | 4 + .../randomized/tests/test_approx_reference.py | 93 ++++++++++++ 3 files changed, 163 insertions(+), 72 deletions(-) create mode 100644 selectinf/randomized/tests/test_approx_reference.py diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index 79096b0c4..8fc0b731c 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -1,8 +1,6 @@ from __future__ import division, print_function import numpy as np, sys -from scipy.stats import norm as ndist - from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C @@ -38,102 +36,98 @@ def __init__(self, self.ntarget = cov_target.shape[0] self.level = level - def approx_log_reference(self, - observed_target, - cov_target, - cov_target_score): - + def approx_log_reference(self, + observed_target, + cov_target, + cov_target_score): - if np.asarray(observed_target).shape in [(), (0,)]: + if np.asarray(observed_target).shape in [(), (0,)]: raise ValueError('no target specified') + observed_target = np.atleast_1d(observed_target) + prec_target = np.linalg.inv(cov_target) + target_lin = - self.logdens_linear.dot(cov_target_score.T.dot(prec_target)) - observed_target = np.atleast_1d(observed_target) - prec_target = np.linalg.inv(cov_target) - target_lin = - self.logdens_linear.dot(cov_target_score.T.dot(prec_target)) + ref_hat = [] + solver = solve_barrier_affine_C + for k in range(self.grid.shape[0]): + cond_mean_grid = target_lin.dot(np.asarray([self.grid[k]])) + ( + self.cond_mean - target_lin.dot(observed_target)) + conjugate_arg = self.prec_opt.dot(cond_mean_grid) - ref_hat = [] - solver = solve_barrier_affine_C - for k in range(self.grid.shape[0]): - cond_mean_grid = target_lin.dot(np.asarray([self.grid[k]])) + ( - self.cond_mean - target_lin.dot(observed_target)) - conjugate_arg = self.prec_opt.dot(cond_mean_grid) + val, _, _ = solver(conjugate_arg, + self.prec_opt, + self.init_soln, + self.linear_part, + self.offset, + 
**self.solve_args) - val, _, _ = solver(conjugate_arg, - self.prec_opt, - self.init_soln, - self.linear_part, - self.offset, - self.solve_args) + ref_hat.append(-val - (conjugate_arg.T.dot(self.cond_cov).dot(conjugate_arg) / 2.)) - ref_hat.append(-val - (conjugate_arg.T.dot(self.cond_cov).dot(conjugate_arg) / 2.)) + return np.asarray(ref_hat) - return np.asarray(ref_hat) + def approx_density(self, + mean_parameter, + cov_target, + approx_log_ref): - def approx_density(self, - mean_parameter, - cov_target, - approx_log_ref): + _approx_density = [] + for k in range(self.grid.shape[0]): + _approx_density.append(np.exp(-np.true_divide((self.grid[k] - mean_parameter) ** 2, 2 * cov_target) + approx_log_ref[k])) + _approx_density_ = np.asarray(_approx_density) / (np.asarray(_approx_density).sum()) + return np.cumsum(_approx_density_) - _approx_density = [] - for k in range(self.grid.shape[0]): - _approx_density.append(np.exp( - -np.true_divide((self.grid[k] - mean_parameter) ** 2, 2 * cov_target) + approx_log_ref[k])) + def approx_ci(self, + param_grid, + cov_target, + approx_log_ref, + indx_obsv): - _approx_density_ = np.asarray(_approx_density) / (np.asarray(_approx_density).sum()) + area = np.zeros(param_grid.shape[0]) - return np.cumsum(_approx_density_) + for k in range(param_grid.shape[0]): + area_vec = self.approx_density(param_grid[k], + cov_target, + approx_log_ref) + area[k] = area_vec[indx_obsv] - def approx_ci(self, - param_grid, - cov_target, - approx_log_ref, - indx_obsv): - - area = np.zeros(param_grid.shape[0]) + alpha = 1 - self.level + region = param_grid[(area >= alpha / 2.) & (area <= (1 - alpha / 2.))] - for k in range(param_grid.shape[0]): - area_vec = approx_density(param_grid[k], - cov_target, - approx_log_ref) + if region.size > 0: + return np.nanmin(region), np.nanmax(region) + else: + return 0., 0. - area[k] = area_vec[indx_obsv] + def approx_pivot(self, + mean_parameter): - alpha = 1 - self.level - region = param_grid[(area >= alpha / 2.) & (area <= (1 - alpha / 2.))] + pivot = [] - if region.size > 0: - return np.nanmin(region), np.nanmax(region) - else: - return 0., 0. + for m in range(self.ntarget): + p = self.cov_target_score.shape[1] + observed_target_uni = (self.observed_target[m]).reshape((1,)) + cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) + cov_target_score_uni = self.cov_target_score[m, :].reshape((1, p)) + grid_indx_obs = np.argmin(np.abs(self.grid - observed_target_uni)) - def approx_pivot(self, - mean_parameter): + approx_log_ref = self.approx_log_reference(observed_target_uni, + cov_target_uni, + cov_target_score_uni) - pivot = [] + area_cum = self.approx_density(mean_parameter[m], + cov_target_uni, + approx_log_ref) - for m in range(self.ntarget): - observed_target_uni = (self.observed_target[m]).reshape((1,)) - cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) - cov_target_score_uni = self.cov_target_score[m, :].reshape((1, p)) - grid_indx_obs = np.argmin(np.abs(self.grid - observed_target_uni)) + pivot.append(2 * np.minimum(area_cum[grid_indx_obs], 1. - area_cum[grid_indx_obs])) - approx_log_ref = self.approx_log_reference(self.grid, - observed_target_uni, - cov_target_uni, - cov_target_score_uni) + sys.stderr.write("variable completed " + str(m + 1) + "\n") - area_cum = approx_density(self.grid, - mean_parameter, - cov_target_uni, - approx_log_ref) + return pivot - pivot.append(2 * np.minimum(area_cum[grid_indx_obs], 1. 
- area_cum[grid_indx_obs])) - sys.stderr.write("variable completed " + str(m + 1)+ "\n") - return pivot diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 557d6216b..1185adebb 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -305,6 +305,7 @@ def approximate_grid_inference(self, observed_target, target_cov, target_score_cov, + grid = None, dispersion=None, solve_args={'tol': 1.e-12}): @@ -312,11 +313,14 @@ def approximate_grid_inference(self, dispersion = 1 print('Using dispersion parameter 1...') + if grid is None: + grid = np.linspace(- 20., 20., num=401) return approximate_grid_inference(self, observed_target, target_cov, target_score_cov, + grid, dispersion, solve_args=solve_args) diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py new file mode 100644 index 000000000..c817582d7 --- /dev/null +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -0,0 +1,93 @@ +import numpy as np + +from ...tests.instance import gaussian_instance +from ..lasso import lasso, selected_targets + + +def test_approx_pivot(n=500, + p=100, + signal_fac=1., + s=5, + sigma=2., + rho=0.4, + randomizer_scale=1.): + + inst, const = gaussian_instance, lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + + W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + randomizer_scale=randomizer_scale * dispersion) + + signs = conv.fit() + nonzero = signs != 0 + + if nonzero.sum()>0: + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + + grid = np.linspace(- 20., 20., num=401) + + approximate_grid_inf = conv.approximate_grid_inference(observed_target, + cov_target, + cov_target_score, + grid=grid, + dispersion=dispersion) + + pivot = approximate_grid_inf.approx_pivot(beta_target) + + return pivot + + +import matplotlib.pyplot as plt +from statsmodels.distributions.empirical_distribution import ECDF + + +def main(nsim=300): + + _pivot=[] + for i in range(nsim): + + _pivot.extend(test_approx_pivot(n=100, + p=50, + signal_fac=0.5, + s=5, + sigma=2., + rho=0.20, + randomizer_scale=1.)) + + print("iteration completed ", i) + + plt.clf() + ecdf_MLE = ECDF(np.asarray(_pivot)) + grid = np.linspace(0, 1, 101) + plt.plot(grid, ecdf_MLE(grid), c='blue', marker='^') + plt.plot(grid, grid, 'k--') + plt.show() + +if __name__ =="__main__": + main(nsim=50) \ No newline at end of file From b35377e90b8432fc09d152750f4c26f8c2067fc6 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Tue, 30 Jun 2020 01:17:38 -0400 Subject: [PATCH 055/187] added approximate ci b.o. 
approx reference --- selectinf/randomized/approx_reference.py | 29 ++++ .../randomized/tests/test_approx_reference.py | 145 +++++++++++++++--- 2 files changed, 152 insertions(+), 22 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index 8fc0b731c..3c1b1b8fa 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -128,6 +128,35 @@ def approx_pivot(self, return pivot + def approx_intervals(self, + param_grid): + + intervals_lci =[] + intervals_uci =[] + + for m in range(self.ntarget): + p = self.cov_target_score.shape[1] + observed_target_uni = (self.observed_target[m]).reshape((1,)) + cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) + cov_target_score_uni = self.cov_target_score[m, :].reshape((1, p)) + grid_indx_obs = np.argmin(np.abs(self.grid - observed_target_uni)) + + approx_log_ref = self.approx_log_reference(observed_target_uni, + cov_target_uni, + cov_target_score_uni) + + approx_lci, approx_uci = self.approx_ci(param_grid[m,:], + cov_target_uni, + approx_log_ref, + grid_indx_obs) + + intervals_lci.append(approx_lci) + intervals_uci.append(approx_uci) + + sys.stderr.write("variable completed " + str(m + 1) + "\n") + + return np.asarray(intervals_lci), np.asarray(intervals_uci) + diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index c817582d7..c394559d6 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -50,7 +50,14 @@ def test_approx_pivot(n=500, nonzero, dispersion=dispersion) - grid = np.linspace(- 20., 20., num=401) + inverse_info = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[1] + + scale_ = 4 * np.max(np.sqrt(np.diag(inverse_info))) + ngrid = 2 * scale_/0.1 + + grid = np.linspace(- scale_, scale_, num=ngrid) approximate_grid_inf = conv.approximate_grid_inference(observed_target, cov_target, @@ -63,31 +70,125 @@ def test_approx_pivot(n=500, return pivot -import matplotlib.pyplot as plt -from statsmodels.distributions.empirical_distribution import ECDF +def test_approx_ci(n=500, + p=100, + signal_fac=1., + s=5, + sigma=2., + rho=0.4, + randomizer_scale=1.): + inst, const = gaussian_instance, lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) -def main(nsim=300): + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + sigma=sigma, + random_signs=True)[:3] - _pivot=[] - for i in range(nsim): + n, p = X.shape + + sigma_ = np.std(Y) + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) - _pivot.extend(test_approx_pivot(n=100, - p=50, - signal_fac=0.5, - s=5, - sigma=2., - rho=0.20, - randomizer_scale=1.)) + W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + randomizer_scale=randomizer_scale * dispersion) + + signs = conv.fit() + nonzero = signs != 0 - print("iteration completed ", i) + if nonzero.sum()>0: + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + + ntarget = observed_target.shape[0] + result, inverse_info = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[:2] + + _scale = 4 * np.sqrt(np.diag(inverse_info)) + scale_ = np.max(_scale) + ngrid = int(2 * scale_/0.1) + + grid = np.linspace(-scale_, scale_, num=ngrid) + + approximate_grid_inf = 
conv.approximate_grid_inference(observed_target, + cov_target, + cov_target_score, + grid=grid, + dispersion=dispersion) + + + param_grid = np.zeros((ntarget, ngrid)) + mle = np.asarray(result['MLE']) + for j in range(ntarget): + param_grid[j,:] = np.linspace(mle[j]-_scale[j], mle[j]+_scale[j], num=ngrid) + + lci, uci = approximate_grid_inf.approx_intervals(param_grid) + + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + coverage = (lci < beta_target) * (uci > beta_target) + length = uci - lci + + return np.mean(coverage), np.mean(length) + +import matplotlib.pyplot as plt +from statsmodels.distributions.empirical_distribution import ECDF - plt.clf() - ecdf_MLE = ECDF(np.asarray(_pivot)) - grid = np.linspace(0, 1, 101) - plt.plot(grid, ecdf_MLE(grid), c='blue', marker='^') - plt.plot(grid, grid, 'k--') - plt.show() -if __name__ =="__main__": - main(nsim=50) \ No newline at end of file +def main(nsim=300, CI = False): + + if CI is False: + _pivot = [] + for i in range(nsim): + _pivot.extend(test_approx_pivot(n=200, + p=100, + signal_fac=0.5, + s=5, + sigma=3., + rho=0.20, + randomizer_scale=1.)) + + print("iteration completed ", i) + + plt.clf() + ecdf_MLE = ECDF(np.asarray(_pivot)) + grid = np.linspace(0, 1, 101) + plt.plot(grid, ecdf_MLE(grid), c='blue', marker='^') + plt.plot(grid, grid, 'k--') + plt.show() + + if CI is True: + coverage_ = 0. + length_ = 0. + for n in range(nsim): + cov, len = test_approx_ci(n=500, + p=100, + signal_fac=1., + s=5, + sigma=2., + rho=0.4, + randomizer_scale=1.) + + coverage_ += cov + length_ += len + print("coverage so far ", coverage_ / (n + 1.)) + print("lengths so far ", length_ / (n + 1.)) + print("iteration completed ", n + 1) + +if __name__ == "__main__": + main(nsim=20, CI = True) \ No newline at end of file From 9223cccf077fe3d9eb56744ed6dd59e225986442 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Tue, 30 Jun 2020 18:03:05 -0400 Subject: [PATCH 056/187] sigma instead of sigma_sq while setting scale parameter of posterior samplers --- selectinf/randomized/posterior_inference.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 309779fa0..355bab0e0 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -174,12 +174,12 @@ def langevin_sampler(selective_posterior, selective_posterior.log_posterior, proposal_scale, stepsize, - selective_posterior.dispersion) + np.sqrt(selective_posterior.dispersion)) samples = np.zeros((nsample, selective_posterior.ntarget)) for i, sample in enumerate(sampler): - sampler.scaling = selective_posterior.dispersion + sampler.scaling = np.sqrt(selective_posterior.dispersion) samples[i,:] = sample.copy() if i == nsample - 1: break @@ -202,22 +202,22 @@ def gibbs_sampler(selective_posterior, selective_posterior.log_posterior, proposal_scale, stepsize, - selective_posterior.dispersion) + np.sqrt(selective_posterior.dispersion)) samples = np.zeros((nsample, selective_posterior.ntarget)) scale_samples = np.zeros(nsample) - scale_update = selective_posterior.dispersion + scale_update = np.sqrt(selective_posterior.dispersion) for i in range(nsample): sample = sampler.__next__() samples[i, :] = sample - scale_update = invgamma.rvs(a=(0.1 + + scale_update_sq = invgamma.rvs(a=(0.1 + selective_posterior.ntarget + selective_posterior.ntarget/2), - scale=0.1-(scale_update * sampler.grad_posterior[1]), - size=1) - scale_samples[i] = 
scale_update - sampler.scaling = scale_update + scale=0.1-((scale_update**2) * sampler.grad_posterior[1]), + size=1) + scale_samples[i] = np.sqrt(scale_update_sq) + sampler.scaling = np.sqrt(scale_update_sq) return samples[nburnin:, :], scale_samples[nburnin:] From 4b77d8a8129096909d5adac389be15a428533178 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Thu, 2 Jul 2020 12:54:50 -0400 Subject: [PATCH 057/187] added hiv test: carved posterior interval estimates --- selectinf/randomized/posterior_inference.py | 4 +- selectinf/randomized/tests/test_posterior.py | 182 ++++++++++++++++--- 2 files changed, 156 insertions(+), 30 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 355bab0e0..d8be08029 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -48,7 +48,7 @@ def __init__(self, offset = query.sampler.affine_con.offset logdens_linear = query.sampler.logdens_transform[0] - _, self.inverse_info, log_ref = query.selective_MLE(observed_target, + result, self.inverse_info, log_ref = query.selective_MLE(observed_target, cov_target, cov_target_score) @@ -69,7 +69,7 @@ def __init__(self, self.linear_part = linear_part self.offset = offset - self.initial_estimate = observed_target + self.initial_estimate = np.asarray(result['MLE']) self.dispersion = dispersion self.log_ref = log_ref diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index 039c72a90..bba39845a 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -1,9 +1,11 @@ import numpy as np +import pandas as pd +import statsmodels.api as sm +from scipy.stats import norm as ndist from ...tests.instance import gaussian_instance -from ..lasso import lasso, selected_targets -from ..posterior_inference import (posterior, - langevin_sampler, +from ..lasso import lasso, selected_targets, split_lasso +from ..posterior_inference import (langevin_sampler, gibbs_sampler) def test_Langevin(n=500, @@ -13,8 +15,8 @@ def test_Langevin(n=500, sigma=3., rho=0.4, randomizer_scale=1., - nsample=100, - nburnin=50): + nsample=1500, + nburnin=100): inst, const = gaussian_instance, lasso.gaussian signal = np.sqrt(signal_fac * 2 * np.log(p)) @@ -62,9 +64,9 @@ def test_Langevin(n=500, nsample=nsample, nburnin=nburnin) - gibbs_samples = gibbs_sampler(posterior_inf, - nsample=nsample, - nburnin=nburnin) + # gibbs_samples = gibbs_sampler(posterior_inf, + # nsample=nsample, + # nburnin=nburnin) lci = np.percentile(samples, 5, axis=0) uci = np.percentile(samples, 95, axis=0) @@ -109,11 +111,12 @@ def test_instance(nsample=100, nburnin=50): gibbs_samples = gibbs_sampler(posterior_inf, nsample=nsample, - nburnin=nburnin) + nburnin=nburnin)[0] lci = np.percentile(samples, 5, axis=0) uci = np.percentile(samples, 95, axis=0) + beta_target = np.linalg.pinv(X[:, M]).dot(X.dot(beta)) coverage = (lci < beta_target) * (uci > beta_target) length = uci - lci @@ -225,30 +228,153 @@ def prior(target_parameter): return samples -def main(ndraw=10): +def test_hiv_data(nsample=1000, + nburnin=100, + alpha =0.10, + split_proportion=0.50, + seedn = 1): + + np.random.seed(seedn) + + level = 1 - alpha / 2. 
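+    # with the default alpha = 0.10, level = 0.95 is the one-sided probability used
+    # for each endpoint, so every interval built below (Langevin/Gibbs credible,
+    # naive and split Wald) is a two-sided 90% interval; Z_quantile is the matching
+    # standard-normal quantile for the Wald-type intervals.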
+ Z_quantile = ndist.ppf(level) + + NRTI = pd.read_csv("http://hivdb.stanford.edu/pages/published_analysis/genophenoPNAS2006/DATA/NRTI_DATA.txt", + na_values="NA", sep='\t') + + NRTI_specific = [] + NRTI_muts = [] + + for i in range(1, 241): + d = NRTI['P%d' % i] + for mut in np.unique(d): + if mut not in ['-', '.'] and len(mut) == 1: + test = np.equal(d, mut) + if test.sum() > 10: + NRTI_specific.append(np.array(np.equal(d, mut))) + NRTI_muts.append("P%d%s" % (i, mut)) + + NRTI_specific = NRTI.from_records(np.array(NRTI_specific).T, columns=NRTI_muts) + + X_NRTI = np.array(NRTI_specific, np.float) + Y = NRTI['3TC'] # shorthand + keep = ~np.isnan(Y).astype(np.bool) + X_NRTI = X_NRTI[np.nonzero(keep)] + + Y = Y[keep] + Y = np.array(np.log(Y), np.float) + Y -= Y.mean() + X_NRTI -= X_NRTI.mean(0)[None, :] + X_NRTI /= X_NRTI.std(0)[None, :] + X = X_NRTI + n, p = X.shape + X /= np.sqrt(n) + - coverage_ = 0. - length_ = 0. - for n in range(ndraw): - # cov, len = test_Langevin(n=500, - # p=200, - # signal_fac=1.5, - # s=5, - # sigma=2., - # rho=0.2, - # randomizer_scale=1. - # ) + ols_fit = sm.OLS(Y, X).fit() + _sigma = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1) - cov, len = test_instance(nsample=2000, - nburnin=100) + const = split_lasso.gaussian - coverage_ += cov - length_ += len + dispersion = _sigma ** 2 - print("coverage so far ", coverage_ / (n + 1.)) - print("lengths so far ", length_ / (n + 1.)) - print("iteration completed ", n + 1) + W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * _sigma + conv = const(X, + Y, + W, + proportion=split_proportion) + + signs = conv.fit() + nonzero = signs != 0 + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + + + posterior_inf = conv.posterior(observed_target, + cov_target, + cov_target_score, + dispersion=dispersion) + + samples_langevin = langevin_sampler(posterior_inf, + nsample=nsample, + nburnin=nburnin, + step=1.) 
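+    # samples_langevin holds draws from the posterior returned by conv.posterior above;
+    # the percentiles taken next give equal-tailed credible intervals at the same
+    # 90% level as the Wald-type intervals computed further below.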
+ + lci_langevin = np.percentile(samples_langevin, int((1-level)*100), axis=0) + uci_langevin = np.percentile(samples_langevin, int((level)*100), axis=0) + + samples_gibbs = gibbs_sampler(posterior_inf, + nsample=nsample, + nburnin=nburnin)[0] + + lci_gibbs = np.percentile(samples_gibbs, int((1 - level) * 100), axis=0) + uci_gibbs = np.percentile(samples_gibbs, int((level) * 100), axis=0) + + naive_est = np.linalg.pinv(X[:, nonzero]).dot(Y) + naive_cov = _sigma * np.linalg.inv(X[:, nonzero].T.dot(X[:, nonzero])) + naive_intervals = np.vstack([naive_est - Z_quantile * np.sqrt(np.diag(naive_cov)), + naive_est + Z_quantile * np.sqrt(np.diag(naive_cov))]).T + + X_split = X[~conv._selection_idx, :] + Y_split = Y[~conv._selection_idx] + split_est = np.linalg.pinv(X_split[:, nonzero]).dot(Y_split) + split_cov = _sigma * np.linalg.inv(X_split[:, nonzero].T.dot(X_split[:, nonzero])) + split_intervals = np.vstack([split_est - Z_quantile * np.sqrt(np.diag(split_cov)), + split_est + Z_quantile * np.sqrt(np.diag(split_cov))]).T + + print("lengths: adjusted intervals Langevin, Gibbs, MLE ", np.mean(uci_langevin - lci_langevin), np.mean(uci_gibbs - lci_gibbs), + np.mean((2* Z_quantile )* np.sqrt(np.diag(posterior_inf.inverse_info)))) + + print("lengths: naive intervals ", np.mean(naive_intervals[:,1]-naive_intervals[:,0])) + + print("lengths: split intervals ", np.mean(split_intervals[:, 1] - split_intervals[:, 0])) + + output = pd.DataFrame({'Langevin_lower_confidence': lci_langevin, + 'Langevin_upper_confidence': uci_langevin, + 'Gibbs_lower_confidence': lci_gibbs, + 'Gibbs_upper_confidence': uci_gibbs, + 'Split_lower_confidence': split_intervals[:,0], + 'Split_upper_confidence': split_intervals[:, 1], + 'Naive_lower_confidence': naive_intervals[:, 0], + 'Naive_upper_confidence': naive_intervals[:, 1] + }) + + return output + +# def main(ndraw=10): +# +# coverage_ = 0. +# length_ = 0. +# for n in range(ndraw): +# cov, len = test_Langevin(n=500, +# p=200, +# signal_fac=1., +# s=5, +# sigma=3., +# rho=0.2, +# randomizer_scale=1. 
+# ) +# +# # cov, len = test_instance(nsample=2000, +# # nburnin=100) +# +# coverage_ += cov +# length_ += len +# +# print("coverage so far ", coverage_ / (n + 1.)) +# print("lengths so far ", length_ / (n + 1.)) +# print("iteration completed ", n + 1) + + +def main(): + test_hiv_data(split_proportion=0.50) if __name__ == "__main__": main() From 8f4e6128af512fa25cedca2ddaa18e311fddcd91 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 8 Jul 2020 12:13:49 -0700 Subject: [PATCH 058/187] changing order of output of log posterior --- doc/adjusted_MLE/tests/comparison_metrics.py | 935 ------------------- selectinf/randomized/posterior_inference.py | 40 +- selectinf/randomized/query.py | 59 +- 3 files changed, 56 insertions(+), 978 deletions(-) delete mode 100644 doc/adjusted_MLE/tests/comparison_metrics.py diff --git a/doc/adjusted_MLE/tests/comparison_metrics.py b/doc/adjusted_MLE/tests/comparison_metrics.py deleted file mode 100644 index 15a003d0e..000000000 --- a/doc/adjusted_MLE/tests/comparison_metrics.py +++ /dev/null @@ -1,935 +0,0 @@ -<<<<<<< HEAD:doc/adjusted_MLE/tests/comparison_metrics.py -from __future__ import division, print_function -import numpy as np, sys, time -from scipy.stats import norm as ndist -======= -import numpy as np, os, itertools -import pandas as pd - -import rpy2.robjects as rpy -from rpy2.robjects import numpy2ri -rpy.numpy2ri.activate() -from scipy.stats import norm as ndist - -from ..lasso import lasso, full_targets, selected_targets, debiased_targets -from ...algorithms.lasso import lasso as lasso_full - -def sim_xy(n, p, nval, rho=0, s=5, beta_type=2, snr=1): - - rpy.r(''' - source('~/best-subset/bestsubset/R/sim.R') - sim_xy = sim.xy - ''') ->>>>>>> commit changes to test_mle:selectinf/randomized/tests/test_cv_mle.py - -from rpy2 import robjects -import rpy2.robjects.numpy2ri - -from ...randomized.lasso import lasso, full_targets, selected_targets, debiased_targets -from ...algorithms.lasso import ROSI -from ...tests.instance import gaussian_instance - -def BHfilter(pval, q=0.2): - pval = np.asarray(pval) - pval_sort = np.sort(pval) - comparison = q * np.arange(1, pval.shape[0] + 1.) 
/ pval.shape[0] - passing = pval_sort < comparison - if passing.sum(): - thresh = comparison[np.nonzero(passing)[0].max()] - return np.nonzero(pval <= thresh)[0] - return [] - -def sim_xy(n, - p, - nval, - rho=0, - s=5, - beta_type=2, - snr=1): - try: - rpy2.robjects.numpy2ri.activate() - robjects.r(''' - #library(bestsubset) - source('~/best-subset/bestsubset/R/sim.R') - sim_xy = sim.xy - ''') - - r_simulate = robjects.globalenv['sim_xy'] - sim = r_simulate(n, p, nval, rho, s, beta_type, snr) - X = np.array(sim.rx2('x')) - y = np.array(sim.rx2('y')) - X_val = np.array(sim.rx2('xval')) - y_val = np.array(sim.rx2('yval')) - Sigma = np.array(sim.rx2('Sigma')) - beta = np.array(sim.rx2('beta')) - sigma = np.array(sim.rx2('sigma')) - rpy2.robjects.numpy2ri.deactivate() - return X, y, X_val, y_val, Sigma, beta, sigma - except: - X, y, beta, _, sigma, Sigma = gaussian_instance(n=n, - p=p, - s=s, - signal=snr, - equicorrelated=False, - rho=rho) - X_val = gaussian_instance(n=n, - p=p, - s=s, - signal=snr, - equicorrelated=False, - rho=rho)[0] - y_val = X_val.dot(beta) + sigma * np.random.standard_normal(X_val.shape[0]) - return X, y, X_val, y_val, Sigma, beta, sigma - -def selInf_R(X, y, beta, lam, sigma, Type, alpha=0.1): - robjects.r(''' - library("selectiveInference") - selInf = function(X, y, beta, lam, sigma, Type, alpha= 0.1){ - y = as.matrix(y) - X = as.matrix(X) - beta = as.matrix(beta) - lam = as.matrix(lam)[1,1] - sigma = as.matrix(sigma)[1,1] - Type = as.matrix(Type)[1,1] - if(Type == 1){ - type = "full"} else{ - type = "partial"} - inf = fixedLassoInf(x = X, y = y, beta = beta, lambda=lam, family = "gaussian", - intercept=FALSE, sigma=sigma, alpha=alpha, type=type) - return(list(ci = inf$ci, pvalue = inf$pv))} - ''') - - inf_R = robjects.globalenv['selInf'] - n, p = X.shape - r_X = robjects.r.matrix(X, nrow=n, ncol=p) - r_y = robjects.r.matrix(y, nrow=n, ncol=1) - r_beta = robjects.r.matrix(beta, nrow=p, ncol=1) - r_lam = robjects.r.matrix(lam, nrow=1, ncol=1) - r_sigma = robjects.r.matrix(sigma, nrow=1, ncol=1) - r_Type = robjects.r.matrix(Type, nrow=1, ncol=1) - output = inf_R(r_X, r_y, r_beta, r_lam, r_sigma, r_Type) - ci = np.array(output.rx2('ci')) - pvalue = np.array(output.rx2('pvalue')) - return ci, pvalue - - -def glmnet_lasso(X, y, lambda_val): - robjects.r(''' - library(glmnet) - glmnet_LASSO = function(X,y, lambda){ - y = as.matrix(y) - X = as.matrix(X) - lam = as.matrix(lambda)[1,1] - n = nrow(X) - - fit = glmnet(X, y, standardize=TRUE, intercept=FALSE, thresh=1.e-10) - estimate = coef(fit, s=lam, exact=TRUE, x=X, y=y)[-1] - fit.cv = cv.glmnet(X, y, standardize=TRUE, intercept=FALSE, thresh=1.e-10) - estimate.1se = coef(fit.cv, s='lambda.1se', exact=TRUE, x=X, y=y)[-1] - estimate.min = coef(fit.cv, s='lambda.min', exact=TRUE, x=X, y=y)[-1] - return(list(estimate = estimate, estimate.1se = estimate.1se, - estimate.min = estimate.min, - lam.min = fit.cv$lambda.min, - lam.1se = fit.cv$lambda.1se)) - }''') - - lambda_R = robjects.globalenv['glmnet_LASSO'] - n, p = X.shape - r_X = robjects.r.matrix(X, nrow=n, ncol=p) - r_y = robjects.r.matrix(y, nrow=n, ncol=1) - r_lam = robjects.r.matrix(lambda_val, nrow=1, ncol=1) - - estimate = np.array(lambda_R(r_X, r_y, r_lam).rx2('estimate')) - estimate_1se = np.array(lambda_R(r_X, r_y, r_lam).rx2('estimate.1se')) - estimate_min = np.array(lambda_R(r_X, r_y, r_lam).rx2('estimate.min')) - lam_min = np.asscalar(np.array(lambda_R(r_X, r_y, r_lam).rx2('lam.min'))) - lam_1se = np.asscalar(np.array(lambda_R(r_X, r_y, r_lam).rx2('lam.1se'))) - 
return estimate, estimate_1se, estimate_min, lam_min, lam_1se - -def coverage(intervals, pval, target, truth): - pval_alt = (pval[truth != 0]) < 0.1 - if pval_alt.sum() > 0: - avg_power = np.mean(pval_alt) - else: - avg_power = 0. - return np.mean((target > intervals[:, 0]) * (target < intervals[:, 1])), avg_power - -def relative_risk(est, truth, Sigma): - if (truth != 0).sum > 0: - return (est - truth).T.dot(Sigma).dot(est - truth) / truth.T.dot(Sigma).dot(truth) - else: - return (est - truth).T.dot(Sigma).dot(est - truth) - - -def comparison_cvmetrics_selected(n=500, - p=100, - nval=500, - rho=0.35, - s=5, - beta_type=1, - snr=0.20, - randomizer_scale=np.sqrt(0.50), - full_dispersion=True, - tuning_nonrand="lambda.min", - tuning_rand="lambda.1se"): - - (X, y, _, _, Sigma, beta, sigma) = sim_xy(n=n, - p=p, - nval=nval, - rho=rho, - s=s, - beta_type=beta_type, - snr=snr) - - true_mean = X.dot(beta) - print("snr", snr) - X -= X.mean(0)[None, :] - X /= (X.std(0)[None, :] * np.sqrt(n / (n - 1.))) - y = y - y.mean() - true_set = np.asarray([u for u in range(p) if beta[u] != 0]) - - if full_dispersion: - dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p) - sigma_ = np.sqrt(dispersion) - else: - dispersion = None - sigma_ = np.std(y) - print("estimated and true sigma", sigma, sigma_) - - lam_theory = (sigma_ * 1. * np.mean(np.fabs(np.dot(X.T, - np.random.standard_normal((n, 2000)))).max(0))) - (glm_LASSO_theory, - glm_LASSO_1se, - glm_LASSO_min, - lam_min, - lam_1se) = glmnet_lasso(X, y, lam_theory / n) - - if tuning_nonrand == "lambda.min": - lam_LASSO = lam_min - glm_LASSO = glm_LASSO_min - elif tuning_nonrand == "lambda.1se": - lam_LASSO = lam_1se - glm_LASSO = glm_LASSO_1se - else: - lam_LASSO = lam_theory/float(n) - glm_LASSO = glm_LASSO_theory - active_LASSO = (glm_LASSO != 0) - nactive_LASSO = active_LASSO.sum() - active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]]) - active_LASSO_bool = np.asarray([(np.in1d(active_set_LASSO[z], true_set).sum() > 0) for - z in range(nactive_LASSO)], np.bool) - - rel_LASSO = np.zeros(p) - Lee_nreport = 0 - bias_Lee = 0. - bias_naive = 0. - - if nactive_LASSO > 0: - post_LASSO_OLS = np.linalg.pinv(X[:, active_LASSO]).dot(y) - rel_LASSO[active_LASSO] = post_LASSO_OLS - Lee_target = np.linalg.pinv(X[:, active_LASSO]).dot(X.dot(beta)) - Lee_intervals, Lee_pval = selInf_R(X, - y, - glm_LASSO, - n * lam_LASSO, - sigma_, - Type=0, - alpha=0.1) - - if (Lee_pval.shape[0] == Lee_target.shape[0]): - - cov_Lee, selective_Lee_power = coverage(Lee_intervals, - Lee_pval, - Lee_target, - beta[active_LASSO]) - - inf_entries_bool = np.isinf(Lee_intervals[:, 1] - Lee_intervals[:, 0]) - inf_entries = np.mean(inf_entries_bool) - if inf_entries == 1.: - length_Lee = 0. - else: - length_Lee = (np.mean((Lee_intervals[:, 1] - Lee_intervals[:, 0]) - [~inf_entries_bool])) - power_Lee = ((active_LASSO_bool) * (np.logical_or((0. < Lee_intervals[:, 0]), - (0. 
> Lee_intervals[:, 1])))) \ - .sum() / float((beta != 0).sum()) - Lee_discoveries = BHfilter(Lee_pval, q=0.1) - power_Lee_BH = ((Lee_discoveries * active_LASSO_bool).sum() / - float((beta != 0).sum())) - fdr_Lee_BH = ((Lee_discoveries * ~active_LASSO_bool).sum() / - float(max(Lee_discoveries.sum(), 1.))) - bias_Lee = np.mean(glm_LASSO[active_LASSO] - Lee_target) - - naive_sd = sigma_ * np.sqrt(np.diag( - (np.linalg.inv(X[:, active_LASSO].T.dot(X[:, active_LASSO]))))) - naive_intervals = np.vstack([post_LASSO_OLS - 1.65 * naive_sd, - post_LASSO_OLS + 1.65 * naive_sd]).T - naive_pval = 2 * ndist.cdf(np.abs(post_LASSO_OLS) / naive_sd) - - cov_naive, selective_naive_power = coverage(naive_intervals, - naive_pval, - Lee_target, - beta[active_LASSO]) - - length_naive = np.mean(naive_intervals[:, 1] - naive_intervals[:, 0]) - power_naive = ((active_LASSO_bool) * ( - np.logical_or((0. < naive_intervals[:, 0]), - (0. > naive_intervals[:, 1])))).sum() / float( - (beta != 0).sum()) - - naive_discoveries = BHfilter(naive_pval, q=0.1) - - power_naive_BH = ((naive_discoveries * active_LASSO_bool).sum() / - float((beta != 0).sum())) - fdr_naive_BH = ((naive_discoveries * ~active_LASSO_bool).sum() / - float(max(naive_discoveries.sum(), 1.))) - - bias_naive = np.mean(rel_LASSO[active_LASSO] - Lee_target) - - partial_Lasso_risk = (glm_LASSO[active_LASSO]-Lee_target).T.dot( - glm_LASSO[active_LASSO]-Lee_target) - partial_relLasso_risk = (post_LASSO_OLS - Lee_target).T.dot( - post_LASSO_OLS - Lee_target) - - else: - Lee_nreport = 1 - (cov_Lee, - length_Lee, - inf_entries, - power_Lee, - power_Lee_BH, - fdr_Lee_BH, - selective_Lee_power) = [0., 0., 0., 0., 0., 0., 0.] - - (cov_naive, - length_naive, - power_naive, - power_naive_BH, - fdr_naive_BH, - selective_naive_power) = [0., 0., 0., 0., 0., 0.] - - naive_discoveries = np.zeros(1) - Lee_discoveries = np.zeros(1) - partial_Lasso_risk, partial_relLasso_risk = [0., 0.] - - elif nactive_LASSO == 0: - Lee_nreport = 1 - (cov_Lee, - length_Lee, - inf_entries, - power_Lee, - power_Lee_BH, - fdr_Lee_BH, - selective_Lee_power) = [0., 0., 0., 0., 0., 0., 0.] - - (cov_naive, - length_naive, - power_naive, - power_naive_BH, - fdr_naive_BH, - selective_naive_power) = [0., 0., 0., 0., 0., 0.] - - naive_discoveries = np.zeros(1) - Lee_discoveries = np.zeros(1) - partial_Lasso_risk, partial_relLasso_risk = [0., 0.] 
- - if tuning_rand == "lambda.min": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights=n * lam_min * np.ones(p), - randomizer_scale= np.sqrt(n) * - randomizer_scale * sigma_) - elif tuning_rand == "lambda.1se": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights=n * lam_1se * np.ones(p), - randomizer_scale= np.sqrt(n) * - randomizer_scale * sigma_) - else: - randomized_lasso = lasso.gaussian(X, - y, - feature_weights= lam_theory * np.ones(p), - randomizer_scale=np.sqrt(n) * - randomizer_scale * sigma_) - signs = randomized_lasso.fit() - nonzero = signs != 0 - active_set_rand = np.asarray([t for t in range(p) if nonzero[t]]) - active_rand_bool = np.asarray([(np.in1d(active_set_rand[x], true_set).sum() > 0) for x in range(nonzero.sum())], np.bool) - sel_MLE = np.zeros(p) - ind_est = np.zeros(p) - randomized_lasso_est = np.zeros(p) - randomized_rel_lasso_est = np.zeros(p) - MLE_nreport = 0 - - sys.stderr.write("active variables selected by cv LASSO " + str(nactive_LASSO) + "\n") - sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n" + "\n") - - if nonzero.sum() > 0: - target_randomized = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(randomized_lasso.loglike, - randomized_lasso._W, - nonzero, - dispersion=dispersion) - - MLE_estimate, _, _, MLE_pval, MLE_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(observed_target, - cov_target, - cov_target_score, - alternatives) - sel_MLE[nonzero] = MLE_estimate - ind_est[nonzero] = ind_unbiased_estimator - randomized_lasso_est = randomized_lasso.initial_soln - randomized_rel_lasso_est = randomized_lasso._beta_full - - cov_MLE, selective_MLE_power = coverage(MLE_intervals, MLE_pval, target_randomized, beta[nonzero]) - length_MLE = np.mean(MLE_intervals[:, 1] - MLE_intervals[:, 0]) - power_MLE = ((active_rand_bool) * ( - np.logical_or((0. < MLE_intervals[:, 0]), (0. > MLE_intervals[:, 1])))).sum() / float((beta != 0).sum()) - MLE_discoveries = BHfilter(MLE_pval, q=0.1) - power_MLE_BH = (MLE_discoveries * active_rand_bool).sum() / float((beta != 0).sum()) - fdr_MLE_BH = (MLE_discoveries * ~active_rand_bool).sum() / float(max(MLE_discoveries.sum(), 1.)) - bias_MLE = np.mean(MLE_estimate - target_randomized) - - partial_MLE_risk = (MLE_estimate - target_randomized).T.dot(MLE_estimate - target_randomized) - partial_ind_risk = (ind_unbiased_estimator - target_randomized).T.dot(ind_unbiased_estimator - target_randomized) - partial_randLasso_risk = (randomized_lasso_est[nonzero] - target_randomized).T.dot(randomized_lasso_est[nonzero] - target_randomized) - partial_relrandLasso_risk = (randomized_rel_lasso_est[nonzero] - target_randomized).T.dot(randomized_rel_lasso_est[nonzero] - target_randomized) - else: - MLE_nreport = 1 - cov_MLE, length_MLE, power_MLE, power_MLE_BH, fdr_MLE_BH, bias_MLE, selective_MLE_power = [0., 0., 0., 0., 0., 0., 0.] - MLE_discoveries = np.zeros(1) - partial_MLE_risk, partial_ind_risk, partial_randLasso_risk, partial_relrandLasso_risk = [0., 0., 0., 0.] 
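The `*_discoveries` counts in the block above all come from the `BHfilter` helper defined near the top of this file, which is the Benjamini-Hochberg step-up rule applied to a vector of p-values. A quick standalone trace of that rule on made-up p-values (the values are purely illustrative):

    import numpy as np

    # hypothetical p-values; with q = 0.1 the step-up comparisons are
    # 0.1 * [1, 2, 3, 4] / 4 = [0.025, 0.05, 0.075, 0.1]
    pval = np.array([0.001, 0.2, 0.03, 0.5])
    q = 0.1
    pval_sort = np.sort(pval)                          # [0.001, 0.03, 0.2, 0.5]
    comparison = q * np.arange(1, pval.shape[0] + 1.) / pval.shape[0]
    passing = pval_sort < comparison                   # [True, True, False, False]
    thresh = comparison[np.nonzero(passing)[0].max()]  # 0.05
    print(np.nonzero(pval <= thresh)[0])               # [0 2]: two discoveries
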
- - risks = np.vstack((relative_risk(sel_MLE, beta, Sigma), - relative_risk(ind_est, beta, Sigma), - relative_risk(randomized_lasso_est, beta, Sigma), - relative_risk(randomized_rel_lasso_est, beta, Sigma), - relative_risk(rel_LASSO, beta, Sigma), - relative_risk(glm_LASSO, beta, Sigma))) - - partial_risks = np.vstack((partial_MLE_risk, - partial_ind_risk, - partial_randLasso_risk, - partial_relrandLasso_risk, - partial_relLasso_risk, - partial_Lasso_risk)) - - naive_inf = np.vstack((cov_naive, length_naive, 0., nactive_LASSO, bias_naive, selective_naive_power, power_naive, power_naive_BH, fdr_naive_BH, - naive_discoveries.sum())) - Lee_inf = np.vstack((cov_Lee, length_Lee, inf_entries, nactive_LASSO, bias_Lee, selective_Lee_power, power_Lee, power_Lee_BH, fdr_Lee_BH, - Lee_discoveries.sum())) - Liu_inf = np.zeros((10, 1)) - MLE_inf = np.vstack((cov_MLE, length_MLE, 0., nonzero.sum(), bias_MLE, selective_MLE_power, power_MLE, power_MLE_BH, fdr_MLE_BH, - MLE_discoveries.sum())) - nreport = np.vstack((Lee_nreport, 0., MLE_nreport)) - return np.vstack((risks, naive_inf, Lee_inf, Liu_inf, MLE_inf, partial_risks, nreport)) - - -def comparison_cvmetrics_full(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=1, snr=0.20, - randomizer_scale=np.sqrt(0.25), full_dispersion=True, - tuning_nonrand="lambda.min", tuning_rand="lambda.1se"): - - X, y, _, _, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr) - print("snr", snr) - X -= X.mean(0)[None, :] - X /= (X.std(0)[None, :] * np.sqrt(n / (n - 1.))) - y = y - y.mean() - true_set = np.asarray([u for u in range(p) if beta[u] != 0]) - - if full_dispersion: - dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p) - sigma_ = np.sqrt(dispersion) - else: - dispersion = None - sigma_ = np.std(y) - print("estimated and true sigma", sigma, sigma_) - - lam_theory = sigma_ * 1. * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) - glm_LASSO_theory, glm_LASSO_1se, glm_LASSO_min, lam_min, lam_1se = glmnet_lasso(X, y, lam_theory/float(n)) - if tuning_nonrand == "lambda.min": - lam_LASSO = lam_min - glm_LASSO = glm_LASSO_min - elif tuning_nonrand == "lambda.1se": - lam_LASSO = lam_1se - glm_LASSO = glm_LASSO_1se - else: - lam_LASSO = lam_theory/float(n) - glm_LASSO = glm_LASSO_theory - - active_LASSO = (glm_LASSO != 0) - nactive_LASSO = active_LASSO.sum() - active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]]) - active_LASSO_bool = np.asarray([(np.in1d(active_set_LASSO[z], true_set).sum() > 0) for z in range(nactive_LASSO)], - np.bool) - - rel_LASSO = np.zeros(p) - Lee_nreport = 0 - bias_Lee = 0. - bias_naive = 0. - - if nactive_LASSO > 0: - rel_LASSO[active_LASSO] = np.linalg.pinv(X[:, active_LASSO]).dot(y) - Lee_target = beta[active_LASSO] - Lee_intervals, Lee_pval = selInf_R(X, y, glm_LASSO, n * lam_LASSO, sigma_, Type=1, alpha=0.1) - - if (Lee_pval.shape[0] == Lee_target.shape[0]): - - cov_Lee, selective_Lee_power = coverage(Lee_intervals, Lee_pval, Lee_target, beta[active_LASSO]) - inf_entries_bool = np.isinf(Lee_intervals[:, 1] - Lee_intervals[:, 0]) - inf_entries = np.mean(inf_entries_bool) - if inf_entries == 1.: - length_Lee = 0. - else: - length_Lee = np.mean((Lee_intervals[:, 1] - Lee_intervals[:, 0])[~inf_entries_bool]) - power_Lee = ((active_LASSO_bool) * ( - np.logical_or((0. < Lee_intervals[:, 0]), (0. 
> Lee_intervals[:, 1])))).sum() / float((beta != 0).sum()) - Lee_discoveries = BHfilter(Lee_pval, q=0.1) - power_Lee_BH = (Lee_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum()) - fdr_Lee_BH = (Lee_discoveries * ~active_LASSO_bool).sum() / float(max(Lee_discoveries.sum(), 1.)) - bias_Lee = np.mean(glm_LASSO[active_LASSO] - Lee_target) - - post_LASSO_OLS = np.linalg.pinv(X[:, active_LASSO]).dot(y) - naive_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_LASSO].T.dot(X[:, active_LASSO]))))) - naive_intervals = np.vstack([post_LASSO_OLS - 1.65 * naive_sd, - post_LASSO_OLS + 1.65 * naive_sd]).T - naive_pval = 2 * ndist.cdf(np.abs(post_LASSO_OLS) / naive_sd) - cov_naive, selective_naive_power = coverage(naive_intervals, naive_pval, Lee_target, beta[active_LASSO]) - length_naive = np.mean(naive_intervals[:, 1] - naive_intervals[:, 0]) - power_naive = ((active_LASSO_bool) * ( - np.logical_or((0. < naive_intervals[:, 0]), (0. > naive_intervals[:, 1])))).sum() / float( - (beta != 0).sum()) - naive_discoveries = BHfilter(naive_pval, q=0.1) - power_naive_BH = (naive_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum()) - fdr_naive_BH = (naive_discoveries * ~active_LASSO_bool).sum() / float(max(naive_discoveries.sum(), 1.)) - bias_naive = np.mean(rel_LASSO[active_LASSO] - Lee_target) - - partial_Lasso_risk = (glm_LASSO[active_LASSO] - Lee_target).T.dot(glm_LASSO[active_LASSO] - Lee_target) - partial_relLasso_risk = (post_LASSO_OLS - Lee_target).T.dot(post_LASSO_OLS - Lee_target) - else: - Lee_nreport = 1 - cov_Lee, length_Lee, inf_entries, power_Lee, power_Lee_BH, fdr_Lee_BH, selective_Lee_power = [0., 0., 0., 0., 0., 0., 0.] - cov_naive, length_naive, power_naive, power_naive_BH, fdr_naive_BH, selective_naive_power = [0., 0., 0., 0., 0., 0.] - naive_discoveries = np.zeros(1) - Lee_discoveries = np.zeros(1) - partial_Lasso_risk, partial_relLasso_risk = [0., 0.] - - elif nactive_LASSO == 0: - Lee_nreport = 1 - cov_Lee, length_Lee, inf_entries, power_Lee, power_Lee_BH, fdr_Lee_BH, selective_Lee_power = [0., 0., 0., 0., 0., 0., 0.] - cov_naive, length_naive, power_naive, power_naive_BH, fdr_naive_BH, selective_naive_power = [0., 0., 0., 0., 0., 0.] - naive_discoveries = np.zeros(1) - Lee_discoveries = np.zeros(1) - partial_Lasso_risk, partial_relLasso_risk = [0., 0.] - - lasso_Liu = ROSI.gaussian(X, y, n * lam_LASSO) - Lasso_soln_Liu = lasso_Liu.fit() - active_set_Liu = np.nonzero(Lasso_soln_Liu != 0)[0] - nactive_Liu = active_set_Liu.shape[0] - active_Liu_bool = np.asarray([(np.in1d(active_set_Liu[a], true_set).sum() > 0) for a in range(nactive_Liu)], np.bool) - Liu_nreport = 0 - - if nactive_Liu > 0: - Liu_target = beta[Lasso_soln_Liu != 0] - df = lasso_Liu.summary(level=0.90, compute_intervals=True, dispersion=dispersion) - Liu_lower, Liu_upper, Liu_pval = np.asarray(df['lower_confidence']), \ - np.asarray(df['upper_confidence']), \ - np.asarray(df['pval']) - Liu_intervals = np.vstack((Liu_lower, Liu_upper)).T - cov_Liu, selective_Liu_power = coverage(Liu_intervals, Liu_pval, Liu_target, beta[Lasso_soln_Liu != 0]) - length_Liu = np.mean(Liu_intervals[:, 1] - Liu_intervals[:, 0]) - power_Liu = ((active_Liu_bool) * (np.logical_or((0. < Liu_intervals[:, 0]), - (0. 
> Liu_intervals[:, 1])))).sum() / float((beta != 0).sum()) - Liu_discoveries = BHfilter(Liu_pval, q=0.1) - power_Liu_BH = (Liu_discoveries * active_Liu_bool).sum() / float((beta != 0).sum()) - fdr_Liu_BH = (Liu_discoveries * ~active_Liu_bool).sum() / float(max(Liu_discoveries.sum(), 1.)) - - else: - Liu_nreport = 1 - cov_Liu, length_Liu, power_Liu, power_Liu_BH, fdr_Liu_BH, selective_Liu_power = [0., 0., 0., 0., 0., 0.] - Liu_discoveries = np.zeros(1) - - if tuning_rand == "lambda.min": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights= n * lam_min * np.ones(p), - randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) - elif tuning_rand == "lambda.1se": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights= n * lam_1se * np.ones(p), - randomizer_scale= np.sqrt(n) * randomizer_scale * sigma_) - else: - randomized_lasso = lasso.gaussian(X, - y, - feature_weights= lam_theory * np.ones(p), - randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) - signs = randomized_lasso.fit() - nonzero = signs != 0 - active_set_rand = np.asarray([t for t in range(p) if nonzero[t]]) - active_rand_bool = np.asarray([(np.in1d(active_set_rand[x], true_set).sum() > 0) for x in range(nonzero.sum())], np.bool) - sel_MLE = np.zeros(p) - ind_est = np.zeros(p) - randomized_lasso_est = np.zeros(p) - randomized_rel_lasso_est = np.zeros(p) - MLE_nreport = 0 - - if nonzero.sum() > 0: - target_randomized = beta[nonzero] - (observed_target, - cov_target, - cov_target_score, - alternatives) = full_targets(randomized_lasso.loglike, - randomized_lasso._W, - nonzero, - dispersion=dispersion) - MLE_estimate, _, _, MLE_pval, MLE_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(observed_target, - cov_target, - cov_target_score, - alternatives) - sel_MLE[nonzero] = MLE_estimate - ind_est[nonzero] = ind_unbiased_estimator - randomized_lasso_est = randomized_lasso.initial_soln - randomized_rel_lasso_est = randomized_lasso._beta_full - - cov_MLE, selective_MLE_power = coverage(MLE_intervals, MLE_pval, target_randomized, beta[nonzero]) - length_MLE = np.mean(MLE_intervals[:, 1] - MLE_intervals[:, 0]) - power_MLE = ((active_rand_bool) * (np.logical_or((0. < MLE_intervals[:, 0]), (0. > MLE_intervals[:, 1])))).sum() / float((beta != 0).sum()) - MLE_discoveries = BHfilter(MLE_pval, q=0.1) - power_MLE_BH = (MLE_discoveries * active_rand_bool).sum() / float((beta != 0).sum()) - fdr_MLE_BH = (MLE_discoveries * ~active_rand_bool).sum() / float(max(MLE_discoveries.sum(), 1.)) - bias_MLE = np.mean(MLE_estimate - target_randomized) - - partial_MLE_risk = (MLE_estimate - target_randomized).T.dot(MLE_estimate - target_randomized) - partial_ind_risk = (ind_unbiased_estimator - target_randomized).T.dot(ind_unbiased_estimator - target_randomized) - partial_randLasso_risk = (randomized_lasso_est[nonzero] - target_randomized).T.dot(randomized_lasso_est[nonzero] - target_randomized) - partial_relrandLasso_risk = (randomized_rel_lasso_est[nonzero] - target_randomized).T.dot(randomized_rel_lasso_est[nonzero] - target_randomized) - else: - MLE_nreport = 1 - cov_MLE, length_MLE, power_MLE, power_MLE_BH, fdr_MLE_BH, bias_MLE, selective_MLE_power = [0., 0., 0., 0., 0., 0., 0.] - MLE_discoveries = np.zeros(1) - partial_MLE_risk, partial_ind_risk, partial_randLasso_risk, partial_relrandLasso_risk = [0., 0., 0., 0.] 
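The first block of rows assembled just below uses the `relative_risk` metric defined earlier in this file: (est - truth)' Sigma (est - truth), scaled by truth' Sigma truth when the truth is nonzero. A small worked check with hypothetical numbers:

    import numpy as np

    # hypothetical estimate/truth pair under an identity covariance
    Sigma = np.identity(3)
    truth = np.array([1., 0., 0.])
    est = np.array([0.5, 0., 0.])
    num = (est - truth).T.dot(Sigma).dot(est - truth)  # 0.25
    den = truth.T.dot(Sigma).dot(truth)                # 1.0
    print(num / den)                                   # relative risk: 0.25
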
- - risks = np.vstack((relative_risk(sel_MLE, beta, Sigma), - relative_risk(ind_est, beta, Sigma), - relative_risk(randomized_lasso_est, beta, Sigma), - relative_risk(randomized_rel_lasso_est, beta, Sigma), - relative_risk(rel_LASSO, beta, Sigma), - relative_risk(glm_LASSO, beta, Sigma))) - - partial_risks = np.vstack((partial_MLE_risk, - partial_ind_risk, - partial_randLasso_risk, - partial_relrandLasso_risk, - partial_relLasso_risk, - partial_Lasso_risk)) - - naive_inf = np.vstack((cov_naive, length_naive, 0., nactive_LASSO, bias_naive, selective_naive_power, - power_naive, power_naive_BH, fdr_naive_BH, naive_discoveries.sum())) - Lee_inf = np.vstack((cov_Lee, length_Lee, inf_entries, nactive_LASSO, bias_Lee, selective_Lee_power, - power_Lee, power_Lee_BH, fdr_Lee_BH, Lee_discoveries.sum())) - Liu_inf = np.vstack((cov_Liu, length_Liu, 0., nactive_Liu, bias_Lee, selective_Liu_power, - power_Liu, power_Liu_BH, fdr_Liu_BH, Liu_discoveries.sum())) - MLE_inf = np.vstack((cov_MLE, length_MLE, 0., nonzero.sum(), bias_MLE, selective_MLE_power, - power_MLE, power_MLE_BH, fdr_MLE_BH, MLE_discoveries.sum())) - nreport = np.vstack((Lee_nreport, Liu_nreport, MLE_nreport)) - return np.vstack((risks, naive_inf, Lee_inf, Liu_inf, MLE_inf, partial_risks, nreport)) - -def comparison_cvmetrics_debiased(n=100, p=150, nval=500, rho=0.35, s=5, beta_type=1, snr=0.20, - randomizer_scale=np.sqrt(0.25), full_dispersion=False, - tuning_nonrand="lambda.min", tuning_rand="lambda.1se"): - - X, y, _, _, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr) - print("snr", snr) - X -= X.mean(0)[None, :] - X /= (X.std(0)[None, :] * np.sqrt(n / (n - 1.))) - y = y - y.mean() - true_set = np.asarray([u for u in range(p) if beta[u] != 0]) - - if full_dispersion: - dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p) - sigma_ = np.sqrt(dispersion) - else: - dispersion = None - _sigma_ = np.std(y) - - lam_theory = _sigma_ * 1. * np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) - glm_LASSO_theory, glm_LASSO_1se, glm_LASSO_min, lam_min, lam_1se = glmnet_lasso(X, y, lam_theory / float(n)) - - if full_dispersion is False: - dispersion = None - active_min = (glm_LASSO_min != 0) - if active_min.sum() > 0: - sigma_ = np.sqrt(np.linalg.norm(y - X[:, active_min].dot(np.linalg.pinv(X[:, active_min]).dot(y))) ** 2 - / (n - active_min.sum())) - else: - sigma_ = _sigma_ - print("estimated and true sigma", sigma, _sigma_, sigma_) - - if tuning_nonrand == "lambda.min": - lam_LASSO = lam_min - glm_LASSO = glm_LASSO_min - elif tuning_nonrand == "lambda.1se": - lam_LASSO = lam_1se - glm_LASSO = glm_LASSO_1se - else: - lam_LASSO = lam_theory / float(n) - glm_LASSO = glm_LASSO_theory - - active_LASSO = (glm_LASSO != 0) - nactive_LASSO = active_LASSO.sum() - active_set_LASSO = np.asarray([r for r in range(p) if active_LASSO[r]]) - active_LASSO_bool = np.asarray([(np.in1d(active_set_LASSO[z], true_set).sum() > 0) for z in range(nactive_LASSO)], - np.bool) - - rel_LASSO = np.zeros(p) - Lee_nreport = 0. - bias_naive = 0. 
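As in the other comparison functions, the hard-coded 1.65 in the naive intervals below is (approximately) the 0.95 Gaussian quantile, so these are nominal 90% two-sided intervals, in line with the alpha = 0.1 used elsewhere in these comparisons. A quick check:

    from scipy.stats import norm as ndist

    print(ndist.ppf(0.95))   # ~1.6449, the multiplier behind the "1.65" intervals
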
- - if nactive_LASSO > 0: - rel_LASSO[active_LASSO] = np.linalg.pinv(X[:, active_LASSO]).dot(y) - Lee_target = beta[active_LASSO] - post_LASSO_OLS = np.linalg.pinv(X[:, active_LASSO]).dot(y) - naive_sd = sigma_ * np.sqrt(np.diag((np.linalg.inv(X[:, active_LASSO].T.dot(X[:, active_LASSO]))))) - naive_intervals = np.vstack([post_LASSO_OLS - 1.65 * naive_sd, - post_LASSO_OLS + 1.65 * naive_sd]).T - naive_pval = 2 * ndist.cdf(np.abs(post_LASSO_OLS) / naive_sd) - cov_naive, selective_naive_power = coverage(naive_intervals, naive_pval, Lee_target, beta[active_LASSO]) - length_naive = np.mean(naive_intervals[:, 1] - naive_intervals[:, 0]) - power_naive = ((active_LASSO_bool) * ( - np.logical_or((0. < naive_intervals[:, 0]), (0. > naive_intervals[:, 1])))).sum() / float( - (beta != 0).sum()) - naive_discoveries = BHfilter(naive_pval, q=0.1) - power_naive_BH = (naive_discoveries * active_LASSO_bool).sum() / float((beta != 0).sum()) - fdr_naive_BH = (naive_discoveries * ~active_LASSO_bool).sum() / float(max(naive_discoveries.sum(), 1.)) - bias_naive = np.mean(rel_LASSO[active_LASSO] - Lee_target) - - partial_Lasso_risk = (glm_LASSO[active_LASSO] - Lee_target).T.dot(glm_LASSO[active_LASSO] - Lee_target) - partial_relLasso_risk = (post_LASSO_OLS - Lee_target).T.dot(post_LASSO_OLS - Lee_target) - - elif nactive_LASSO == 0: - Lee_nreport += 1 - cov_naive, length_naive, power_naive, power_naive_BH, fdr_naive_BH, selective_naive_power = [0., 0., 0., 0., 0., 0.] - naive_discoveries = np.zeros(1) - partial_Lasso_risk, partial_relLasso_risk = [0., 0.] - - if tuning_rand == "lambda.min": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights=n * lam_min * np.ones(p), - randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) - elif tuning_rand == "lambda.1se": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights=n * lam_1se * np.ones(p), - randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) - else: - randomized_lasso = lasso.gaussian(X, - y, - feature_weights=lam_theory * np.ones(p), - randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) - signs = randomized_lasso.fit() - nonzero = signs != 0 - active_set_rand = np.asarray([t for t in range(p) if nonzero[t]]) - active_rand_bool = np.asarray([(np.in1d(active_set_rand[x], true_set).sum() > 0) for x in range(nonzero.sum())], - np.bool) - sel_MLE = np.zeros(p) - ind_est = np.zeros(p) - randomized_lasso_est = np.zeros(p) - randomized_rel_lasso_est = np.zeros(p) - MLE_nreport = 0 - - if nonzero.sum() > 0: - target_randomized = beta[nonzero] - (observed_target, - cov_target, - cov_target_score, - alternatives) = debiased_targets(randomized_lasso.loglike, - randomized_lasso._W, - nonzero, - penalty=randomized_lasso.penalty, - dispersion=dispersion) - MLE_estimate, _, _, MLE_pval, MLE_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(observed_target, - cov_target, - cov_target_score, - alternatives) - sel_MLE[nonzero] = MLE_estimate - ind_est[nonzero] = ind_unbiased_estimator - randomized_lasso_est = randomized_lasso.initial_soln - randomized_rel_lasso_est = randomized_lasso._beta_full - - cov_MLE, selective_MLE_power = coverage(MLE_intervals, MLE_pval, target_randomized, beta[nonzero]) - length_MLE = np.mean(MLE_intervals[:, 1] - MLE_intervals[:, 0]) - power_MLE = ((active_rand_bool) * ( - np.logical_or((0. < MLE_intervals[:, 0]), (0. 
> MLE_intervals[:, 1])))).sum() / float((beta != 0).sum()) - MLE_discoveries = BHfilter(MLE_pval, q=0.1) - power_MLE_BH = (MLE_discoveries * active_rand_bool).sum() / float((beta != 0).sum()) - fdr_MLE_BH = (MLE_discoveries * ~active_rand_bool).sum() / float(max(MLE_discoveries.sum(), 1.)) - bias_MLE = np.mean(MLE_estimate - target_randomized) - - partial_MLE_risk = (MLE_estimate - target_randomized).T.dot(MLE_estimate - target_randomized) - partial_ind_risk = (ind_unbiased_estimator - target_randomized).T.dot( - ind_unbiased_estimator - target_randomized) - partial_randLasso_risk = (randomized_lasso_est[nonzero] - target_randomized).T.dot( - randomized_lasso_est[nonzero] - target_randomized) - partial_relrandLasso_risk = (randomized_rel_lasso_est[nonzero] - target_randomized).T.dot( - randomized_rel_lasso_est[nonzero] - target_randomized) - else: - MLE_nreport = 1 - cov_MLE, length_MLE, power_MLE, power_MLE_BH, fdr_MLE_BH, bias_MLE, selective_MLE_power = [0., 0., 0., 0., 0., - 0., 0.] - MLE_discoveries = np.zeros(1) - partial_MLE_risk, partial_ind_risk, partial_randLasso_risk, partial_relrandLasso_risk = [0., 0., 0., 0.] - - risks = np.vstack((relative_risk(sel_MLE, beta, Sigma), - relative_risk(ind_est, beta, Sigma), - relative_risk(randomized_lasso_est, beta, Sigma), - relative_risk(randomized_rel_lasso_est, beta, Sigma), - relative_risk(rel_LASSO, beta, Sigma), - relative_risk(glm_LASSO, beta, Sigma))) - - partial_risks = np.vstack((partial_MLE_risk, - partial_ind_risk, - partial_randLasso_risk, - partial_relrandLasso_risk, - partial_relLasso_risk, - partial_Lasso_risk)) - - naive_inf = np.vstack((cov_naive, length_naive, 0., nactive_LASSO, bias_naive, selective_naive_power, - power_naive, power_naive_BH, fdr_naive_BH, naive_discoveries.sum())) - Lee_inf = np.zeros((10,1)) - Liu_inf = np.zeros((10,1)) - MLE_inf = np.vstack((cov_MLE, length_MLE, 0., nonzero.sum(), bias_MLE, selective_MLE_power, - power_MLE, power_MLE_BH, fdr_MLE_BH, MLE_discoveries.sum())) - nreport = np.vstack((Lee_nreport, 0., MLE_nreport)) - return np.vstack((risks, naive_inf, Lee_inf, Liu_inf, MLE_inf, partial_risks, nreport)) - - -def compare_sampler_MLE(n=500, p=100, nval=500, rho=0.35, s=5, beta_type=1, snr=0.20, target= "selected", - randomizer_scale=np.sqrt(0.50), full_dispersion=True, tuning_rand="lambda.1se"): - - X, y, _, _, Sigma, beta, sigma = sim_xy(n=n, p=p, nval=nval, rho=rho, s=s, beta_type=beta_type, snr=snr) - print("snr", snr) - X -= X.mean(0)[None, :] - X /= (X.std(0)[None, :] * np.sqrt(n / (n - 1.))) - y = y - y.mean() - true_set = np.asarray([u for u in range(p) if beta[u] != 0]) - - if full_dispersion: - dispersion = np.linalg.norm(y - X.dot(np.linalg.pinv(X).dot(y))) ** 2 / (n - p) - sigma_ = np.sqrt(dispersion) - else: - dispersion = None - sigma_ = np.std(y) - print("estimated and true sigma", sigma, sigma_) - - lam_theory = sigma_ * 1. 
* np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0)) - _, _, _, lam_min, lam_1se = glmnet_lasso(X, y, lam_theory / float(n)) - - if tuning_rand == "lambda.min": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights=n * lam_min * np.ones(p), - randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) - elif tuning_rand == "lambda.1se": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights=n * lam_1se * np.ones(p), - randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) - elif tuning_rand == "lambda.theory": - randomized_lasso = lasso.gaussian(X, - y, - feature_weights=lam_theory * np.ones(p), - randomizer_scale=np.sqrt(n) * randomizer_scale * sigma_) - - else: - raise ValueError('lambda choice not specified correctly') - - signs = randomized_lasso.fit() - nonzero = signs != 0 - sys.stderr.write("active variables selected by randomized LASSO " + str(nonzero.sum()) + "\n" + "\n") - active_set_rand = np.asarray([t for t in range(p) if nonzero[t]]) - active_rand_bool = np.asarray([(np.in1d(active_set_rand[x], true_set).sum() > 0) for x in range(nonzero.sum())], - np.bool) - nreport = 0. - - if nonzero.sum() > 0: - if target == "full": - target_randomized = beta[nonzero] - (observed_target, - cov_target, - cov_target_score, - alternatives) = full_targets(randomized_lasso.loglike, - randomized_lasso._W, - nonzero, - dispersion=dispersion) - elif target == "selected": - target_randomized = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(randomized_lasso.loglike, - randomized_lasso._W, - nonzero, - dispersion=dispersion) - else: - raise ValueError('not a valid specification of target') - toc = time.time() - MLE_estimate, _, _, MLE_pval, MLE_intervals, ind_unbiased_estimator = randomized_lasso.selective_MLE(observed_target, - cov_target, - cov_target_score, - alternatives) - tic = time.time() - time_MLE = tic - toc - - cov_MLE, selective_MLE_power = coverage(MLE_intervals, MLE_pval, target_randomized, beta[nonzero]) - length_MLE = np.mean(MLE_intervals[:, 1] - MLE_intervals[:, 0]) - power_MLE = ((active_rand_bool) * ( - np.logical_or((0. < MLE_intervals[:, 0]), (0. > MLE_intervals[:, 1])))).sum() / float((beta != 0).sum()) - MLE_discoveries = BHfilter(MLE_pval, q=0.1) - power_MLE_BH = (MLE_discoveries * active_rand_bool).sum() / float((beta != 0).sum()) - fdr_MLE_BH = (MLE_discoveries * ~active_rand_bool).sum() / float(max(MLE_discoveries.sum(), 1.)) - bias_MLE = np.mean(MLE_estimate - target_randomized) - - toc = time.time() - _, sampler_pval, sampler_intervals = randomized_lasso.summary(observed_target, - cov_target, - cov_target_score, - alternatives, - level=0.9, compute_intervals=True, ndraw=100000) - tic = time.time() - time_sampler = tic - toc - - cov_sampler, selective_sampler_power = coverage(sampler_intervals, sampler_pval, target_randomized, beta[nonzero]) - length_sampler = np.mean(sampler_intervals[:, 1] - sampler_intervals[:, 0]) - power_sampler = ((active_rand_bool) * (np.logical_or((0. < sampler_intervals[:, 0]), - (0. 
> sampler_intervals[:, 1])))).sum() / float((beta != 0).sum()) - sampler_discoveries = BHfilter(sampler_pval, q=0.1) - power_sampler_BH = (sampler_discoveries * active_rand_bool).sum() / float((beta != 0).sum()) - fdr_sampler_BH = (sampler_discoveries * ~active_rand_bool).sum() / float(max(sampler_discoveries.sum(), 1.)) - bias_randLASSO = np.mean(randomized_lasso.initial_soln[nonzero] - target_randomized) - - else: - nreport += 1 - cov_MLE, length_MLE, power_MLE, power_MLE_BH, fdr_MLE_BH, bias_MLE, selective_MLE_power, time_MLE = [0., 0., 0., 0., 0., 0., 0., 0.] - cov_sampler, length_sampler, power_sampler, power_sampler_BH, fdr_sampler_BH, bias_randLASSO, selective_sampler_power, time_sampler = [0., 0., 0., 0., 0., 0., 0., 0.] - MLE_discoveries = np.zeros(1) - sampler_discoveries = np.zeros(1) - - MLE_inf = np.vstack((cov_MLE, length_MLE, 0., nonzero.sum(), bias_MLE, selective_MLE_power, time_MLE, - power_MLE, power_MLE_BH, fdr_MLE_BH, MLE_discoveries.sum())) - - sampler_inf = np.vstack((cov_sampler, length_sampler, 0., nonzero.sum(), bias_randLASSO, selective_sampler_power, time_sampler, - power_sampler, power_sampler_BH, fdr_sampler_BH, sampler_discoveries.sum())) - - return np.vstack((MLE_inf, sampler_inf, nreport)) - - - - - - - - - diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index d8be08029..1acb23281 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -49,8 +49,8 @@ def __init__(self, logdens_linear = query.sampler.logdens_transform[0] result, self.inverse_info, log_ref = query.selective_MLE(observed_target, - cov_target, - cov_target_score) + cov_target, + cov_target_score) ### Note for an informative prior we might want to change this... @@ -97,7 +97,7 @@ def log_posterior(self, sigmasq = sigma**2 mean_marginal = self.linear_coef.dot(target_parameter) + self.offset_coef - prec_marginal = np.linalg.inv(self.cov_marginal) + prec_marginal = self.prec_marginal conjugate_marginal = prec_marginal.dot(mean_marginal) useC = True @@ -115,8 +115,8 @@ def log_posterior(self, log_normalizer = -val - mean_marginal.T.dot(prec_marginal).dot(mean_marginal)/2. - log_lik = -((self.observed_target - target_parameter).T.dot(self.prec_target).dot(self.observed_target - target_parameter)) / 2.\ - - log_normalizer + log_lik = -(((self.observed_target - target_parameter).T.dot(self.prec_target).dot(self.observed_target - target_parameter)) / 2. 
+ - log_normalizer) grad_lik = (self.prec_target.dot(self.observed_target) - self.prec_target.dot(target_parameter) \ @@ -124,9 +124,8 @@ def log_posterior(self, grad_prior, log_prior = self.prior(target_parameter) - return (self.dispersion * grad_lik/sigmasq + grad_prior, - self.dispersion * log_lik/sigmasq + log_prior - - (self.dispersion* self.log_ref/sigmasq)) + return (self.dispersion * (log_lik - self.log_ref) / sigmasq + log_prior, + self.dispersion * grad_lik/sigmasq + grad_prior) ### Private method @@ -140,21 +139,22 @@ def _set_marginal_parameters(self): target_linear = -self.logdens_linear.dot(self.cov_target_score.T.dot(self.prec_target)) implied_precision = np.zeros((self.ntarget + self.nopt, self.ntarget + self.nopt)) - implied_precision[:self.ntarget, :self.ntarget] = (self.prec_target + + implied_precision[:self.ntarget][:,:self.ntarget] = (self.prec_target + target_linear.T.dot(self.cond_precision.dot(target_linear))) - implied_precision[:self.ntarget, self.ntarget:] = -target_linear.T.dot(self.cond_precision) - implied_precision[self.ntarget:, :self.ntarget] = (-target_linear.T.dot(self.cond_precision)).T - implied_precision[self.ntarget:, self.ntarget:] = self.cond_precision + implied_precision[:self.ntarget][:,self.ntarget:] = -target_linear.T.dot(self.cond_precision) + implied_precision[self.ntarget:][:,:self.ntarget] = (-target_linear.T.dot(self.cond_precision)).T + implied_precision[self.ntarget:][:,self.ntarget:] = self.cond_precision implied_cov = np.linalg.inv(implied_precision) - self.linear_coef = implied_cov[self.ntarget:, :self.ntarget].dot(self.prec_target) + self.linear_coef = implied_cov[self.ntarget:][:,:self.ntarget].dot(self.prec_target) target_offset = self.cond_mean - target_linear.dot(self.observed_target) - M = implied_cov[self.ntarget:, self.ntarget:].dot(self.cond_precision.dot(target_offset)) + M = implied_cov[self.ntarget:][:,self.ntarget:].dot(self.cond_precision.dot(target_offset)) N = -target_linear.T.dot(self.cond_precision).dot(target_offset) - self.offset_coef = implied_cov[self.ntarget:, :self.ntarget].dot(N) + M + self.offset_coef = implied_cov[self.ntarget:][:,:self.ntarget].dot(N) + M - self.cov_marginal = implied_cov[self.ntarget:, self.ntarget:] + self.cov_marginal = implied_cov[self.ntarget:][:,self.ntarget:] + self.prec_marginal = np.linalg.inv(self.cov_marginal) ### sampling methods @@ -214,7 +214,7 @@ def gibbs_sampler(selective_posterior, scale_update_sq = invgamma.rvs(a=(0.1 + selective_posterior.ntarget + selective_posterior.ntarget/2), - scale=0.1-((scale_update**2) * sampler.grad_posterior[1]), + scale=0.1-((scale_update**2)*sampler.posterior_[0]), size=1) scale_samples[i] = np.sqrt(scale_update_sq) sampler.scaling = np.sqrt(scale_update_sq) @@ -252,11 +252,11 @@ def next(self): def __next__(self): while True: - self.grad_posterior = self.gradient_map(self.state, self.scaling) - candidate = (self.state + self.stepsize * self.proposal_scale.dot(self.grad_posterior[0]) + self.posterior_ = self.gradient_map(self.state, self.scaling) + candidate = (self.state + self.stepsize * self.proposal_scale.dot(self.posterior_[1]) + np.sqrt(2.)* (self.proposal_sqrt.dot(self._noise.rvs(self._shape))) * self._sqrt_step) - if not np.all(np.isfinite(self.gradient_map(candidate)[0])): + if not np.all(np.isfinite(self.gradient_map(candidate, self.scaling)[1])): self.stepsize *= 0.5 self._sqrt_step = np.sqrt(self.stepsize) else: diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 1185adebb..40f506773 100644 --- 
a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -128,11 +128,18 @@ def summary(self, Parameters ---------- - target : one of ['selected', 'full'] + observed_target : ndarray + Observed estimate of target. + + target_cov : ndarray + Estimated covaraince of target. - features : np.bool - Binary encoding of which features to use in final - model and targets. + target_score_cov : ndarray + Estimated covariance of target and score of randomized query. + + alternatives : [str], optional + Sequence of strings describing the alternatives, + should be values of ['twosided', 'less', 'greater'] parameter : np.array Hypothesized value for parameter -- defaults to 0. @@ -288,10 +295,12 @@ def posterior(self, print('Using dispersion parameter 1...') if prior is None: + Di = 1. / (200 * np.diag(target_cov)) def prior(target_parameter): - grad_prior = -target_parameter / 100 - log_prior = -np.linalg.norm(target_parameter)**2 /(2. * 100) - return grad_prior, log_prior + grad_prior = -target_parameter * Di + log_prior = -0.5 * np.sum(target_parameter**2 * Di) + stop + return log_prior, grad_prior return posterior(self, observed_target, @@ -305,24 +314,28 @@ def approximate_grid_inference(self, observed_target, target_cov, target_score_cov, - grid = None, - dispersion=None, + grid=None, + alternatives=None, solve_args={'tol': 1.e-12}): - if dispersion is None: - dispersion = 1 - print('Using dispersion parameter 1...') - - if grid is None: - grid = np.linspace(- 20., 20., num=401) - - return approximate_grid_inference(self, - observed_target, - target_cov, - target_score_cov, - grid, - dispersion, - solve_args=solve_args) + # result, inverse_info = self.selective_MLE(observed_target, + # target_cov, + # target_score_cov)[:2] + + # if dispersion is None: + # dispersion = 1 + # print('Using dispersion parameter 1...') + + G = approximate_grid_inference(self, + observed_target, + target_cov, + target_score_cov, + #inverse_info, + #result['MLE'], + #dispersion, + grid=grid, + solve_args=solve_args) + return G.summary(alternatives=alternatives) class gaussian_query(query): From 56f9ba6e3562eefc9cb38201512fff2d00b3ef5a Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 8 Jul 2020 12:16:42 -0700 Subject: [PATCH 059/187] BF: changing prior as well --- selectinf/randomized/query.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 40f506773..0b24ecc45 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -280,7 +280,7 @@ def posterior(self, prior : callable A callable object that takes a single argument `parameter` of the same shape as `observed_target` - and returns (gradient of log prior, value of log prior) + and returns (value of log prior, gradient of log prior) dispersion : float, optional Dispersion parameter for log-likelihood. 
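The changes just above alter the contract of the `prior` callable passed to `posterior`: it now returns the value of the log prior first and its gradient second (and the default prior is updated to match). A minimal sketch of a user-supplied mean-zero Gaussian prior in the new convention; the variance of 50 is purely an illustrative choice, and the commented call assumes a fitted randomized LASSO `conv` as in the tests:

    import numpy as np

    prior_var = 50.   # hypothetical prior variance

    def gaussian_prior(target_parameter):
        # log density of N(0, prior_var * I), up to a constant, and its gradient
        log_prior = -np.linalg.norm(target_parameter) ** 2 / (2. * prior_var)
        grad_prior = -target_parameter / prior_var
        return log_prior, grad_prior   # (value, gradient) in the new ordering

    # posterior_inf = conv.posterior(observed_target, cov_target,
    #                                cov_target_score, prior=gaussian_prior)
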
@@ -299,7 +299,6 @@ def posterior(self, def prior(target_parameter): grad_prior = -target_parameter * Di log_prior = -0.5 * np.sum(target_parameter**2 * Di) - stop return log_prior, grad_prior return posterior(self, From ae211d7a91591e2eebd047a0b822885c8c2cf3eb Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 8 Jul 2020 12:22:53 -0700 Subject: [PATCH 060/187] BF: two more priors needed changing --- selectinf/randomized/tests/test_posterior.py | 134 +++++++------------ 1 file changed, 49 insertions(+), 85 deletions(-) diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index bba39845a..6e79e44e6 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -3,7 +3,7 @@ import statsmodels.api as sm from scipy.stats import norm as ndist -from ...tests.instance import gaussian_instance +from ...tests.instance import gaussian_instance, HIV_NRTI from ..lasso import lasso, selected_targets, split_lasso from ..posterior_inference import (langevin_sampler, gibbs_sampler) @@ -153,7 +153,7 @@ def test_flexible_prior1(nsample=100, nburnin=50): def prior(target_parameter): grad_prior = -target_parameter / 100 log_prior = -np.linalg.norm(target_parameter)**2 /(2. * 100) - return grad_prior, log_prior + return log_prior, grad_prior seed_state = np.random.get_state() np.random.set_state(seed_state) @@ -212,7 +212,7 @@ def test_flexible_prior2(nsample=1000, nburnin=50): def prior(target_parameter): grad_prior = -target_parameter / prior_var log_prior = -np.linalg.norm(target_parameter)**2 /(2. * prior_var) - return grad_prior, log_prior + return log_prior, grad_prior posterior_inf = L.posterior(observed_target, cov_target, @@ -227,50 +227,22 @@ def prior(target_parameter): nburnin=nburnin) return samples - -def test_hiv_data(nsample=1000, - nburnin=100, - alpha =0.10, +def test_hiv_data(nsample=10000, + nburnin=500, + level=0.90, split_proportion=0.50, seedn = 1): np.random.seed(seedn) - level = 1 - alpha / 2. 
- Z_quantile = ndist.ppf(level) - - NRTI = pd.read_csv("http://hivdb.stanford.edu/pages/published_analysis/genophenoPNAS2006/DATA/NRTI_DATA.txt", - na_values="NA", sep='\t') - - NRTI_specific = [] - NRTI_muts = [] - - for i in range(1, 241): - d = NRTI['P%d' % i] - for mut in np.unique(d): - if mut not in ['-', '.'] and len(mut) == 1: - test = np.equal(d, mut) - if test.sum() > 10: - NRTI_specific.append(np.array(np.equal(d, mut))) - NRTI_muts.append("P%d%s" % (i, mut)) - - NRTI_specific = NRTI.from_records(np.array(NRTI_specific).T, columns=NRTI_muts) - - X_NRTI = np.array(NRTI_specific, np.float) - Y = NRTI['3TC'] # shorthand - keep = ~np.isnan(Y).astype(np.bool) - X_NRTI = X_NRTI[np.nonzero(keep)] - - Y = Y[keep] - Y = np.array(np.log(Y), np.float) - Y -= Y.mean() - X_NRTI -= X_NRTI.mean(0)[None, :] - X_NRTI /= X_NRTI.std(0)[None, :] - X = X_NRTI + alpha = (1 - level) / 2 + Z_quantile = ndist.ppf(1 - alpha) + + X, Y, _ = HIV_NRTI(standardize=True) + Y *= 15 n, p = X.shape X /= np.sqrt(n) - - + ols_fit = sm.OLS(Y, X).fit() _sigma = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1) @@ -296,6 +268,15 @@ def test_hiv_data(nsample=1000, nonzero, dispersion=dispersion) + mle, inverse_info = conv.selective_MLE(observed_target, + cov_target, + cov_target_score, + level=level, + solve_args={'tol':1.e-12})[:2] + + # approx_inf = conv.approximate_grid_inference(observed_target, + # cov_target, + # cov_target_score) posterior_inf = conv.posterior(observed_target, cov_target, @@ -307,75 +288,58 @@ def test_hiv_data(nsample=1000, nburnin=nburnin, step=1.) - lci_langevin = np.percentile(samples_langevin, int((1-level)*100), axis=0) - uci_langevin = np.percentile(samples_langevin, int((level)*100), axis=0) + lower_langevin = np.percentile(samples_langevin, int(alpha*100), axis=0) + upper_langevin = np.percentile(samples_langevin, int((1-alpha)*100), axis=0) - samples_gibbs = gibbs_sampler(posterior_inf, - nsample=nsample, - nburnin=nburnin)[0] + samples_gibbs, scale_gibbs = gibbs_sampler(posterior_inf, + nsample=nsample, + nburnin=nburnin) - lci_gibbs = np.percentile(samples_gibbs, int((1 - level) * 100), axis=0) - uci_gibbs = np.percentile(samples_gibbs, int((level) * 100), axis=0) + lower_gibbs = np.percentile(samples_gibbs, int(alpha* 100), axis=0) + upper_gibbs = np.percentile(samples_gibbs, int((1-alpha)*100), axis=0) naive_est = np.linalg.pinv(X[:, nonzero]).dot(Y) - naive_cov = _sigma * np.linalg.inv(X[:, nonzero].T.dot(X[:, nonzero])) + naive_cov = dispersion * np.linalg.inv(X[:, nonzero].T.dot(X[:, nonzero])) naive_intervals = np.vstack([naive_est - Z_quantile * np.sqrt(np.diag(naive_cov)), naive_est + Z_quantile * np.sqrt(np.diag(naive_cov))]).T X_split = X[~conv._selection_idx, :] Y_split = Y[~conv._selection_idx] split_est = np.linalg.pinv(X_split[:, nonzero]).dot(Y_split) - split_cov = _sigma * np.linalg.inv(X_split[:, nonzero].T.dot(X_split[:, nonzero])) + split_cov = dispersion * np.linalg.inv(X_split[:, nonzero].T.dot(X_split[:, nonzero])) split_intervals = np.vstack([split_est - Z_quantile * np.sqrt(np.diag(split_cov)), split_est + Z_quantile * np.sqrt(np.diag(split_cov))]).T - print("lengths: adjusted intervals Langevin, Gibbs, MLE ", np.mean(uci_langevin - lci_langevin), np.mean(uci_gibbs - lci_gibbs), - np.mean((2* Z_quantile )* np.sqrt(np.diag(posterior_inf.inverse_info)))) + print("lengths: adjusted intervals Langevin, Gibbs, MLE1, MLE2, approx ", + np.mean(upper_langevin - lower_langevin), + np.mean(upper_gibbs - lower_gibbs), + 
np.mean((2*Z_quantile)*np.sqrt(np.diag(posterior_inf.inverse_info))), + np.mean(mle['upper_confidence'] - mle['lower_confidence']), + #np.mean(approx_inf['upper_confidence'] - approx_inf['lower_confidence']) + ) print("lengths: naive intervals ", np.mean(naive_intervals[:,1]-naive_intervals[:,0])) print("lengths: split intervals ", np.mean(split_intervals[:, 1] - split_intervals[:, 0])) - output = pd.DataFrame({'Langevin_lower_confidence': lci_langevin, - 'Langevin_upper_confidence': uci_langevin, - 'Gibbs_lower_confidence': lci_gibbs, - 'Gibbs_upper_confidence': uci_gibbs, + scale_interval = np.percentile(scale_gibbs, [alpha*100, (1-alpha)*100]) + output = pd.DataFrame({'Langevin_lower_credible': lower_langevin, + 'Langevin_upper_credible': upper_langevin, + 'Gibbs_lower_credible': lower_gibbs, + 'Gibbs_upper_credible': upper_gibbs, + 'MLE_lower_confidence': mle['lower_confidence'], + 'MLE_upper_confidence': mle['upper_confidence'], + #'approx_lower_confidence': approx_inf['lower_confidence'], + #'approx_upper_confidence': approx_inf['upper_confidence'], 'Split_lower_confidence': split_intervals[:,0], 'Split_upper_confidence': split_intervals[:, 1], 'Naive_lower_confidence': naive_intervals[:, 0], 'Naive_upper_confidence': naive_intervals[:, 1] }) - return output - -# def main(ndraw=10): -# -# coverage_ = 0. -# length_ = 0. -# for n in range(ndraw): -# cov, len = test_Langevin(n=500, -# p=200, -# signal_fac=1., -# s=5, -# sigma=3., -# rho=0.2, -# randomizer_scale=1. -# ) -# -# # cov, len = test_instance(nsample=2000, -# # nburnin=100) -# -# coverage_ += cov -# length_ += len -# -# print("coverage so far ", coverage_ / (n + 1.)) -# print("lengths so far ", length_ / (n + 1.)) -# print("iteration completed ", n + 1) - - -def main(): - test_hiv_data(split_proportion=0.50) + return output, scale_interval, _sigma if __name__ == "__main__": - main() + test_hiv_data(split_proportion=0.50) + From 93bfded0bfd4cfbbb46e0f4f900dfacd7deb022f Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 8 Jul 2020 12:25:56 -0700 Subject: [PATCH 061/187] matching default prior --- selectinf/randomized/tests/test_posterior.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index 6e79e44e6..01d2b6769 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -150,9 +150,10 @@ def test_flexible_prior1(nsample=100, nburnin=50): M, dispersion=dispersion) + Di = 1. / (200 * np.diag(cov_target)) def prior(target_parameter): - grad_prior = -target_parameter / 100 - log_prior = -np.linalg.norm(target_parameter)**2 /(2. 
* 100) + grad_prior = -target_parameter * Di + log_prior = -np.sum(target_parameter**2 * Di) return log_prior, grad_prior seed_state = np.random.get_state() From c8ed7cbcde7b463d4f059189ce88da3576489a28 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 8 Jul 2020 14:14:51 -0700 Subject: [PATCH 062/187] using discrete family for approximate grid inference --- selectinf/randomized/approx_reference.py | 274 ++++++++++++++---- selectinf/randomized/posterior_inference.py | 4 +- .../randomized/tests/test_approx_reference.py | 91 ++++-- selectinf/randomized/tests/test_posterior.py | 12 +- 4 files changed, 294 insertions(+), 87 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index 3c1b1b8fa..b68e3de0b 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -1,23 +1,57 @@ from __future__ import division, print_function -import numpy as np, sys -from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C +import numpy as np, pandas as pd +from scipy.interpolate import interp1d +from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C +from ..distributions.discrete_family import discrete_family -class approximate_grid_inference(): +class approximate_grid_inference(object): def __init__(self, query, observed_target, cov_target, cov_target_score, - grid, - dispersion=1, - level=0.9, + grid=None, solve_args={'tol':1.e-12}): + """ + Produce p-values and confidence intervals for targets + of model including selected features + + Parameters + ---------- + + observed_target : ndarray + Observed estimate of target. + + target_cov : ndarray + Estimated covaraince of target. + + target_score_cov : ndarray + Estimated covariance of target and score of randomized query. + + grid : ndarray + Grid on which to evaluate the approximate + probability of selection. + + mle : ndarray + Selective MLE as initial guess. + + inverse_info : ndarray + Selective inverse information to guide grid search. + + """ + self.solve_args = solve_args + result, inverse_info = query.selective_MLE(observed_target, + cov_target, + cov_target_score, + solve_args=solve_args)[:2] + mle = result['MLE'] + self.linear_part = query.sampler.affine_con.linear_part self.offset = query.sampler.affine_con.offset @@ -31,28 +65,47 @@ def __init__(self, self.cov_target = cov_target self.init_soln = query.observed_opt_state - self.grid = grid - self.ntarget = cov_target.shape[0] - self.level = level + self.ntarget = ntarget = cov_target.shape[0] + _scale = 4 * np.sqrt(np.diag(inverse_info)) + ngrid = 40 + + scale_ = 4 * np.max(np.sqrt(np.diag(inverse_info))) + + self.stat_grid = np.zeros((ntarget, ngrid)) + for j in range(ntarget): + self.stat_grid[j,:] = np.linspace(observed_target[j] - 1.5*_scale[j], + observed_target[j] + 1.5*_scale[j], + num=ngrid) + - def approx_log_reference(self, + def _approx_log_reference(self, observed_target, cov_target, - cov_target_score): + cov_target_score, + grid): + """ + Approximate the log of the reference density on a grid. 
+ + """ if np.asarray(observed_target).shape in [(), (0,)]: raise ValueError('no target specified') - observed_target = np.atleast_1d(observed_target) prec_target = np.linalg.inv(cov_target) target_lin = - self.logdens_linear.dot(cov_target_score.T.dot(prec_target)) ref_hat = [] solver = solve_barrier_affine_C - for k in range(self.grid.shape[0]): - cond_mean_grid = target_lin.dot(np.asarray([self.grid[k]])) + ( - self.cond_mean - target_lin.dot(observed_target)) + for k in range(grid.shape[0]): + # in the usual D = N + Gamma theta.hat, + # target_lin is "something" times Gamma, + # where "something" comes from implied Gaussian + # cond_mean is "something" times D + # Gamma is cov_target_score.T.dot(prec_target) + + cond_mean_grid = (target_lin.dot(np.atleast_1d(grid[k] - observed_target)) + + self.cond_mean) conjugate_arg = self.prec_opt.dot(cond_mean_grid) val, _, _ = solver(conjugate_arg, @@ -66,35 +119,40 @@ def approx_log_reference(self, return np.asarray(ref_hat) - - def approx_density(self, - mean_parameter, - cov_target, - approx_log_ref): + def approx_CDF(self, + mean_parameter, + cov_target, + approx_log_ref, + grid): _approx_density = [] - for k in range(self.grid.shape[0]): - _approx_density.append(np.exp(-np.true_divide((self.grid[k] - mean_parameter) ** 2, 2 * cov_target) + approx_log_ref[k])) + for k in range(grid.shape[0]): + # approx_log_ref[k] = P(selection | D = N + Gamma * grid[k]) + _approx_density.append(np.exp(-np.true_divide((grid[k] - mean_parameter)**2, + 2 * cov_target) + approx_log_ref[k])) _approx_density_ = np.asarray(_approx_density) / (np.asarray(_approx_density).sum()) return np.cumsum(_approx_density_) def approx_ci(self, param_grid, + stat_grid, cov_target, approx_log_ref, - indx_obsv): + indx_obsv, + level): area = np.zeros(param_grid.shape[0]) for k in range(param_grid.shape[0]): - area_vec = self.approx_density(param_grid[k], - cov_target, - approx_log_ref) + area_vec = self.approx_CDF(param_grid[k], + cov_target, + approx_log_ref, + stat_grid) area[k] = area_vec[indx_obsv] - alpha = 1 - self.level + alpha = 1 - level region = param_grid[(area >= alpha / 2.) & (area <= (1 - alpha / 2.))] if region.size > 0: @@ -102,61 +160,155 @@ def approx_ci(self, else: return 0., 0. 
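The hand-rolled CDF and interval search being removed here is replaced by the `discrete_family` helper (see the new `_construct_families` below): the grid of statistic values plays the role of the sufficient statistic and the approximate selective density supplies the weights. A rough sketch of that pattern with a stand-in weight function:

    import numpy as np
    from selectinf.distributions.discrete_family import discrete_family

    # stand-in weights: a Gaussian-shaped log-density on a grid
    grid = np.linspace(-5., 5., 1000)
    logW = -0.5 * grid ** 2
    logW -= logW.max()                  # stabilize before exponentiating

    family = discrete_family(grid, np.exp(logW))
    cdf_val = family.cdf(0., x=1.5)     # CDF of the tilted family at the observed value
    lower, upper = family.equal_tailed_interval(1.5, alpha=0.1)
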
- def approx_pivot(self, - mean_parameter): - - pivot = [] + def _construct_families(self): + self._families = [] for m in range(self.ntarget): p = self.cov_target_score.shape[1] observed_target_uni = (self.observed_target[m]).reshape((1,)) cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) + var_target = cov_target_uni[0, 0] cov_target_score_uni = self.cov_target_score[m, :].reshape((1, p)) - grid_indx_obs = np.argmin(np.abs(self.grid - observed_target_uni)) - approx_log_ref = self.approx_log_reference(observed_target_uni, - cov_target_uni, - cov_target_score_uni) + approx_log_ref = self._approx_log_reference(observed_target_uni, + cov_target_uni, + cov_target_score_uni, + self.stat_grid[m]) + + approx_fn = interp1d(self.stat_grid[m], + approx_log_ref, + kind='quadratic', + bounds_error=False, + fill_value='extrapolate') + + grid = np.linspace(self.stat_grid[m].min(), self.stat_grid[m].max(), 1000) + logW = (approx_fn(grid) - + 0.5 * (grid - self.observed_target[m])**2 / var_target) + logW -= logW.max() + + self._families.append(discrete_family(grid, + np.exp(logW))) + + logG = - 0.5 * grid**2 / var_target + logG -= logG.max() + import matplotlib.pyplot as plt + + # plt.plot(self.stat_grid[m][10:30], approx_log_ref[10:30]) + # plt.plot(self.stat_grid[m][:10], approx_log_ref[:10], 'r', linewidth=4) + # plt.plot(self.stat_grid[m][30:], approx_log_ref[30:], 'r', linewidth=4) + # plt.plot(self.stat_grid[m]*1.5, fapprox(self.stat_grid[m]*1.5), 'k--') + # plt.show() + + # plt.plot(grid, logW) + # plt.plot(grid, logG) + + # stop + + def approx_pivots(self, + mean_parameter, + alternatives=None): + + if not hasattr(self, "_families"): + self._construct_families() + + if alternatives is None: + alternatives = ['twosided'] * self.ntarget - area_cum = self.approx_density(mean_parameter[m], - cov_target_uni, - approx_log_ref) - - pivot.append(2 * np.minimum(area_cum[grid_indx_obs], 1. 
- area_cum[grid_indx_obs])) - - sys.stderr.write("variable completed " + str(m + 1) + "\n") + pivot = [] + for m in range(self.ntarget): + family = self._families[m] + observed_target = self.observed_target[m] + var_target = self.cov_target[m, m] + _cdf = family.cdf((mean_parameter[m] - observed_target) / var_target, + x=observed_target) + if alternatives[m] == 'twosided': + pivot.append(2 * min(_cdf, 1 - _cdf)) + elif alternatives[m] == 'greater': + pivot.append(1 - _cdf) + elif alternatives[m] == 'less': + pivot.append(_cdf) + else: + raise ValueError('alternative should be in ["twosided", "less", "greater"]') return pivot def approx_intervals(self, - param_grid): + level=0.9): - intervals_lci =[] - intervals_uci =[] + if not hasattr(self, "_families"): + self._construct_families() + + lower, upper = [], [] for m in range(self.ntarget): - p = self.cov_target_score.shape[1] - observed_target_uni = (self.observed_target[m]).reshape((1,)) - cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) - cov_target_score_uni = self.cov_target_score[m, :].reshape((1, p)) - grid_indx_obs = np.argmin(np.abs(self.grid - observed_target_uni)) + family = self._families[m] + observed_target = self.observed_target[m] + l, u = family.equal_tailed_interval(observed_target, + alpha=1-level) + var_target = self.cov_target[m, m] + lower.append(l * var_target + observed_target) + upper.append(u * var_target + observed_target) + + return np.asarray(lower), np.asarray(upper) + + def summary(self, + alternatives=None, + parameter=None, + level=0.9): + """ + Produce p-values and confidence intervals for targets + of model including selected features + + Parameters + ---------- + + alternatives : [str], optional + Sequence of strings describing the alternatives, + should be values of ['twosided', 'less', 'greater'] + + parameter : np.array + Hypothesized value for parameter -- defaults to 0. + + level : float + Confidence level. + + """ + + if parameter is not None: + pivots = self.approx_pivots(parameter, + alternatives=alternatives) + else: + pivots = None - approx_log_ref = self.approx_log_reference(observed_target_uni, - cov_target_uni, - cov_target_score_uni) + pvalues = self.approx_pivots(np.zeros_like(self.observed_target), + alternatives=alternatives) + lower, upper = self.approx_intervals(level=level) - approx_lci, approx_uci = self.approx_ci(param_grid[m,:], - cov_target_uni, - approx_log_ref, - grid_indx_obs) + result = pd.DataFrame({'target':self.observed_target, + 'pvalue':pvalues, + 'lower_confidence':lower, + 'upper_confidence':upper}) - intervals_lci.append(approx_lci) - intervals_uci.append(approx_uci) + if not np.all(parameter == 0): + result.insert(4, 'pivot', pivots) + result.insert(5, 'parameter', parameter) - sys.stderr.write("variable completed " + str(m + 1) + "\n") + return result - return np.asarray(intervals_lci), np.asarray(intervals_uci) +def _log_concave_approx(xval, yval): + """ + Approximate a log-concave function + to full line based on sample. 
+ Assumes `xval` is sorted + """ + nu, nl = 10, 10 + n = xval.shape[0] + D = np.vstack([np.ones(n), xval, xval**2]).T + Du = D[-nu:] + Qu = np.linalg(Du).dot(yval[-nu:]) + Dl = D[:nl] + Ql = np.linalg(Dl).dot(yval[:nl]) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 1acb23281..403a5a1f0 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -24,7 +24,7 @@ class posterior(object): prior : callable A callable object that takes a single argument `parameter` of the same shape as `observed_target` - and returns (gradient of log prior, value of log prior) + and returns (value of log prior, gradient of log prior) dispersion : float, optional A dispersion parameter for likelihood. @@ -122,7 +122,7 @@ def log_posterior(self, self.prec_target.dot(target_parameter) \ - self.linear_coef.T.dot(prec_marginal.dot(soln)- conjugate_marginal)) - grad_prior, log_prior = self.prior(target_parameter) + log_prior, grad_prior = self.prior(target_parameter) return (self.dispersion * (log_lik - self.log_ref) / sigmasq + log_prior, self.dispersion * grad_lik/sigmasq + grad_prior) diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index c394559d6..5045e46d5 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -2,7 +2,58 @@ from ...tests.instance import gaussian_instance from ..lasso import lasso, selected_targets +from ..approx_reference import approximate_grid_inference +def test_summary(n=500, + p=100, + signal_fac=1., + s=5, + sigma=2., + rho=0.4, + randomizer_scale=1.): + + inst, const = gaussian_instance, lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + + W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + randomizer_scale=randomizer_scale * dispersion) + + signs = conv.fit() + nonzero = signs != 0 + + if nonzero.sum()>0: + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + + S = conv.approximate_grid_inference(observed_target, + cov_target, + cov_target_score, + alternatives=alternatives) def test_approx_pivot(n=500, p=100, @@ -59,13 +110,13 @@ def test_approx_pivot(n=500, grid = np.linspace(- scale_, scale_, num=ngrid) - approximate_grid_inf = conv.approximate_grid_inference(observed_target, - cov_target, - cov_target_score, - grid=grid, - dispersion=dispersion) + approximate_grid_inf = approximate_grid_inference(conv, + observed_target, + cov_target, + cov_target_score, + grid=grid) - pivot = approximate_grid_inf.approx_pivot(beta_target) + pivot = approximate_grid_inf.approx_pivots(beta_target) return pivot @@ -76,7 +127,8 @@ def test_approx_ci(n=500, s=5, sigma=2., rho=0.4, - randomizer_scale=1.): + randomizer_scale=1., + level=0.9): inst, const = gaussian_instance, lasso.gaussian signal = np.sqrt(signal_fac * 2 * np.log(p)) @@ -126,19 +178,22 @@ def test_approx_ci(n=500, grid = np.linspace(-scale_, scale_, num=ngrid) - approximate_grid_inf = 
conv.approximate_grid_inference(observed_target, - cov_target, - cov_target_score, - grid=grid, - dispersion=dispersion) - + approximate_grid_inf = approximate_grid_inference(conv, + observed_target, + cov_target, + cov_target_score, + grid=grid) param_grid = np.zeros((ntarget, ngrid)) mle = np.asarray(result['MLE']) for j in range(ntarget): param_grid[j,:] = np.linspace(mle[j]-_scale[j], mle[j]+_scale[j], num=ngrid) - lci, uci = approximate_grid_inf.approx_intervals(param_grid) + lci, uci = approximate_grid_inf.approx_intervals(level) + + S = conv.approximate_grid_inference(observed_target, + cov_target, + cov_target_score) beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) coverage = (lci < beta_target) * (uci > beta_target) @@ -146,18 +201,18 @@ def test_approx_ci(n=500, return np.mean(coverage), np.mean(length) -import matplotlib.pyplot as plt -from statsmodels.distributions.empirical_distribution import ECDF def main(nsim=300, CI = False): + import matplotlib.pyplot as plt + from statsmodels.distributions.empirical_distribution import ECDF if CI is False: _pivot = [] for i in range(nsim): _pivot.extend(test_approx_pivot(n=200, p=100, - signal_fac=0.5, + signal_fac=1., s=5, sigma=3., rho=0.20, @@ -191,4 +246,4 @@ def main(nsim=300, CI = False): print("iteration completed ", n + 1) if __name__ == "__main__": - main(nsim=20, CI = True) \ No newline at end of file + main(nsim=20, CI = True) diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index 01d2b6769..2b93b0422 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -275,9 +275,9 @@ def test_hiv_data(nsample=10000, level=level, solve_args={'tol':1.e-12})[:2] - # approx_inf = conv.approximate_grid_inference(observed_target, - # cov_target, - # cov_target_score) + approx_inf = conv.approximate_grid_inference(observed_target, + cov_target, + cov_target_score) posterior_inf = conv.posterior(observed_target, cov_target, @@ -316,7 +316,7 @@ def test_hiv_data(nsample=10000, np.mean(upper_gibbs - lower_gibbs), np.mean((2*Z_quantile)*np.sqrt(np.diag(posterior_inf.inverse_info))), np.mean(mle['upper_confidence'] - mle['lower_confidence']), - #np.mean(approx_inf['upper_confidence'] - approx_inf['lower_confidence']) + np.mean(approx_inf['upper_confidence'] - approx_inf['lower_confidence']) ) print("lengths: naive intervals ", np.mean(naive_intervals[:,1]-naive_intervals[:,0])) @@ -330,8 +330,8 @@ def test_hiv_data(nsample=10000, 'Gibbs_upper_credible': upper_gibbs, 'MLE_lower_confidence': mle['lower_confidence'], 'MLE_upper_confidence': mle['upper_confidence'], - #'approx_lower_confidence': approx_inf['lower_confidence'], - #'approx_upper_confidence': approx_inf['upper_confidence'], + 'approx_lower_confidence': approx_inf['lower_confidence'], + 'approx_upper_confidence': approx_inf['upper_confidence'], 'Split_lower_confidence': split_intervals[:,0], 'Split_upper_confidence': split_intervals[:, 1], 'Naive_lower_confidence': naive_intervals[:, 0], From 28fcb50f6c9ccd4785c802337ad5c35a809e04d9 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 8 Jul 2020 14:41:25 -0700 Subject: [PATCH 063/187] code cleanup for readability --- selectinf/distributions/discrete_family.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/selectinf/distributions/discrete_family.py b/selectinf/distributions/discrete_family.py index 5c6e6fc23..6bdf10f55 100644 --- a/selectinf/distributions/discrete_family.py +++ 
b/selectinf/distributions/discrete_family.py @@ -83,7 +83,7 @@ def __init__(self, sufficient_stat, weights, theta=0.): xw = np.array(sorted(zip(sufficient_stat, weights)), np.float) self._x = xw[:,0] self._w = xw[:,1] - self._lw = np.array([np.log(v) for v in xw[:,1]]) + self._lw = np.log(xw[:,1]) self._w /= self._w.sum() # make sure they are a pmf self.n = len(xw) self._theta = np.nan @@ -479,7 +479,12 @@ def interval(self, observed, alpha=0.05, randomize=True, auxVar=None, tol=1e-6): lower = self._inter2Lower(observed, 0., alpha, tol) return lower, upper - def equal_tailed_interval(self, observed, alpha=0.05, randomize=True, auxVar=None, tol=1e-6): + def equal_tailed_interval(self, + observed, + alpha=0.05, + randomize=True, + auxVar=None, + tol=1e-6): """ Form interval by inverting equal-tailed test with $\alpha/2$ in each tail. From 7042db4706c39e7f378819a92ac90df73005d291 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 8 Jul 2020 14:42:03 -0700 Subject: [PATCH 064/187] rename selective MLE method due to signature conflict --- selectinf/randomized/drop_losers.py | 2 +- selectinf/randomized/tests/test_drop_losers.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/selectinf/randomized/drop_losers.py b/selectinf/randomized/drop_losers.py index ffe2804ca..7c2a7bce6 100644 --- a/selectinf/randomized/drop_losers.py +++ b/selectinf/randomized/drop_losers.py @@ -66,7 +66,7 @@ def __init__(self, self._setup_sampler(A, b, linear, offset) - def selective_MLE(self, + def MLE_inference(self, level=0.9, solve_args={'tol':1.e-12}): """ diff --git a/selectinf/randomized/tests/test_drop_losers.py b/selectinf/randomized/tests/test_drop_losers.py index 46f4b8395..45bd3595d 100644 --- a/selectinf/randomized/tests/test_drop_losers.py +++ b/selectinf/randomized/tests/test_drop_losers.py @@ -39,11 +39,11 @@ def test_drop_losers(p=50, dtl = drop_losers(df, K=K) - dtl.selective_MLE() + dtl.MLE_inference() if not use_MLE: result = dtl.summary(ndraw=20000, burnin=5000) else: - result = dtl.selective_MLE()[0] + result = dtl.MLE_inference()[0] pvalue = np.asarray(result['pvalue']) lower = np.asarray(result['lower_confidence']) upper = np.asarray(result['upper_confidence']) From 1da37cf61273cd5ed662a64914a46e9cb6b41388 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 8 Jul 2020 14:42:26 -0700 Subject: [PATCH 065/187] unused grid arguments --- .../randomized/tests/test_approx_reference.py | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index 5045e46d5..fb1d94828 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -105,16 +105,10 @@ def test_approx_pivot(n=500, cov_target, cov_target_score)[1] - scale_ = 4 * np.max(np.sqrt(np.diag(inverse_info))) - ngrid = 2 * scale_/0.1 - - grid = np.linspace(- scale_, scale_, num=ngrid) - approximate_grid_inf = approximate_grid_inference(conv, observed_target, cov_target, - cov_target_score, - grid=grid) + cov_target_score) pivot = approximate_grid_inf.approx_pivots(beta_target) @@ -176,18 +170,10 @@ def test_approx_ci(n=500, scale_ = np.max(_scale) ngrid = int(2 * scale_/0.1) - grid = np.linspace(-scale_, scale_, num=ngrid) - approximate_grid_inf = approximate_grid_inference(conv, observed_target, cov_target, - cov_target_score, - grid=grid) - - param_grid = np.zeros((ntarget, ngrid)) - mle = np.asarray(result['MLE']) - for j 
in range(ntarget): - param_grid[j,:] = np.linspace(mle[j]-_scale[j], mle[j]+_scale[j], num=ngrid) + cov_target_score) lci, uci = approximate_grid_inf.approx_intervals(level) From 283026212042f53a181f87e6cc6f9bc8054ecc63 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 8 Jul 2020 14:44:21 -0700 Subject: [PATCH 066/187] moved gaussian query specific methods to that class --- selectinf/randomized/approx_reference.py | 64 +++---- selectinf/randomized/query.py | 230 +++++++++++------------ 2 files changed, 143 insertions(+), 151 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index b68e3de0b..abee9bfbc 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -11,9 +11,8 @@ class approximate_grid_inference(object): def __init__(self, query, observed_target, - cov_target, - cov_target_score, - grid=None, + target_cov, + target_score_cov, solve_args={'tol':1.e-12}): """ @@ -23,6 +22,10 @@ def __init__(self, Parameters ---------- + query : `gaussian_query` + A Gaussian query which has information + to describe implied Gaussian. + observed_target : ndarray Observed estimate of target. @@ -32,23 +35,16 @@ def __init__(self, target_score_cov : ndarray Estimated covariance of target and score of randomized query. - grid : ndarray - Grid on which to evaluate the approximate - probability of selection. - - mle : ndarray - Selective MLE as initial guess. - - inverse_info : ndarray - Selective inverse information to guide grid search. + solve_args : dict, optional + Arguments passed to solver. """ self.solve_args = solve_args result, inverse_info = query.selective_MLE(observed_target, - cov_target, - cov_target_score, + target_cov, + target_score_cov, solve_args=solve_args)[:2] mle = result['MLE'] @@ -61,12 +57,12 @@ def __init__(self, self.cond_cov = query.cond_cov self.observed_target = observed_target - self.cov_target_score = cov_target_score - self.cov_target = cov_target + self.target_score_cov = target_score_cov + self.target_cov = target_cov self.init_soln = query.observed_opt_state - self.ntarget = ntarget = cov_target.shape[0] + self.ntarget = ntarget = target_cov.shape[0] _scale = 4 * np.sqrt(np.diag(inverse_info)) ngrid = 40 @@ -81,8 +77,8 @@ def __init__(self, def _approx_log_reference(self, observed_target, - cov_target, - cov_target_score, + target_cov, + target_score_cov, grid): """ @@ -92,8 +88,8 @@ def _approx_log_reference(self, if np.asarray(observed_target).shape in [(), (0,)]: raise ValueError('no target specified') - prec_target = np.linalg.inv(cov_target) - target_lin = - self.logdens_linear.dot(cov_target_score.T.dot(prec_target)) + prec_target = np.linalg.inv(target_cov) + target_lin = - self.logdens_linear.dot(target_score_cov.T.dot(prec_target)) ref_hat = [] solver = solve_barrier_affine_C @@ -102,7 +98,7 @@ def _approx_log_reference(self, # target_lin is "something" times Gamma, # where "something" comes from implied Gaussian # cond_mean is "something" times D - # Gamma is cov_target_score.T.dot(prec_target) + # Gamma is target_score_cov.T.dot(prec_target) cond_mean_grid = (target_lin.dot(np.atleast_1d(grid[k] - observed_target)) + self.cond_mean) @@ -121,7 +117,7 @@ def _approx_log_reference(self, def approx_CDF(self, mean_parameter, - cov_target, + target_cov, approx_log_ref, grid): @@ -129,7 +125,7 @@ def approx_CDF(self, for k in range(grid.shape[0]): # approx_log_ref[k] = P(selection | D = N + Gamma * grid[k]) 
_approx_density.append(np.exp(-np.true_divide((grid[k] - mean_parameter)**2, - 2 * cov_target) + approx_log_ref[k])) + 2 * target_cov) + approx_log_ref[k])) _approx_density_ = np.asarray(_approx_density) / (np.asarray(_approx_density).sum()) return np.cumsum(_approx_density_) @@ -137,7 +133,7 @@ def approx_CDF(self, def approx_ci(self, param_grid, stat_grid, - cov_target, + target_cov, approx_log_ref, indx_obsv, level): @@ -146,7 +142,7 @@ def approx_ci(self, for k in range(param_grid.shape[0]): area_vec = self.approx_CDF(param_grid[k], - cov_target, + target_cov, approx_log_ref, stat_grid) @@ -164,15 +160,15 @@ def _construct_families(self): self._families = [] for m in range(self.ntarget): - p = self.cov_target_score.shape[1] + p = self.target_score_cov.shape[1] observed_target_uni = (self.observed_target[m]).reshape((1,)) - cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) - var_target = cov_target_uni[0, 0] - cov_target_score_uni = self.cov_target_score[m, :].reshape((1, p)) + target_cov_uni = (np.diag(self.target_cov)[m]).reshape((1, 1)) + var_target = target_cov_uni[0, 0] + target_score_cov_uni = self.target_score_cov[m, :].reshape((1, p)) approx_log_ref = self._approx_log_reference(observed_target_uni, - cov_target_uni, - cov_target_score_uni, + target_cov_uni, + target_score_cov_uni, self.stat_grid[m]) approx_fn = interp1d(self.stat_grid[m], @@ -219,7 +215,7 @@ def approx_pivots(self, for m in range(self.ntarget): family = self._families[m] observed_target = self.observed_target[m] - var_target = self.cov_target[m, m] + var_target = self.target_cov[m, m] _cdf = family.cdf((mean_parameter[m] - observed_target) / var_target, x=observed_target) if alternatives[m] == 'twosided': @@ -245,7 +241,7 @@ def approx_intervals(self, observed_target = self.observed_target[m] l, u = family.equal_tailed_interval(observed_target, alpha=1-level) - var_target = self.cov_target[m, m] + var_target = self.target_cov[m, m] lower.append(l * var_target + observed_target) upper.append(u * var_target + observed_target) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 0b24ecc45..df890a2ef 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -97,17 +97,97 @@ def solve(self): raise NotImplementedError('abstract method') - def setup_sampler(self): - """ - Setup query to prepare for sampling. 
- Should set a few key attributes: +class gaussian_query(query): - - observed_score_state - - observed_opt_state - - opt_transform + useC = True - """ - raise NotImplementedError('abstract method -- only keyword arguments') + """ + A class with Gaussian perturbation to the objective -- + easy to apply CLT to such things + """ + + def fit(self, perturb=None): + + p = self.nfeature + + # take a new perturbation if supplied + if perturb is not None: + self._initial_omega = perturb + if self._initial_omega is None: + self._initial_omega = self.randomizer.sample() + + # Private methods + + def _setup_sampler(self, + linear_part, + offset, + opt_linear, + opt_offset, + # optional dispersion parameter + # for covariance of randomization + dispersion=1): + + A, b = linear_part, offset + if not np.all(A.dot(self.observed_opt_state) - b <= 0): + raise ValueError('constraints not satisfied') + + (cond_mean, + cond_cov, + cond_precision, + logdens_linear) = self._setup_implied_gaussian(opt_linear, + opt_offset, + dispersion) + + def log_density(logdens_linear, offset, cond_prec, opt, score): + if score.ndim == 1: + mean_term = logdens_linear.dot(score.T + offset).T + else: + mean_term = logdens_linear.dot(score.T + offset[:, None]).T + arg = opt + mean_term + return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1) + + log_density = functools.partial(log_density, + logdens_linear, + opt_offset, + cond_precision) + + self.cond_mean, self.cond_cov = cond_mean, cond_cov + + affine_con = constraints(A, + b, + mean=cond_mean, + covariance=cond_cov) + + self.sampler = affine_gaussian_sampler(affine_con, + self.observed_opt_state, + self.observed_score_state, + log_density, + (logdens_linear, opt_offset), + selection_info=self.selection_variable, + useC=self.useC) + + def _setup_implied_gaussian(self, + opt_linear, + opt_offset, + # optional dispersion parameter + # for covariance of randomization + dispersion=1): + + _, prec = self.randomizer.cov_prec + prec = prec / dispersion + + if np.asarray(prec).shape in [(), (0,)]: + cond_precision = opt_linear.T.dot(opt_linear) * prec + cond_cov = np.linalg.inv(cond_precision) + logdens_linear = cond_cov.dot(opt_linear.T) * prec + else: + cond_precision = opt_linear.T.dot(prec.dot(opt_linear)) + cond_cov = np.linalg.inv(cond_precision) + logdens_linear = cond_cov.dot(opt_linear.T).dot(prec) + + cond_mean = -logdens_linear.dot(self.observed_score_state + opt_offset) + + return cond_mean, cond_cov, cond_precision, logdens_linear def summary(self, observed_target, @@ -197,10 +277,9 @@ def summary(self, if compute_intervals: - MLE = query.selective_MLE(self, - observed_target, - target_cov, - target_score_cov)[0] + MLE = self.selective_MLE(observed_target, + target_cov, + target_score_cov)[0] MLE_intervals = np.asarray(MLE[['lower_confidence', 'upper_confidence']]) intervals = self.sampler.confidence_intervals( @@ -313,121 +392,38 @@ def approximate_grid_inference(self, observed_target, target_cov, target_score_cov, - grid=None, alternatives=None, solve_args={'tol': 1.e-12}): - # result, inverse_info = self.selective_MLE(observed_target, - # target_cov, - # target_score_cov)[:2] - - # if dispersion is None: - # dispersion = 1 - # print('Using dispersion parameter 1...') - - G = approximate_grid_inference(self, - observed_target, - target_cov, - target_score_cov, - #inverse_info, - #result['MLE'], - #dispersion, - grid=grid, - solve_args=solve_args) - return G.summary(alternatives=alternatives) - - -class gaussian_query(query): - - useC = True - - """ - A class with 
Gaussian perturbation to the objective -- - easy to apply CLT to such things - """ - - def fit(self, perturb=None): - - p = self.nfeature - - # take a new perturbation if supplied - if perturb is not None: - self._initial_omega = perturb - if self._initial_omega is None: - self._initial_omega = self.randomizer.sample() - - # Private methods - - def _setup_sampler(self, - linear_part, - offset, - opt_linear, - opt_offset, - # optional dispersion parameter - # for covariance of randomization - dispersion=1): - - A, b = linear_part, offset - if not np.all(A.dot(self.observed_opt_state) - b <= 0): - raise ValueError('constraints not satisfied') - - (cond_mean, - cond_cov, - cond_precision, - logdens_linear) = self._setup_implied_gaussian(opt_linear, - opt_offset, - dispersion) - - def log_density(logdens_linear, offset, cond_prec, opt, score): - if score.ndim == 1: - mean_term = logdens_linear.dot(score.T + offset).T - else: - mean_term = logdens_linear.dot(score.T + offset[:, None]).T - arg = opt + mean_term - return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1) - - log_density = functools.partial(log_density, - logdens_linear, - opt_offset, - cond_precision) + """ - self.cond_mean, self.cond_cov = cond_mean, cond_cov + Parameters + ---------- - affine_con = constraints(A, - b, - mean=cond_mean, - covariance=cond_cov) + observed_target : ndarray + Observed estimate of target. - self.sampler = affine_gaussian_sampler(affine_con, - self.observed_opt_state, - self.observed_score_state, - log_density, - (logdens_linear, opt_offset), - selection_info=self.selection_variable, - useC=self.useC) + target_cov : ndarray + Estimated covaraince of target. - def _setup_implied_gaussian(self, - opt_linear, - opt_offset, - # optional dispersion parameter - # for covariance of randomization - dispersion=1): + target_score_cov : ndarray + Estimated covariance of target and score of randomized query. - _, prec = self.randomizer.cov_prec - prec = prec / dispersion + alternatives : [str], optional + Sequence of strings describing the alternatives, + should be values of ['twosided', 'less', 'greater'] - if np.asarray(prec).shape in [(), (0,)]: - cond_precision = opt_linear.T.dot(opt_linear) * prec - cond_cov = np.linalg.inv(cond_precision) - logdens_linear = cond_cov.dot(opt_linear.T) * prec - else: - cond_precision = opt_linear.T.dot(prec.dot(opt_linear)) - cond_cov = np.linalg.inv(cond_precision) - logdens_linear = cond_cov.dot(opt_linear.T).dot(prec) + solve_args : dict, optional + Arguments passed to solver. 
- cond_mean = -logdens_linear.dot(self.observed_score_state + opt_offset) + """ - return cond_mean, cond_cov, cond_precision, logdens_linear + G = approximate_grid_inference(self, + observed_target, + target_cov, + target_score_cov, + solve_args=solve_args) + return G.summary(alternatives=alternatives) class multiple_queries(object): From 2da4d06ea7b55d31c5a9a465955311d09c67d322 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 8 Jul 2020 14:59:29 -0700 Subject: [PATCH 067/187] cleanup of approx reference code --- selectinf/randomized/approx_reference.py | 169 ++++++------------ .../randomized/tests/test_approx_reference.py | 4 +- 2 files changed, 59 insertions(+), 114 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index abee9bfbc..af8b936c8 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -74,6 +74,49 @@ def __init__(self, observed_target[j] + 1.5*_scale[j], num=ngrid) + def summary(self, + alternatives=None, + parameter=None, + level=0.9): + """ + Produce p-values and confidence intervals for targets + of model including selected features + + Parameters + ---------- + + alternatives : [str], optional + Sequence of strings describing the alternatives, + should be values of ['twosided', 'less', 'greater'] + + parameter : np.array + Hypothesized value for parameter -- defaults to 0. + + level : float + Confidence level. + + """ + + if parameter is not None: + pivots = self.approx_pivots(parameter, + alternatives=alternatives) + else: + pivots = None + + pvalues = self._approx_pivots(np.zeros_like(self.observed_target), + alternatives=alternatives) + lower, upper = self._approx_intervals(level=level) + + result = pd.DataFrame({'target':self.observed_target, + 'pvalue':pvalues, + 'lower_confidence':lower, + 'upper_confidence':upper}) + + if not np.all(parameter == 0): + result.insert(4, 'pivot', pivots) + result.insert(5, 'parameter', parameter) + + return result def _approx_log_reference(self, observed_target, @@ -115,47 +158,6 @@ def _approx_log_reference(self, return np.asarray(ref_hat) - def approx_CDF(self, - mean_parameter, - target_cov, - approx_log_ref, - grid): - - _approx_density = [] - for k in range(grid.shape[0]): - # approx_log_ref[k] = P(selection | D = N + Gamma * grid[k]) - _approx_density.append(np.exp(-np.true_divide((grid[k] - mean_parameter)**2, - 2 * target_cov) + approx_log_ref[k])) - - _approx_density_ = np.asarray(_approx_density) / (np.asarray(_approx_density).sum()) - return np.cumsum(_approx_density_) - - def approx_ci(self, - param_grid, - stat_grid, - target_cov, - approx_log_ref, - indx_obsv, - level): - - area = np.zeros(param_grid.shape[0]) - - for k in range(param_grid.shape[0]): - area_vec = self.approx_CDF(param_grid[k], - target_cov, - approx_log_ref, - stat_grid) - - area[k] = area_vec[indx_obsv] - - alpha = 1 - level - region = param_grid[(area >= alpha / 2.) & (area <= (1 - alpha / 2.))] - - if region.size > 0: - return np.nanmin(region), np.nanmax(region) - else: - return 0., 0. 
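The `_construct_families` / `_approx_pivots` code that follows replaces the hand-rolled grid CDF removed above with `selectinf.distributions.discrete_family`: the interpolated `logW` weights define a one-parameter exponential family supported on the grid, and tilting it by `(mean_parameter - observed) / var` before evaluating its CDF at the observed value yields the pivot. A toy illustration of that mechanism, with made-up weights standing in for `np.exp(logW)`:

```python
import numpy as np
from selectinf.distributions.discrete_family import discrete_family

grid = np.linspace(-5, 5, 1001)
weights = np.exp(-0.5 * grid ** 2)        # placeholder for np.exp(logW)
family = discrete_family(grid, weights)

observed, var_target, hypothesized = 1.3, 1.0, 0.0
_cdf = family.cdf((hypothesized - observed) / var_target, x=observed)
pivot = 2 * min(_cdf, 1 - _cdf)           # two-sided pivot, as in _approx_pivots
```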
- def _construct_families(self): self._families = [] @@ -182,12 +184,14 @@ def _construct_families(self): 0.5 * (grid - self.observed_target[m])**2 / var_target) logW -= logW.max() + # construction of families follows `selectinf.learning.core` + self._families.append(discrete_family(grid, np.exp(logW))) - logG = - 0.5 * grid**2 / var_target - logG -= logG.max() - import matplotlib.pyplot as plt + # logG = - 0.5 * grid**2 / var_target + # logG -= logG.max() + # import matplotlib.pyplot as plt # plt.plot(self.stat_grid[m][10:30], approx_log_ref[10:30]) # plt.plot(self.stat_grid[m][:10], approx_log_ref[:10], 'r', linewidth=4) @@ -198,11 +202,9 @@ def _construct_families(self): # plt.plot(grid, logW) # plt.plot(grid, logG) - # stop - - def approx_pivots(self, - mean_parameter, - alternatives=None): + def _approx_pivots(self, + mean_parameter, + alternatives=None): if not hasattr(self, "_families"): self._construct_families() @@ -216,6 +218,9 @@ def approx_pivots(self, family = self._families[m] observed_target = self.observed_target[m] var_target = self.target_cov[m, m] + + # construction of pivot from families follows `selectinf.learning.core` + _cdf = family.cdf((mean_parameter[m] - observed_target) / var_target, x=observed_target) if alternatives[m] == 'twosided': @@ -228,8 +233,8 @@ def approx_pivots(self, raise ValueError('alternative should be in ["twosided", "less", "greater"]') return pivot - def approx_intervals(self, - level=0.9): + def _approx_intervals(self, + level=0.9): if not hasattr(self, "_families"): self._construct_families() @@ -237,6 +242,7 @@ def approx_intervals(self, lower, upper = [], [] for m in range(self.ntarget): + # construction of intervals from families follows `selectinf.learning.core` family = self._families[m] observed_target = self.observed_target[m] l, u = family.equal_tailed_interval(observed_target, @@ -247,64 +253,3 @@ def approx_intervals(self, return np.asarray(lower), np.asarray(upper) - def summary(self, - alternatives=None, - parameter=None, - level=0.9): - """ - Produce p-values and confidence intervals for targets - of model including selected features - - Parameters - ---------- - - alternatives : [str], optional - Sequence of strings describing the alternatives, - should be values of ['twosided', 'less', 'greater'] - - parameter : np.array - Hypothesized value for parameter -- defaults to 0. - - level : float - Confidence level. - - """ - - if parameter is not None: - pivots = self.approx_pivots(parameter, - alternatives=alternatives) - else: - pivots = None - - pvalues = self.approx_pivots(np.zeros_like(self.observed_target), - alternatives=alternatives) - lower, upper = self.approx_intervals(level=level) - - result = pd.DataFrame({'target':self.observed_target, - 'pvalue':pvalues, - 'lower_confidence':lower, - 'upper_confidence':upper}) - - if not np.all(parameter == 0): - result.insert(4, 'pivot', pivots) - result.insert(5, 'parameter', parameter) - - return result - -def _log_concave_approx(xval, yval): - """ - Approximate a log-concave function - to full line based on sample. 
- - Assumes `xval` is sorted - """ - - nu, nl = 10, 10 - n = xval.shape[0] - D = np.vstack([np.ones(n), xval, xval**2]).T - - Du = D[-nu:] - Qu = np.linalg(Du).dot(yval[-nu:]) - - Dl = D[:nl] - Ql = np.linalg(Dl).dot(yval[:nl]) diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index fb1d94828..fbf57dd13 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -110,7 +110,7 @@ def test_approx_pivot(n=500, cov_target, cov_target_score) - pivot = approximate_grid_inf.approx_pivots(beta_target) + pivot = approximate_grid_inf._approx_pivots(beta_target) return pivot @@ -175,7 +175,7 @@ def test_approx_ci(n=500, cov_target, cov_target_score) - lci, uci = approximate_grid_inf.approx_intervals(level) + lci, uci = approximate_grid_inf._approx_intervals(level) S = conv.approximate_grid_inference(observed_target, cov_target, From ae7c0de630f8a92ab807427a47c751168ea4dc3e Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 8 Jul 2020 15:02:15 -0700 Subject: [PATCH 068/187] BF: remove statsmodells dependency --- selectinf/randomized/tests/test_posterior.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index 2b93b0422..c9e3fc118 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -1,6 +1,5 @@ import numpy as np import pandas as pd -import statsmodels.api as sm from scipy.stats import norm as ndist from ...tests.instance import gaussian_instance, HIV_NRTI @@ -244,8 +243,8 @@ def test_hiv_data(nsample=10000, n, p = X.shape X /= np.sqrt(n) - ols_fit = sm.OLS(Y, X).fit() - _sigma = np.linalg.norm(ols_fit.resid) / np.sqrt(n - p - 1) + ols_fit = np.linalg.pinv(X).dot(Y) + _sigma = np.linalg.norm(Y - X.dot(ols_fit)) / np.sqrt(n - p - 1) const = split_lasso.gaussian From 07cfc9e0c9ae4847d3c3ecb21c6e9af2ed25f5f6 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Sat, 11 Jul 2020 18:34:20 -0400 Subject: [PATCH 069/187] commit before switch --- selectinf/randomized/tests/test_approx_reference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index fbf57dd13..1832b7cbe 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -221,7 +221,7 @@ def main(nsim=300, CI = False): p=100, signal_fac=1., s=5, - sigma=2., + sigma=3., rho=0.4, randomizer_scale=1.) 
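For context on why `main` accumulates `_pivot` across simulations: when the grid approximation is accurate, the selective pivots evaluated at the true `beta_target` should be close to Uniform(0, 1), which is what the `ECDF` imported in `main` is used to check. A small sketch of that check (the placeholder sample below stands in for the collected pivots):

```python
import numpy as np
from statsmodels.distributions.empirical_distribution import ECDF

pivots = np.random.uniform(size=500)       # placeholder for the collected _pivot values
U = np.linspace(0, 1, 101)
max_gap = np.max(np.abs(ECDF(pivots)(U) - U))  # small gap indicates approximate uniformity
```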
@@ -232,4 +232,4 @@ def main(nsim=300, CI = False): print("iteration completed ", n + 1) if __name__ == "__main__": - main(nsim=20, CI = True) + main(nsim=40, CI = False) From 69713fd3f9bff88daadd8eb9125daf08a700cf5b Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Mon, 13 Jul 2020 19:17:40 -0400 Subject: [PATCH 070/187] test bias --- selectinf/randomized/query.py | 6 +- .../tests/test_selective_MLE_high.py | 75 ++++++----- selectinf/randomized/tests/test_topK.py | 124 ++++++++++++++---- 3 files changed, 146 insertions(+), 59 deletions(-) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index df890a2ef..b46aab9a7 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -1681,11 +1681,13 @@ def selective_MLE(observed_target, conjugate_arg = prec_opt.dot(cond_mean) + useC= False + print("useC", useC) if useC: solver = solve_barrier_affine_C else: solver = _solve_barrier_affine_py - + val, soln, hess = solver(conjugate_arg, prec_opt, init_soln, @@ -1696,6 +1698,8 @@ def selective_MLE(observed_target, final_estimator = observed_target + target_cov.dot(target_lin.T.dot(prec_opt.dot(cond_mean - soln))) ind_unbiased_estimator = observed_target + target_cov.dot(target_lin.T.dot(prec_opt.dot(cond_mean - init_soln))) + + print("check within MLE ", soln, init_soln) L = target_lin.T.dot(prec_opt) observed_info_natural = prec_target + L.dot(target_lin) - L.dot(hess.dot(L.T)) observed_info_mean = target_cov.dot(observed_info_natural.dot(target_cov)) diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 578ae66ec..e38bde4fa 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -83,7 +83,7 @@ def test_full_targets(n=200, def test_selected_targets(n=2000, p=200, - signal_fac=10., + signal_fac=1., s=5, sigma=3, rho=0.4, @@ -147,43 +147,18 @@ def test_selected_targets(n=2000, beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) - return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals - -def main(nsim=500, full=False): - P0, PA, cover, length_int = [], [], [], [] - from statsmodels.distributions import ECDF - - n, p, s = 500, 100, 5 - - for i in range(nsim): - if full: - if n > p: - full_dispersion = True - else: - full_dispersion = False - p0, pA, cover_, intervals = test_full_targets(n=n, p=p, s=s, full_dispersion=full_dispersion) - avg_length = intervals[:, 1] - intervals[:, 0] - else: - full_dispersion = True - p0, pA, cover_, intervals = test_selected_targets(n=n, p=p, s=s, - full_dispersion=full_dispersion) - avg_length = intervals[:, 1] - intervals[:, 0] - - cover.extend(cover_) - P0.extend(p0) - PA.extend(pA) - print( - np.array(PA) < 0.1, np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover), - np.mean(avg_length), 'null pvalue + power + length') + print("observed_opt_state ", conv.observed_opt_state) + # print("check ", np.asarray(result['MLE']), np.asarray(result['unbiased'])) + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals def test_instance(): n, p, s = 500, 100, 5 X = np.random.standard_normal((n, p)) beta = np.zeros(p) - #beta[:s] = np.sqrt(2 * np.log(p) / n) + beta[:s] = np.sqrt(2 * np.log(p) / n) Y = X.dot(beta) + np.random.standard_normal(n) scale_ = np.std(Y) @@ -215,17 +190,47 @@ def test_instance(): 
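        # Annotation (not from the original patch): `result` is the summary
        # DataFrame returned by selective_MLE; the columns used in this test
        # are 'MLE', 'lower_confidence' and 'upper_confidence'.  The lines
        # below form the selected-model target pinv(X[:, M]) X beta and check
        # whether it falls inside those confidence limits.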
beta_target = np.linalg.pinv(X[:, M]).dot(X.dot(beta)) coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) + print("observed_opt_state ", L.observed_opt_state) + #print("check ", np.asarray(result['MLE']), np.asarray(result['unbiased'])) return coverage -def main(nsim=500): +# def main(nsim=500): +# +# cover = [] +# for i in range(nsim): +# +# cover_ = test_instance() +# cover.extend(cover_) +# print(np.mean(cover), 'coverage so far ') + +def main(nsim=500, full=False): + P0, PA, cover, length_int = [], [], [], [] + from statsmodels.distributions import ECDF + + n, p, s = 500, 100, 5 - cover = [] for i in range(nsim): + if full: + if n > p: + full_dispersion = True + else: + full_dispersion = False + p0, pA, cover_, intervals = test_full_targets(n=n, p=p, s=s, full_dispersion=full_dispersion) + avg_length = intervals[:, 1] - intervals[:, 0] + else: + full_dispersion = True + p0, pA, cover_, intervals = test_selected_targets(n=n, p=p, s=s, + full_dispersion=full_dispersion) + avg_length = intervals[:, 1] - intervals[:, 0] - cover_ = test_instance() cover.extend(cover_) - print(np.mean(cover), 'coverage so far ') + P0.extend(p0) + PA.extend(pA) + print( + np.array(PA) < 0.1, np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover), + np.mean(avg_length), 'null pvalue + power + length') + if __name__ == "__main__": - main(nsim=500) + main(nsim=100) diff --git a/selectinf/randomized/tests/test_topK.py b/selectinf/randomized/tests/test_topK.py index 000c45aba..8091f8ac3 100644 --- a/selectinf/randomized/tests/test_topK.py +++ b/selectinf/randomized/tests/test_topK.py @@ -10,7 +10,7 @@ def test_topK(n=500, s=5, sigma=3, rho=0.4, - randomizer_scale=0.25, + randomizer_scale=0.50, use_MLE=True, marginal=False): @@ -85,29 +85,107 @@ def test_both(): test_topK(marginal=True) test_topK(marginal=False) -def main(nsim=5000, use_MLE=False): +def test_bias_topK(n=500, + p=50, + s=5, + sigma=3, + rho=0.4, + randomizer_scale=0.50, + K=5, + marginal=False): - import matplotlib.pyplot as plt - import statsmodels.api as sm - U = np.linspace(0, 1, 101) + while True: + X = gaussian_instance(n=n, + p=p, + equicorrelated=False, + rho=rho)[0] + W = rho**(np.fabs(np.subtract.outer(np.arange(p), np.arange(p)))) + sqrtW = np.linalg.cholesky(W) + sigma = 0.15 + Z = np.random.standard_normal(p).dot(sqrtW.T) * sigma + beta = (2 * np.random.binomial(1, 0.5, size=(p,)) - 1) * 5 * sigma + beta[s:] = 0 + np.random.shuffle(beta) + + true_mean = W.dot(beta) + score = Z + true_mean + idx = np.arange(p) + + n, p = X.shape + + randomizer = randomization.isotropic_gaussian(p, randomizer_scale * sigma) + topK_select = topK(score, + W * sigma**2, + randomizer, + K) + + boundary = topK_select.fit() + nonzero = boundary != 0 + + if nonzero.sum() > 0: + + if marginal: + beta_target = true_mean[nonzero] + (observed_target, + cov_target, + crosscov_target_score, + alternatives) = topK_select.marginal_targets(nonzero) + else: + beta_target = beta[nonzero] + (observed_target, + cov_target, + crosscov_target_score, + alternatives) = topK_select.multivariate_targets(nonzero, dispersion=sigma**2) + + result = topK_select.selective_MLE(observed_target, + cov_target, + crosscov_target_score)[0] + + bias_mle = np.asarray(result['MLE'])-beta_target + bias_indest = np.asarray(result['unbiased'])-beta_target + print("check ", np.asarray(result['MLE']), np.asarray(result['unbiased'])) + + return bias_mle, bias_indest + + +# def main(nsim=5000, use_MLE=False): +# +# import 
matplotlib.pyplot as plt +# import statsmodels.api as sm +# U = np.linspace(0, 1, 101) +# +# P0, PA, cover, length_int = [], [], [], [] +# for i in range(nsim): +# p0, pA, cover_, intervals = test_topK(use_MLE=use_MLE) +# +# cover.extend(cover_) +# P0.extend(p0) +# PA.extend(pA) +# print(np.mean(cover),'coverage so far') +# +# period = 10 +# if use_MLE: +# period = 50 +# if i % period == 0 and i > 0: +# plt.clf() +# plt.plot(U, sm.distributions.ECDF(P0)(U), 'b', label='null') +# plt.plot(U, sm.distributions.ECDF(PA)(U), 'r', label='alt') +# plt.plot([0, 1], [0, 1], 'k--') +# plt.legend() +# plt.savefig('topK_pvals.pdf') + + +def main(nsim=500): + _bias_mle = [] + _bias_indest = [] - P0, PA, cover, length_int = [], [], [], [] for i in range(nsim): - p0, pA, cover_, intervals = test_topK(use_MLE=use_MLE) - - cover.extend(cover_) - P0.extend(p0) - PA.extend(pA) - print(np.mean(cover),'coverage so far') - - period = 10 - if use_MLE: - period = 50 - if i % period == 0 and i > 0: - plt.clf() - plt.plot(U, sm.distributions.ECDF(P0)(U), 'b', label='null') - plt.plot(U, sm.distributions.ECDF(PA)(U), 'r', label='alt') - plt.plot([0, 1], [0, 1], 'k--') - plt.legend() - plt.savefig('topK_pvals.pdf') + bias_mle, bias_indest = test_bias_topK() + _bias_mle.extend(bias_mle) + _bias_indest.extend(bias_indest) + + print(np.mean(_bias_mle), np.mean(_bias_indest), 'bias so far: mle and independent estimate ') + +if __name__ == "__main__": + main(nsim=500) From 54478ed23b24df31f801d5deb5cdecf85b986883 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Sat, 9 Jan 2021 13:31:51 -0500 Subject: [PATCH 071/187] commit before switch --- selectinf/randomized/query.py | 1 - 1 file changed, 1 deletion(-) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index b46aab9a7..aa1cbd8a6 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -1699,7 +1699,6 @@ def selective_MLE(observed_target, ind_unbiased_estimator = observed_target + target_cov.dot(target_lin.T.dot(prec_opt.dot(cond_mean - init_soln))) - print("check within MLE ", soln, init_soln) L = target_lin.T.dot(prec_opt) observed_info_natural = prec_target + L.dot(target_lin) - L.dot(hess.dot(L.T)) observed_info_mean = target_cov.dot(observed_info_natural.dot(target_cov)) From e45a42e54961823b9d842c31c8ce1c34d2122d49 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Wed, 3 Feb 2021 13:56:21 -0500 Subject: [PATCH 072/187] MCMC free pivots for group lasso --- .../randomized/approx_reference_grouplasso.py | 776 ++++++++++++++++++ .../tests/test_approx_reference_grouplasso.py | 88 ++ selectinf/tests/instance.py | 94 +++ 3 files changed, 958 insertions(+) create mode 100644 selectinf/randomized/approx_reference_grouplasso.py create mode 100644 selectinf/randomized/tests/test_approx_reference_grouplasso.py diff --git a/selectinf/randomized/approx_reference_grouplasso.py b/selectinf/randomized/approx_reference_grouplasso.py new file mode 100644 index 000000000..f028fcbe3 --- /dev/null +++ b/selectinf/randomized/approx_reference_grouplasso.py @@ -0,0 +1,776 @@ +from __future__ import print_function +from scipy.linalg import block_diag +from scipy.stats import norm as ndist +from scipy.interpolate import interp1d + +import collections +import numpy as np +from numpy import log +from numpy.linalg import norm, qr, inv, eig +import pandas as pd + +import regreg.api as rr +from .randomization import randomization +from ..base import restricted_estimator +from .query import _solve_barrier_affine_py +from 
..distributions.discrete_family import discrete_family + +class group_lasso(object): + + def __init__(self, + loglike, + groups, + weights, + ridge_term, + randomizer, + use_lasso=True, # should lasso solver be used where applicable - defaults to True + perturb=None): + + _check_groups(groups) # make sure groups looks sensible + + # log likelihood : quadratic loss + self.loglike = loglike + self.nfeature = self.loglike.shape[0] + + # ridge parameter + self.ridge_term = ridge_term + + # group lasso penalty (from regreg) + # use regular lasso penalty if all groups are size 1 + if use_lasso and groups.size == np.unique(groups).size: + # need to provide weights an an np.array rather than a dictionary + weights_np = np.array([w[1] for w in sorted(weights.items())]) + self.penalty = rr.weighted_l1norm(weights=weights_np, + lagrange=1.) + else: + self.penalty = rr.group_lasso(groups, + weights=weights, + lagrange=1.) + + # store groups as a class variable since the non-group lasso doesn't + self.groups = groups + + self._initial_omega = perturb + + # gaussian randomization + self.randomizer = randomizer + + def fit(self, + solve_args={'tol': 1.e-12, 'min_its': 50}, + perturb=None): + + # solve the randomized version of group lasso + (self.initial_soln, + self.initial_subgrad) = self._solve_randomized_problem(perturb=perturb, + solve_args=solve_args) + + # initialize variables + active_groups = [] # active group labels + active_dirs = {} # dictionary: keys are group labels, values are unit-norm coefficients + unpenalized = [] # selected groups with no penalty + overall = np.ones(self.nfeature, np.bool) # mask of active features + ordered_groups = [] # active group labels sorted by label + ordered_opt = [] # gamma's ordered by group labels + ordered_vars = [] # indices "ordered" by sorting group labels + + tol = 1.e-20 + + # now we are collecting the directions and norms of the active groups + for g in sorted(np.unique(self.groups)): # g is group label + + group_mask = self.groups == g + soln = self.initial_soln # do not need to keep setting this + + if norm(soln[group_mask]) > tol * norm(soln): # is group g appreciably nonzero + ordered_groups.append(g) + + # variables in active group + ordered_vars.extend(np.flatnonzero(group_mask)) + + if self.penalty.weights[g] == 0: + unpenalized.append(g) + + else: + active_groups.append(g) + active_dirs[g] = soln[group_mask] / norm(soln[group_mask]) + + ordered_opt.append(norm(soln[group_mask])) + else: + overall[group_mask] = False + + self.selection_variable = {'directions': active_dirs, + 'active_groups': active_groups} # kind of redundant with keys of active_dirs + + self._ordered_groups = ordered_groups + + # exception if no groups are selected + if len(self.selection_variable['active_groups']) == 0: + return np.sign(soln), soln + + # otherwise continue as before + self.observed_opt_state = np.hstack(ordered_opt) # gammas as array + + _beta_unpenalized = restricted_estimator(self.loglike, # refit OLS on E + overall, + solve_args=solve_args) + + beta_bar = np.zeros(self.nfeature) + beta_bar[overall] = _beta_unpenalized # refit OLS beta with zeros + self._beta_full = beta_bar + + X, y = self.loglike.data + W = self._W = self.loglike.saturated_loss.hessian(X.dot(beta_bar)) # all 1's for LS + opt_linearNoU = np.dot(X.T, X[:, ordered_vars] * W[:, np.newaxis]) + + for i, var in enumerate(ordered_vars): + opt_linearNoU[var, i] += self.ridge_term + + opt_offset = self.initial_subgrad + + self.observed_score_state = -opt_linearNoU.dot(_beta_unpenalized) + 
self.observed_score_state[~overall] += self.loglike.smooth_objective(beta_bar, 'grad')[~overall] + + active_signs = np.sign(self.initial_soln) + active = np.flatnonzero(active_signs) + self.active = active + + def compute_Vg(ug): + pg = ug.size # figure out size of g'th group + if pg > 1: + Z = np.column_stack((ug, np.eye(pg, pg - 1))) + Q, _ = qr(Z) + Vg = Q[:, 1:] # drop the first column + else: + Vg = np.zeros((1, 0)) # if the group is size one, the orthogonal complement is empty + return Vg + + def compute_Lg(g): + pg = active_dirs[g].size + Lg = self.penalty.weights[g] * np.eye(pg) + return Lg + + sorted_active_dirs = collections.OrderedDict(sorted(active_dirs.items())) + + Vs = [compute_Vg(ug) for ug in sorted_active_dirs.values()] + V = block_diag(*Vs) # unpack the list + Ls = [compute_Lg(g) for g in sorted_active_dirs] + L = block_diag(*Ls) # unpack the list + XE = X[:, ordered_vars] # changed to ordered_vars + Q = XE.T.dot(self._W[:, None] * XE) + QI = inv(Q) + C = V.T.dot(QI).dot(L).dot(V) + + self.XE = XE + self.Q = Q + self.QI = QI + self.C = C + + U = block_diag(*[ug for ug in sorted_active_dirs.values()]).T + + self.opt_linear = opt_linearNoU.dot(U) + self.active_dirs = active_dirs + self.opt_offset = opt_offset + self.ordered_vars = ordered_vars + + self.linear_part = -np.eye(self.observed_opt_state.shape[0]) + self.offset = np.zeros(self.observed_opt_state.shape[0]) + + # print("K.K.T. map", np.allclose(self._initial_omega, self.observed_score_state + self.opt_linear.dot(self.observed_opt_state) + # + self.opt_offset, rtol=1e-03)) + return active_signs, soln + + def _solve_randomized_problem(self, + perturb=None, + solve_args={'tol': 1.e-15, 'min_its': 100}): + + # take a new perturbation if supplied + if perturb is not None: + self._initial_omega = perturb + if self._initial_omega is None: + self._initial_omega = self.randomizer.sample() + + quad = rr.identity_quadratic(self.ridge_term, + 0, + -self._initial_omega, + 0) + + problem = rr.simple_problem(self.loglike, self.penalty) + + # if all groups are size 1, set up lasso penalty and run usual lasso solver... (see existing code)... + + initial_soln = problem.solve(quad, **solve_args) + initial_subgrad = -(self.loglike.smooth_objective(initial_soln, + 'grad') + + quad.objective(initial_soln, 'grad')) + + return initial_soln, initial_subgrad + + @staticmethod + def gaussian(X, + Y, + groups, + weights, + sigma=1., + quadratic=None, + ridge_term=0., + perturb=None, + use_lasso=True, # should lasso solver be used when applicable - defaults to True + randomizer_scale=None): + + loglike = rr.glm.gaussian(X, Y, coef=1. 
/ sigma ** 2, quadratic=quadratic) + n, p = X.shape + + mean_diag = np.mean((X ** 2).sum(0)) + if ridge_term is None: + ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1) + + if randomizer_scale is None: + randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.)) + + randomizer = randomization.isotropic_gaussian((p,), randomizer_scale) + + return group_lasso(loglike, + groups, + weights, + ridge_term, + randomizer, + use_lasso, + perturb) + + def _setup_implied_gaussian(self): + + _, prec = self.randomizer.cov_prec + + if np.asarray(prec).shape in [(), (0,)]: + cond_precision = self.opt_linear.T.dot(self.opt_linear) * prec + cond_cov = inv(cond_precision) + logdens_linear = cond_cov.dot(self.opt_linear.T) * prec + else: + cond_precision = self.opt_linear.T.dot(prec.dot(self.opt_linear)) + cond_cov = inv(cond_precision) + logdens_linear = cond_cov.dot(self.opt_linear.T).dot(prec) + + cond_mean = -logdens_linear.dot(self.observed_score_state + self.opt_offset) + self.cond_mean = cond_mean + self.cond_cov = cond_cov + self.cond_precision = cond_precision + self.logdens_linear = logdens_linear + + return cond_mean, cond_cov, cond_precision, logdens_linear + + def selective_MLE(self, + solve_args={'tol': 1.e-12}, + level=0.9, + useJacobian=True, + dispersion=None): + + """Do selective_MLE for group_lasso + Note: this masks the selective_MLE inherited from query + because that is not adapted for the group_lasso. Also, assumes + you have already run the fit method since this uses results + from that method. + Parameters + ---------- + observed_target: from selected_targets + target_cov: from selected_targets + target_cov_score: from selected_targets + init_soln: (opt_state) initial (observed) value of optimization variables + cond_mean: conditional mean of optimization variables (model on _setup_implied_gaussian) + cond_cov: conditional variance of optimization variables (model on _setup_implied_gaussian) + logdens_linear: (model on _setup_implied_gaussian) + linear_part: like A_scaling (from lasso) + offset: like b_scaling (from lasso) + solve_args: passed on to solver + level: level of confidence intervals + useC: whether to use python or C solver + JacobianPieces: (use self.C defined in fitting) + """ + + self._setup_implied_gaussian() # Calculate useful quantities + (observed_target, target_cov, target_score_cov, alternatives) = self.selected_targets(dispersion) + + init_soln = self.observed_opt_state # just the gammas + cond_mean = self.cond_mean + cond_cov = self.cond_cov + logdens_linear = self.logdens_linear + linear_part = self.linear_part + offset = self.offset + + if np.asarray(observed_target).shape in [(), (0,)]: + raise ValueError('no target specified') + + observed_target = np.atleast_1d(observed_target) + prec_target = inv(target_cov) + + # target_lin determines how the conditional mean of optimization variables + # vary with target + # logdens_linear determines how the argument of the optimization density + # depends on the score, not how the mean depends on score, hence the minus sign + + target_lin = - logdens_linear.dot(target_score_cov.T.dot(prec_target)) + target_offset = cond_mean - target_lin.dot(observed_target) + + prec_opt = self.cond_precision + + conjugate_arg = prec_opt.dot(cond_mean) + + val, soln, hess = solve_barrier_affine_jacobian_py(conjugate_arg, + prec_opt, + init_soln, + linear_part, + offset, + self.C, + self.active_dirs, + useJacobian, + **solve_args) + + log_ref = val + conjugate_arg.T.dot(cond_cov).dot(conjugate_arg) / 2. 
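        # Annotation (not from the original patch): unlike the plain
        # solve_barrier_affine solvers used elsewhere, the objective of
        # solve_barrier_affine_jacobian_py above also includes the negative
        # log-Jacobian term -jacobian_grad_hess(gs, C, active_dirs)[0], with
        # C = V^T Q^{-1} Lambda V assembled in fit().  This extra term accounts
        # for the change of variables from the group-lasso solution to the
        # group norms `gamma` and their unit directions.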
+ + final_estimator = observed_target + target_cov.dot(target_lin.T.dot(prec_opt.dot(cond_mean - soln))) + ind_unbiased_estimator = observed_target + target_cov.dot(target_lin.T.dot(prec_opt.dot(cond_mean + - init_soln))) + L = target_lin.T.dot(prec_opt) + observed_info_natural = prec_target + L.dot(target_lin) - L.dot(hess.dot(L.T)) + observed_info_mean = target_cov.dot(observed_info_natural.dot(target_cov)) + + Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) + pvalues = ndist.cdf(Z_scores) + pvalues = 2 * np.minimum(pvalues, 1 - pvalues) + + alpha = 1. - level + quantile = ndist.ppf(1 - alpha / 2.) + intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), + final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T + + result = pd.DataFrame({'MLE': final_estimator, + 'SE': np.sqrt(np.diag(observed_info_mean)), + 'Zvalue': Z_scores, + 'pvalue': pvalues, + 'lower_confidence': intervals[:, 0], + 'upper_confidence': intervals[:, 1], + 'unbiased': ind_unbiased_estimator}) + + return result, observed_info_mean, log_ref + + def selected_targets(self, + dispersion=None, + solve_args={'tol': 1.e-12, 'min_its': 50}): + + X, y = self.loglike.data + n, p = X.shape + + XE = self.XE + Q = self.Q + observed_target = restricted_estimator(self.loglike, self.ordered_vars, solve_args=solve_args) + _score_linear = -XE.T.dot(self._W[:, None] * X).T + alternatives = ['twosided'] * len(self.active) + + if dispersion is None: # use Pearson's X^2 + dispersion = ((y - self.loglike.saturated_loss.mean_function( + XE.dot(observed_target))) ** 2 / self._W).sum() / (n - XE.shape[1]) + + cov_target = self.QI * dispersion + crosscov_target_score = _score_linear.dot(self.QI).T * dispersion + + return (observed_target, + cov_target, + crosscov_target_score, + alternatives) + + +class approximate_grid_inference(object): + + def __init__(self, + query, + dispersion, + solve_args={'tol': 1.e-12}): + + """ + Produce p-values and confidence intervals for targets + of model including selected features + Parameters + ---------- + query : `gaussian_query` + A Gaussian query which has information + to describe implied Gaussian. + observed_target : ndarray + Observed estimate of target. + target_cov : ndarray + Estimated covaraince of target. + target_score_cov : ndarray + Estimated covariance of target and score of randomized query. + solve_args : dict, optional + Arguments passed to solver. 
+ """ + + self.solve_args = solve_args + + result, inverse_info = query.selective_MLE(dispersion=dispersion)[:2] + + (observed_target, target_cov, target_score_cov, alternatives) = query.selected_targets(dispersion) + + self.observed_target = observed_target + self.target_score_cov = target_score_cov + self.target_cov = target_cov + + self.linear_part = query.linear_part + self.offset = query.offset + + self.logdens_linear = query.logdens_linear + self.cond_mean = query.cond_mean + self.prec_opt = np.linalg.inv(query.cond_cov) + self.cond_cov = query.cond_cov + self.C = query.C + self.active_dirs = query.active_dirs + + self.init_soln = query.observed_opt_state + + self.ntarget = ntarget = target_cov.shape[0] + _scale = 4 * np.sqrt(np.diag(inverse_info)) + ngrid = 40 + + self.stat_grid = np.zeros((ntarget, ngrid)) + for j in range(ntarget): + self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], + observed_target[j] + 1.5 * _scale[j], + num=ngrid) + def summary(self, + alternatives=None, + parameter=None, + level=0.9): + """ + Produce p-values and confidence intervals for targets + of model including selected features + Parameters + ---------- + alternatives : [str], optional + Sequence of strings describing the alternatives, + should be values of ['twosided', 'less', 'greater'] + parameter : np.array + Hypothesized value for parameter -- defaults to 0. + level : float + Confidence level. + """ + + if parameter is not None: + pivots = self.approx_pivots(parameter, + alternatives=alternatives) + else: + pivots = None + + pvalues = self._approx_pivots(np.zeros_like(self.observed_target), + alternatives=alternatives) + lower, upper = self._approx_intervals(level=level) + + result = pd.DataFrame({'target': self.observed_target, + 'pvalue': pvalues, + 'lower_confidence': lower, + 'upper_confidence': upper}) + + if not np.all(parameter == 0): + result.insert(4, 'pivot', pivots) + result.insert(5, 'parameter', parameter) + + return result + + def _approx_log_reference(self, + observed_target, + target_cov, + target_score_cov, + grid): + + """ + Approximate the log of the reference density on a grid. + """ + if np.asarray(observed_target).shape in [(), (0,)]: + raise ValueError('no target specified') + + prec_target = np.linalg.inv(target_cov) + target_lin = - self.logdens_linear.dot(target_score_cov.T.dot(prec_target)) + + ref_hat = [] + solver = _solve_barrier_affine_py + + for k in range(grid.shape[0]): + + cond_mean_grid = (target_lin.dot(np.atleast_1d(grid[k] - observed_target)) + + self.cond_mean) + conjugate_arg = self.prec_opt.dot(cond_mean_grid) + + val, soln, _ = solver(conjugate_arg, + self.prec_opt, + self.init_soln, + self.linear_part, + self.offset, + **self.solve_args) + + log_jacob = jacobian_grad_hess(soln, self.C, self.active_dirs) + + ref_hat.append(-val - (conjugate_arg.T.dot(self.cond_cov).dot(conjugate_arg) / 2.) 
+ log_jacob[0]) + + return np.asarray(ref_hat) + + def _construct_families(self): + + self._families = [] + for m in range(self.ntarget): + p = self.target_score_cov.shape[1] + observed_target_uni = (self.observed_target[m]).reshape((1,)) + target_cov_uni = (np.diag(self.target_cov)[m]).reshape((1, 1)) + var_target = target_cov_uni[0, 0] + target_score_cov_uni = self.target_score_cov[m, :].reshape((1, p)) + + approx_log_ref = self._approx_log_reference(observed_target_uni, + target_cov_uni, + target_score_cov_uni, + self.stat_grid[m]) + + approx_fn = interp1d(self.stat_grid[m], + approx_log_ref, + kind='quadratic', + bounds_error=False, + fill_value='extrapolate') + + grid = np.linspace(self.stat_grid[m].min(), self.stat_grid[m].max(), 1000) + logW = (approx_fn(grid) - + 0.5 * (grid - self.observed_target[m]) ** 2 / var_target) + logW -= logW.max() + + # construction of families follows `selectinf.learning.core` + + self._families.append(discrete_family(grid, + np.exp(logW))) + + + def _approx_pivots(self, + mean_parameter, + alternatives=None): + + if not hasattr(self, "_families"): + self._construct_families() + + if alternatives is None: + alternatives = ['twosided'] * self.ntarget + + pivot = [] + + for m in range(self.ntarget): + print("variable computed ", m) + family = self._families[m] + observed_target = self.observed_target[m] + var_target = self.target_cov[m, m] + + # construction of pivot from families follows `selectinf.learning.core` + + _cdf = family.cdf((mean_parameter[m] - observed_target) / var_target, + x=observed_target) + if alternatives[m] == 'twosided': + pivot.append(2 * min(_cdf, 1 - _cdf)) + elif alternatives[m] == 'greater': + pivot.append(1 - _cdf) + elif alternatives[m] == 'less': + pivot.append(_cdf) + else: + raise ValueError('alternative should be in ["twosided", "less", "greater"]') + return pivot + + def _approx_intervals(self, + level=0.9): + + if not hasattr(self, "_families"): + self._construct_families() + + lower, upper = [], [] + + for m in range(self.ntarget): + # construction of intervals from families follows `selectinf.learning.core` + family = self._families[m] + observed_target = self.observed_target[m] + l, u = family.equal_tailed_interval(observed_target, + alpha=1 - level) + var_target = self.target_cov[m, m] + lower.append(l * var_target + observed_target) + upper.append(u * var_target + observed_target) + + return np.asarray(lower), np.asarray(upper) + + +def solve_barrier_affine_jacobian_py(conjugate_arg, + precision, + feasible_point, + con_linear, + con_offset, + C, + active_dirs, + useJacobian=True, + step=1, + nstep=2000, + min_its=500, + tol=1.e-12): + """ + This needs to be updated to actually use the Jacobian information (in self.C) + arguments + conjugate_arg: \\bar{\\Sigma}^{-1} \bar{\\mu} + precision: \\bar{\\Sigma}^{-1} + feasible_point: gamma's from fitting + con_linear: linear part of affine constraint used for barrier function + con_offset: offset part of affine constraint used for barrier function + C: V^T Q^{-1} \\Lambda V + active_dirs: + """ + scaling = np.sqrt(np.diag(con_linear.dot(precision).dot(con_linear.T))) + + if feasible_point is None: + feasible_point = 1. / scaling + + def objective(gs): + p1 = -gs.T.dot(conjugate_arg) + p2 = gs.T.dot(precision).dot(gs) / 2. + if useJacobian: + p3 = - jacobian_grad_hess(gs, C, active_dirs)[0] + else: + p3 = 0 + p4 = log(1. + 1. 
/ ((con_offset - con_linear.dot(gs)) / scaling)).sum() + return p1 + p2 + p3 + p4 + + def grad(gs): + p1 = -conjugate_arg + precision.dot(gs) + p2 = -con_linear.T.dot(1. / (scaling + con_offset - con_linear.dot(gs))) + if useJacobian: + p3 = - jacobian_grad_hess(gs, C, active_dirs)[1] + else: + p3 = 0 + p4 = 1. / (con_offset - con_linear.dot(gs)) + return p1 + p2 + p3 + p4 + + def barrier_hessian(gs): # contribution of barrier and jacobian to hessian + p1 = con_linear.T.dot(np.diag(-1. / ((scaling + con_offset - con_linear.dot(gs)) ** 2.) + + 1. / ((con_offset - con_linear.dot(gs)) ** 2.))).dot(con_linear) + if useJacobian: + p2 = - jacobian_grad_hess(gs, C, active_dirs)[2] + else: + p2 = 0 + return p1 + p2 + + current = feasible_point + current_value = np.inf + + for itercount in range(nstep): + cur_grad = grad(current) + + # make sure proposal is feasible + + count = 0 + while True: + count += 1 + proposal = current - step * cur_grad + if np.all(con_offset - con_linear.dot(proposal) > 0): + break + step *= 0.5 + if count >= 40: + raise ValueError('not finding a feasible point') + # make sure proposal is a descent + + count = 0 + while True: + count += 1 + proposal = current - step * cur_grad + proposed_value = objective(proposal) + if proposed_value <= current_value: + break + step *= 0.5 + if count >= 20: + if not (np.isnan(proposed_value) or np.isnan(current_value)): + break + else: + raise ValueError('value is NaN: %f, %f' % (proposed_value, current_value)) + + # stop if relative decrease is small + + if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value) and itercount >= min_its: + current = proposal + current_value = proposed_value + break + + current = proposal + current_value = proposed_value + + if itercount % 4 == 0: + step *= 2 + + hess = inv(precision + barrier_hessian(current)) + return current_value, current, hess + + +# Jacobian calculations +def calc_GammaMinus(gamma, active_dirs): + """Calculate Gamma^minus (as a function of gamma vector, active directions) + """ + to_diag = [[g] * (ug.size - 1) for (g, ug) in zip(gamma, active_dirs.values())] + return block_diag(*[i for gp in to_diag for i in gp]) + + +def jacobian_grad_hess(gamma, C, active_dirs): + """ Calculate the log-Jacobian (scalar), gradient (gamma.size vector) and hessian (gamma.size square matrix) + """ + if C.shape == (0, 0): # when all groups are size one, C will be an empty array + return 0, 0, 0 + else: + GammaMinus = calc_GammaMinus(gamma, active_dirs) + + # eigendecomposition + evalues, evectors = eig(GammaMinus + C) + + # log Jacobian + J = log(evalues).sum() + + # inverse + GpC_inv = evectors.dot(np.diag(1 / evalues).dot(evectors.T)) + + # summing matrix (gamma.size by C.shape[0]) + S = block_diag(*[np.ones((1, ug.size - 1)) for ug in active_dirs.values()]) + + # gradient + grad_J = S.dot(GpC_inv.diagonal()) + + # hessian + hess_J = -S.dot(np.multiply(GpC_inv, GpC_inv.T).dot(S.T)) + + return J, grad_J, hess_J + +def _check_groups(groups): + """Make sure that the user-specific groups are ok + There are a number of assumptions that group_lasso makes about + how groups are specified. Specifically, we assume that + `groups` is a 1-d array_like of integers that are sorted in + increasing order, start at 0, and have no gaps (e.g., if there + is a group 2 and a group 4, there must also be at least one + feature in group 3). + This function checks the user-specified group scheme and + raises an exception if it finds any problems. 
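+    For example, ``groups = [0, 0, 1, 1, 2]`` passes these checks, while
+    ``[0, 0, 2, 2]`` (group 1 is skipped) or ``[1, 1, 2, 2]`` (no group 0)
+    raises a ``ValueError``.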
+ Sorting feature groups is potentially tedious for the user and + in future we might do this for them. + """ + + # check array_like + agroups = np.array(groups) + + # check dimension + if len(agroups.shape) != 1: + raise ValueError("Groups are not a 1D array_like") + + # check sorted + if np.any(agroups[:-1] > agroups[1:]) < 0: + raise ValueError("Groups are not sorted") + + # check integers + if not np.issubdtype(agroups.dtype, np.integer): + raise TypeError("Groups are not integers") + + # check starts with 0 + if not np.amin(agroups) == 0: + raise ValueError("First group is not 0") + + # check for no skipped groups + if not np.all(np.diff(np.unique(agroups)) == 1): + raise ValueError("Some group is skipped") diff --git a/selectinf/randomized/tests/test_approx_reference_grouplasso.py b/selectinf/randomized/tests/test_approx_reference_grouplasso.py new file mode 100644 index 000000000..0f4d64539 --- /dev/null +++ b/selectinf/randomized/tests/test_approx_reference_grouplasso.py @@ -0,0 +1,88 @@ +import numpy as np + +from ...tests.instance import gaussian_instance, gaussian_group_instance +from ..approx_reference_grouplasso import group_lasso, approximate_grid_inference + +def test_approx_pivot(n=500, + p=200, + signal_fac=0.1, + sgroup=3, + groups=np.arange(50).repeat(4), + sigma=3., + rho=0.3, + randomizer_scale=1, + weight_frac=1.2): + + inst, const = gaussian_group_instance, group_lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + X, Y, beta = inst(n=n, + p=p, + signal=signal, + sgroup=sgroup, + groups=groups, + equicorrelated=False, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + + penalty_weights = dict([(i, weight_frac * sigma_ * np.sqrt(2 * np.log(p))) for i in np.unique(groups)]) + + conv = const(X, + Y, + groups, + penalty_weights, + randomizer_scale=randomizer_scale * dispersion) + + signs, _ = conv.fit() + nonzero = signs != 0 + print("number of selected variables ", nonzero.sum()) + + if nonzero.sum()>0: + + conv._setup_implied_gaussian() + + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + + approximate_grid_inf = approximate_grid_inference(conv, + dispersion) + + pivot = approximate_grid_inf._approx_pivots(beta_target) + + return pivot + + +def main(nsim=300, CI = False): + + import matplotlib.pyplot as plt + from statsmodels.distributions.empirical_distribution import ECDF + if CI is False: + _pivot = [] + for i in range(nsim): + _pivot.extend(test_approx_pivot(n=500, + p=100, + signal_fac=0.3, + sgroup=3, + groups=np.arange(20).repeat(5), + sigma=1., + rho=0.20, + randomizer_scale=0.5, + weight_frac=1.)) + + print("iteration completed ", i) + + plt.clf() + ecdf_MLE = ECDF(np.asarray(_pivot)) + grid = np.linspace(0, 1, 101) + plt.plot(grid, ecdf_MLE(grid), c='blue', marker='^') + plt.plot(grid, grid, 'k--') + plt.show() + +if __name__ == "__main__": + + main(nsim=50, CI = False) diff --git a/selectinf/tests/instance.py b/selectinf/tests/instance.py index 15826a148..9a75a8ded 100644 --- a/selectinf/tests/instance.py +++ b/selectinf/tests/instance.py @@ -373,3 +373,97 @@ def HIV_NRTI(drug='3TC', Y -= Y.mean() X_NRTI -= X_NRTI.mean(0)[None, :]; X_NRTI /= X_NRTI.std(0)[None,:] return X_NRTI, Y, np.array(NRTI_muts) + + +def gaussian_group_instance(n=100, p=200, sgroup=7, sigma=5, rho=0., signal=7, + random_signs=False, df=np.inf, + scale=True, center=True, + groups=np.arange(20).repeat(10), + equicorrelated=True): + """A 
testing instance for the group LASSO. + If equicorrelated is True design is equi-correlated in the population, + normalized to have columns of norm 1. + If equicorrelated is False design is auto-regressive. + For the default settings, a $\\lambda$ of around 13.5 + corresponds to the theoretical $E(\\|X^T\\epsilon\\|_{\\infty})$ + with $\\epsilon \\sim N(0, \\sigma^2 I)$. + Parameters + ---------- + n : int + Sample size + p : int + Number of features + sgroup : int + True sparsity (number of active groups) + groups : array_like (1d, size == p) + Assignment of features to (non-overlapping) groups + sigma : float + Noise level + rho : float + Equicorrelation value (must be in interval [0,1]) + signal : float or (float, float) + Sizes for the coefficients. If a tuple -- then coefficients + are equally spaced between these values using np.linspace. + Note: the size of signal is for a "normalized" design, where np.diag(X.T.dot(X)) == np.ones(p). + If scale=False, this signal is divided by np.sqrt(n), otherwise it is unchanged. + random_signs : bool + If true, assign random signs to coefficients. + Else they are all positive. + df : int + Degrees of freedom for noise (from T distribution). + equicorrelated: bool + If true, design in equi-correlated, + Else design is AR. + Returns + ------- + X : np.float((n,p)) + Design matrix. + y : np.float(n) + Response vector. + beta : np.float(p) + True coefficients. + active : np.int(s) + Non-zero pattern. + sigma : float + Noise level. + sigmaX : np.ndarray((p,p)) + Row covariance. + """ + + X, sigmaX = _design(n, p, rho, equicorrelated)[:2] + + if center: + X -= X.mean(0)[None, :] + + beta = np.zeros(p) + signal = np.atleast_1d(signal) + + group_labels = np.unique(groups) + group_active = np.random.choice(group_labels, sgroup, replace=False) + + active = np.isin(groups, group_active) + + if signal.shape == (1,): + beta[active] = signal[0] + else: + beta[active] = np.linspace(signal[0], signal[1], active.sum()) + if random_signs: + beta[active] *= (2 * np.random.binomial(1, 0.5, size=(active.sum(),)) - 1.) 
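+    # `signal` refers to a normalized design: beta is first divided by
+    # sqrt(n); if `scale` is True the columns of X are rescaled to have
+    # (roughly) unit norm and beta is multiplied back by sqrt(n), so the
+    # effect sizes stay on the normalized scale either way.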
+ beta /= np.sqrt(n) + + if scale: + scaling = X.std(0) * np.sqrt(n) + X /= scaling[None, :] + beta *= np.sqrt(n) + sigmaX = sigmaX / np.multiply.outer(scaling, scaling) + + # noise model + def _noise(n, df=np.inf): + if df == np.inf: + return np.random.standard_normal(n) + else: + sd_t = np.std(tdist.rvs(df, size=50000)) + return tdist.rvs(df, size=n) / sd_t + + Y = (X.dot(beta) + _noise(n, df)) * sigma + return X, Y, beta * sigma, np.nonzero(active)[0], sigma, sigmaX \ No newline at end of file From bc2107eb9f3846c0cc2c9d42951b880a0eb42037 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Thu, 25 Feb 2021 12:29:09 -0500 Subject: [PATCH 073/187] commit changes before switch --- selectinf/randomized/tests/test_approx_reference_grouplasso.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectinf/randomized/tests/test_approx_reference_grouplasso.py b/selectinf/randomized/tests/test_approx_reference_grouplasso.py index 0f4d64539..0b4f53474 100644 --- a/selectinf/randomized/tests/test_approx_reference_grouplasso.py +++ b/selectinf/randomized/tests/test_approx_reference_grouplasso.py @@ -1,6 +1,6 @@ import numpy as np -from ...tests.instance import gaussian_instance, gaussian_group_instance +from ...tests.instance import gaussian_group_instance from ..approx_reference_grouplasso import group_lasso, approximate_grid_inference def test_approx_pivot(n=500, From c04d7a6f54a0fcc87cadf87f55e163ae1c3603f9 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 2 Mar 2021 15:43:28 -0800 Subject: [PATCH 074/187] BF: misnamed columns --- selectinf/randomized/tests/test_selective_MLE_high.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 578ae66ec..7ea737021 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -74,7 +74,9 @@ def test_full_targets(n=200, cov_target_score)[0] pval = result['pvalue'] estimate = result['MLE'] - intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + print("estimate, intervals", estimate, intervals) coverage = (beta[nonzero] > intervals[:, 0]) * (beta[nonzero] < intervals[:, 1]) @@ -142,7 +144,8 @@ def test_selected_targets(n=2000, cov_target_score)[0] estimate = result['MLE'] pval = result['pvalue'] - intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) @@ -210,7 +213,8 @@ def test_instance(): cov_target_score)[0] estimate = result['MLE'] pval = result['pvalue'] - intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) beta_target = np.linalg.pinv(X[:, M]).dot(X.dot(beta)) From fc9b0a31ebf91ddffa989b5b699e0bbf5f2c4ce7 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 2 Mar 2021 15:44:56 -0800 Subject: [PATCH 075/187] BF: renaming of module --- doc/learning_examples/lasso_CV/lasso_example_CV.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/learning_examples/lasso_CV/lasso_example_CV.py b/doc/learning_examples/lasso_CV/lasso_example_CV.py index ad08a05a9..f0b6fa0f5 100644 --- a/doc/learning_examples/lasso_CV/lasso_example_CV.py +++ 
b/doc/learning_examples/lasso_CV/lasso_example_CV.py @@ -5,11 +5,11 @@ import regreg.api as rr -from selection.tests.instance import gaussian_instance +from selectinf.tests.instance import gaussian_instance -from selection.learning.utils import full_model_inference, pivot_plot -from selection.learning.core import split_sampler, probit_fit -from selection.learning.Rutils import lasso_glmnet +from selectinf.learning.utils import full_model_inference, pivot_plot +from selectinf.learning.core import split_sampler, probit_fit +from selectinf.learning.Rutils import lasso_glmnet def simulate(n=200, p=100, s=10, signal=(0.5, 1), sigma=2, alpha=0.1): From f9a19787b837cb4883ed00cafba19781e72c2f0a Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 2 Mar 2021 15:50:59 -0800 Subject: [PATCH 076/187] fix some warnings about literal comparison --- selectinf/algorithms/tests/test_compareR.py | 2 +- selectinf/randomized/tests/test_lasso.py | 4 +--- selectinf/randomized/tests/test_slope.py | 4 ++-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/selectinf/algorithms/tests/test_compareR.py b/selectinf/algorithms/tests/test_compareR.py index 58ac797cb..81b01d877 100644 --- a/selectinf/algorithms/tests/test_compareR.py +++ b/selectinf/algorithms/tests/test_compareR.py @@ -875,7 +875,7 @@ def test_rlasso_gaussian(): random_signs=True) sigma_ = np.std(y) - if target is not 'debiased': + if target != 'debiased': lam = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p)) * sigma_ else: lam = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ diff --git a/selectinf/randomized/tests/test_lasso.py b/selectinf/randomized/tests/test_lasso.py index 507a80d63..5d0f2bd63 100644 --- a/selectinf/randomized/tests/test_lasso.py +++ b/selectinf/randomized/tests/test_lasso.py @@ -41,7 +41,7 @@ def test_highdim_lasso(n=500, n, p = X.shape sigma_ = np.std(Y) - if target is not 'debiased': + if target != 'debiased': W = np.ones(X.shape[1]) * np.sqrt(1.5 * np.log(p)) * sigma_ else: W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ @@ -394,5 +394,3 @@ def main(nsim=500, n=500, p=200, sqrt=False, target='full', sigma=3, AR=True): plt.show() -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/selectinf/randomized/tests/test_slope.py b/selectinf/randomized/tests/test_slope.py index bc3a475a7..66a89ac19 100644 --- a/selectinf/randomized/tests/test_slope.py +++ b/selectinf/randomized/tests/test_slope.py @@ -55,9 +55,9 @@ def slope_R(X, Y, W = None, normalize = True, choice_weights = "gaussian", sigma if W is None: r_W = robjects.NA_Logical - if choice_weights is "gaussian": + if choice_weights == "gaussian": r_choice_weights = robjects.StrVector('gaussian') - elif choice_weights is "bh": + elif choice_weights == "bh": r_choice_weights = robjects.StrVector('bh') else: r_W = robjects.r.matrix(W, nrow=p, ncol=1) From 5df425bba0828219f7ace4ba449240ceee82c807 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 15 Mar 2021 17:53:31 -0700 Subject: [PATCH 077/187] standalone functions for lasso inference; added nongaussian split-lasso classes; a test for randomized lasso; --- selectinf/randomized/approx_reference.py | 32 +-- selectinf/randomized/lasso.py | 256 +++++++++++++++--- selectinf/randomized/query.py | 16 +- selectinf/randomized/tests/test_lasso.py | 71 ++++- .../tests/test_standalone_lasso_mle.py | 195 +++++++++++++ 5 files changed, 512 insertions(+), 58 deletions(-) create mode 100644 selectinf/randomized/tests/test_standalone_lasso_mle.py diff --git 
a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index af8b936c8..f253ed01a 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -9,10 +9,16 @@ class approximate_grid_inference(object): def __init__(self, - query, observed_target, target_cov, target_score_cov, + inverse_info, + init_soln, + cond_mean, + cond_cov, + logdens_linear, + linear_part, + offset, solve_args={'tol':1.e-12}): """ @@ -41,27 +47,19 @@ def __init__(self, """ self.solve_args = solve_args - - result, inverse_info = query.selective_MLE(observed_target, - target_cov, - target_score_cov, - solve_args=solve_args)[:2] - mle = result['MLE'] - self.linear_part = query.sampler.affine_con.linear_part - self.offset = query.sampler.affine_con.offset - - self.logdens_linear = query.sampler.logdens_transform[0] - self.cond_mean = query.cond_mean - self.prec_opt = np.linalg.inv(query.cond_cov) - self.cond_cov = query.cond_cov - + self.init_soln = init_soln + self.cond_mean = cond_mean + self.cond_cov = cond_cov + self.prec_opt = np.linalg.inv(self.cond_cov) + + self.logdens_linear = logdens_linear + self.linear_part = linear_part + self.offset = offset self.observed_target = observed_target self.target_score_cov = target_score_cov self.target_cov = target_cov - self.init_soln = query.observed_opt_state - self.ntarget = ntarget = target_cov.shape[0] _scale = 4 * np.sqrt(np.diag(inverse_info)) ngrid = 40 diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index f06e837eb..e24243386 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -162,20 +162,38 @@ def fit(self, # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator X, y = self.loglike.data - W = self._W = self.loglike.saturated_loss.hessian(X.dot(beta_bar)) - _hessian_active = np.dot(X.T, X[:, active] * W[:, None]) - _hessian_unpen = np.dot(X.T, X[:, unpenalized] * W[:, None]) + linpred = X.dot(beta_bar) + n = linpred.shape[0] + if hasattr(self.loglike.saturated_loss, "hessian"): # a GLM -- all we need is W + W = self._W = self.loglike.saturated_loss.hessian(linpred) + _hessian_active = np.dot(X.T, X[:, active] * W[:, None]) + _hessian_unpen = np.dot(X.T, X[:, unpenalized] * W[:, None]) + elif hasattr(self.loglike.saturated_loss, "hessian_mult"): + active_right = np.zeros((n, active.sum())) + for i, j in enumerate(np.nonzero(active)[0]): + active_right[:,i] = self.loglike.saturated_loss.hessian_mult(linpred, + X[:,j], + case_weights=self.loglike.saturated_loss.case_weights) + unpen_right = np.zeros((n, unpenalized.sum())) + for i, j in enumerate(np.nonzero(unpenalized)[0]): + unpen_right[:,i] = self.loglike.saturated_loss.hessian_mult(linpred, + X[:,j], + case_weights=self.loglike.saturated_loss.case_weights) + _hessian_active = X.T.dot(active_right) + _hessian_unpen = X.T.dot(unpen_right) + else: + raise ValueError('saturated_loss has no hessian or hessian_mult method') _score_linear_term = -np.hstack([_hessian_active, _hessian_unpen]) # set the observed score (data dependent) state # observed_score_state is - # \nabla \ell(\bar{\beta}_E) + Q(\bar{\beta}_E) \bar{\beta}_E + # \nabla \ell(\bar{\beta}_E) - Q(\bar{\beta}_E) \bar{\beta}_E # in linear regression this is _ALWAYS_ -X^TY # # should be asymptotically equivalent to - # \nabla \ell(\beta^*) + Q(\beta^*)\beta^* + # \nabla \ell(\beta^*) - Q(\beta^*)\beta^* self.observed_score_state = _score_linear_term.dot(_beta_unpenalized) self.observed_score_state[inactive] += 
self.loglike.smooth_objective(beta_bar, 'grad')[inactive] @@ -300,9 +318,6 @@ def gaussian(X, randomizer_scale : float Scale for IID components of randomizer. - randomizer : str - One of ['laplace', 'logistic', 'gaussian'] - Returns ------- @@ -381,9 +396,6 @@ def logistic(X, randomizer_scale : float Scale for IID components of randomizer. - randomizer : str - One of ['laplace', 'logistic', 'gaussian'] - Returns ------- @@ -463,16 +475,13 @@ def coxph(X, randomizer_scale : float Scale for IID components of randomizer. - randomizer : str - One of ['laplace', 'logistic', 'gaussian'] - Returns ------- L : `selection.randomized.lasso.lasso` """ - loglike = coxph_obj(X, times, status, quadratic=quadratic) + loglike = rr.glm.cox(X, times, status, quadratic=quadratic) # scale for randomization seems kind of meaningless here... @@ -536,9 +545,6 @@ def poisson(X, randomizer_scale : float Scale for IID components of randomizer. - randomizer : str - One of ['laplace', 'logistic', 'gaussian'] - Returns ------- @@ -620,9 +626,6 @@ def sqrt_lasso(X, randomizer_scale : float Scale for IID components of randomizer. - randomizer : str - One of ['laplace', 'logistic', 'gaussian'] - Returns ------- @@ -691,16 +694,21 @@ def selected_targets(loglike, features, sign_info={}, dispersion=None, - solve_args={'tol': 1.e-12, 'min_its': 50}): + solve_args={'tol': 1.e-12, 'min_its': 50}, + hessian=None): X, y = loglike.data n, p = X.shape Xfeat = X[:, features] - Qfeat = Xfeat.T.dot(W[:, None] * Xfeat) + if hessian is None: + Qfeat = Xfeat.T.dot(W[:, None] * Xfeat) + _score_linear = -Xfeat.T.dot(W[:, None] * X).T + else: + Qfeat = hessian[features][:,features] + _score_linear = -hessian[features].T observed_target = restricted_estimator(loglike, features, solve_args=solve_args) cov_target = np.linalg.inv(Qfeat) - _score_linear = -Xfeat.T.dot(W[:, None] * X).T crosscov_target_score = _score_linear.dot(cov_target) alternatives = ['twosided'] * features.sum() features_idx = np.arange(p)[features] @@ -823,7 +831,8 @@ def __init__(self, feature_weights, proportion_select, ridge_term=0, - perturb=None): + perturb=None, + estimate_dispersion=False): (self.loglike, self.feature_weights, @@ -836,11 +845,11 @@ def __init__(self, self.nfeature = p = self.loglike.shape[0] self.penalty = rr.weighted_l1norm(self.feature_weights, lagrange=1.) self._initial_omega = perturb + self.estimate_dispersion = estimate_dispersion def fit(self, solve_args={'tol': 1.e-12, 'min_its': 50}, - perturb=None, - estimate_dispersion=True): + perturb=None): signs = lasso.fit(self, solve_args=solve_args, @@ -851,7 +860,7 @@ def fit(self, # we then setup up the sampler again - if estimate_dispersion: + if self.estimate_dispersion: X, y = self.loglike.data n, p = X.shape @@ -864,7 +873,6 @@ def fit(self, # run setup again after # estimating dispersion - print(dispersion, 'dispersion') if df_fit > 0: self._setup_sampler(*self._setup_sampler_data, dispersion=dispersion) @@ -949,7 +957,7 @@ def gaussian(X, proportion, sigma=1., quadratic=None, - ridge_term=0): + estimate_dispersion=True): r""" Squared-error LASSO with feature weights. Objective function is (before randomization) @@ -977,6 +985,9 @@ def gaussian(X, `feature_weights` to 0. If `feature_weights` is a float, then all parameters are penalized equally. + proportion: float + What proportion of data to use for selection. + sigma : float (optional) Noise variance. Set to 1 if `covariance_estimator` is not None. This scales the loglikelihood by `sigma**(-2)`. 
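+        estimate_dispersion : bool
+            If True, the dispersion is estimated from the fitted
+            selected model and used when setting up the implied Gaussian.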
@@ -986,12 +997,6 @@ def gaussian(X, Can also be a linear term by setting quadratic coefficient to 0. - randomizer_scale : float - Scale for IID components of randomizer. - - randomizer : str - One of ['laplace', 'logistic', 'gaussian'] - Returns ------- @@ -1003,10 +1008,185 @@ def gaussian(X, Y, coef=1. / sigma ** 2, quadratic=quadratic) - n, p = X.shape return split_lasso(loglike, - np.asarray(feature_weights) / sigma ** 2, + np.asarray(feature_weights)/sigma**2, + proportion, + estimate_dispersion=estimate_dispersion) + + + @staticmethod + def logistic(X, + successes, + feature_weights, + proportion, + trials=None, + quadratic=None): + r""" + Logistic LASSO with feature weights (before randomization) + + .. math:: + + \beta \mapsto \ell(X\beta) + \sum_{i=1}^p \lambda_i |\beta_i| + + where $\ell$ is the negative of the logistic + log-likelihood (half the logistic deviance) + and $\lambda$ is `feature_weights`. + + Parameters + ---------- + + X : ndarray + Shape (n,p) -- the design matrix. + + successes : ndarray + Shape (n,) -- response vector. An integer number of successes. + For data that is proportions, multiply the proportions + by the number of trials first. + + feature_weights: [float, sequence] + Penalty weights. An intercept, or other unpenalized + features are handled by setting those entries of + `feature_weights` to 0. If `feature_weights` is + a float, then all parameters are penalized equally. + + proportion: float + What proportion of data to use for selection. + + trials : ndarray (optional) + Number of trials per response, defaults to + ones the same shape as Y. + + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. + + Returns + ------- + + L : `selection.randomized.lasso.lasso` + + """ + + loglike = rr.glm.logistic(X, + successes, + trials=trials, + quadratic=quadratic) + + return split_lasso(loglike, + np.asarray(feature_weights), proportion) + @staticmethod + def coxph(X, + times, + status, + feature_weights, + proportion, + quadratic=None): + r""" + Cox proportional hazards LASSO with feature weights. + Objective function is (before randomization) + + .. math:: + \beta \mapsto \ell^{\text{Cox}}(\beta) + + \sum_{i=1}^p \lambda_i |\beta_i| + + where $\ell^{\text{Cox}}$ is the + negative of the log of the Cox partial + likelihood and $\lambda$ is `feature_weights`. + Uses Efron's tie breaking method. + + Parameters + ---------- + + X : ndarray + Shape (n,p) -- the design matrix. + + times : ndarray + Shape (n,) -- the survival times. + + status : ndarray + Shape (n,) -- the censoring status. + + feature_weights: [float, sequence] + Penalty weights. An intercept, or other unpenalized + features are handled by setting those entries of + `feature_weights` to 0. If `feature_weights` is + a float, then all parameters are penalized equally. + + proportion: float + What proportion of data to use for selection. + + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. 
+ + Returns + ------- + + L : `selection.randomized.lasso.lasso` + + """ + loglike = rr.glm.cox(X, times, status, quadratic=quadratic) + + return split_lasso(loglike, + np.asarray(feature_weights), + proportion) + + @staticmethod + def poisson(X, + counts, + feature_weights, + proportion, + quadratic=None, + ridge_term=None): + r""" + Poisson log-linear LASSO with feature weights. + Objective function is (before randomization) + + .. math:: + + \beta \mapsto \ell^{\text{Poisson}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i| + + where $\ell^{\text{Poisson}}$ is the negative + of the log of the Poisson likelihood (half the deviance) + and $\lambda$ is `feature_weights`. + + Parameters + ---------- + + X : ndarray + Shape (n,p) -- the design matrix. + + counts : ndarray + Shape (n,) -- the response. + + feature_weights: [float, sequence] + Penalty weights. An intercept, or other unpenalized + features are handled by setting those entries of + `feature_weights` to 0. If `feature_weights` is + a float, then all parameters are penalized equally. + + proportion: float + What proportion of data to use for selection. + + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. + + Returns + ------- + + L : `selection.randomized.lasso.lasso` + + """ + loglike = rr.glm.poisson(X, counts, quadratic=quadratic) + + return split_lasso(loglike, + np.asarray(feature_weights), + proportion) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index df890a2ef..6d1bbecd7 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -418,11 +418,23 @@ def approximate_grid_inference(self, """ - G = approximate_grid_inference(self, - observed_target, + inverse_info = self.selective_MLE(observed_target, + target_cov, + target_score_cov, + solve_args=solve_args)[1] + + G = approximate_grid_inference(observed_target, target_cov, target_score_cov, + inverse_info, + self.observed_opt_state, + self.cond_mean, + self.cond_cov, + self.sampler.logdens_transform[0], + self.sampler.affine_con.linear_part, + self.sampler.affine_con.offset, solve_args=solve_args) + return G.summary(alternatives=alternatives) class multiple_queries(object): diff --git a/selectinf/randomized/tests/test_lasso.py b/selectinf/randomized/tests/test_lasso.py index 5d0f2bd63..3a16411ec 100644 --- a/selectinf/randomized/tests/test_lasso.py +++ b/selectinf/randomized/tests/test_lasso.py @@ -6,7 +6,7 @@ import regreg.api as rr from ..lasso import lasso, selected_targets, full_targets, debiased_targets -from ...tests.instance import gaussian_instance +from ...tests.instance import gaussian_instance, logistic_instance from ...tests.flags import SET_SEED from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue from ...algorithms.sqrt_lasso import choose_lambda, solve_sqrt_lasso @@ -355,6 +355,75 @@ def Rpval(X, Y, W, noise_scale=None): assert np.linalg.norm(conv.sampler.affine_con.covariance - cond_cov) / np.linalg.norm(cond_cov) < 1.e-3 assert np.linalg.norm(conv.sampler.affine_con.mean - cond_mean[:,0]) / np.linalg.norm(cond_mean[:,0]) < 1.e-3 +def test_logistic_lasso(n=500, + p=200, + signal_fac=1.5, + s=5, + full=True, + rho=0.4, + randomizer_scale=1., + ndraw=5000, + burnin=1000, + ridge_term=None, compare_to_lasso=True): + """ + Compare to R randomized lasso + """ + + inst, const = logistic_instance, lasso.logistic + signal = np.sqrt(signal_fac * 2 * 
np.log(p)) + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:3] + + if ridge_term is None: + mean_diag = np.mean((X**2).sum(0)) + ridge_term = (np.sqrt(mean_diag) / np.sqrt(n)) * np.sqrt(n / (n - 1.)) + + W = np.ones(X.shape[1]) * choose_lambda(X) * 0.7 + + perturb = np.random.standard_normal(p) * randomizer_scale / np.sqrt(n) + + conv = const(X, + Y, + W, + randomizer_scale=randomizer_scale / np.sqrt(n), + ridge_term=ridge_term) + + signs = conv.fit() + nonzero = signs != 0 + + # sanity check + + if full: + (observed_target, + cov_target, + cov_target_score, + alternatives) = full_targets(conv.loglike, + conv._W, + nonzero) + else: + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero) + + result = conv.summary(observed_target, + cov_target, + cov_target_score, + alternatives, + ndraw=ndraw, + burnin=burnin, + compute_intervals=False) + pval = result['pvalue'] + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0] + def main(nsim=500, n=500, p=200, sqrt=False, target='full', sigma=3, AR=True): diff --git a/selectinf/randomized/tests/test_standalone_lasso_mle.py b/selectinf/randomized/tests/test_standalone_lasso_mle.py new file mode 100644 index 000000000..c01879f4f --- /dev/null +++ b/selectinf/randomized/tests/test_standalone_lasso_mle.py @@ -0,0 +1,195 @@ +from __future__ import division, print_function + +import numpy as np +import nose.tools as nt + +import regreg.api as rr + +from selectinf.randomized.lasso import split_lasso, selected_targets +from selectinf.randomized.query import selective_MLE +from selectinf.randomized.approx_reference import approximate_grid_inference + +def test_standalone_inference(n=2000, + p=100, + signal_fac=1.5, + proportion=0.7, + approx=True, + MLE=True): + """ + Compare to R randomized lasso + """ + + signal = np.sqrt(signal_fac * np.log(p)) / np.sqrt(n) + X = np.random.standard_normal((n, p)) + T = np.random.exponential(1, size=(n,)) + S = np.random.choice([0,1], n, p=[0.2,0.8]) + + cox_lasso = split_lasso.coxph(X, + T, + S, + 2 * np.ones(p) * np.sqrt(n), + proportion) + + signs = cox_lasso.fit() + nonzero = signs != 0 + + cox_sel = rr.glm.cox(X[:,nonzero], T, S) + + cox_full = rr.glm.cox(X, T, S) + + refit_soln = cox_sel.solve(min_its=2000) + padded_soln = np.zeros(p) + padded_soln[nonzero] = refit_soln + cox_full.solve(min_its=2000) + + full_hess = cox_full.hessian(padded_soln) + selected_hess = full_hess[nonzero][:,nonzero] + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(cox_lasso.loglike, + None, + nonzero, + hessian=full_hess, + dispersion=1) + + if nonzero.sum(): + if approx: + approx_result = cox_lasso.approximate_grid_inference(observed_target, + cov_target, + cov_target_score) + approx_pval = approx_result['pvalue'] + + testval = approximate_normalizer_inference(proportion, + cox_lasso.initial_soln[nonzero], + refit_soln, + signs[nonzero], + selected_hess, + cox_lasso.feature_weights[nonzero]) + + assert np.allclose(testval['pvalue'], approx_pval) + + else: + approx_pval = np.empty(nonzero.sum())*np.nan + + if MLE: + MLE_result = cox_lasso.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + MLE_pval = MLE_result['pvalue'] + else: + MLE_pval = np.empty(nonzero.sum())*np.nan + + # working under null here + beta = np.zeros(p) + + testval = approximate_mle_inference(proportion, + cox_lasso.initial_soln[nonzero], + refit_soln, + signs[nonzero], + 
selected_hess, + cox_lasso.feature_weights[nonzero]) + + assert np.allclose(testval['pvalue'], MLE_pval) + return approx_pval[beta[nonzero] == 0], MLE_pval[beta[nonzero] == 0], testval + else: + return [], [] + +def approximate_mle_inference(training_proportion, + training_betahat, + selected_beta_refit, + selected_signs, + selected_hessian, + selected_feature_weights, + level=0.9): + + nselect = selected_hessian.shape[0] + pi_s = training_proportion + ratio = (1 - pi_s) / pi_s + + target_cov = np.linalg.inv(selected_hessian) + cond_precision = selected_hessian / ratio + cond_cov = target_cov * ratio + cond_cov = cond_cov * selected_signs[None, :] * selected_signs[:, None] + selected_signs[np.isnan(selected_signs)] = 1 # for unpenalized + + logdens_linear = target_cov * selected_signs[:,None] + cond_mean = selected_beta_refit * selected_signs - logdens_linear.dot( + selected_feature_weights * + selected_signs) + linear_part = -np.identity(nselect) + offset = np.zeros(nselect) + + target_score_cov = -np.identity(nselect) + observed_target = selected_beta_refit + + result = selective_MLE(observed_target, + target_cov, + target_score_cov, + training_betahat * selected_signs, + cond_mean, + cond_cov, + logdens_linear, + linear_part, + offset, + level=level, + useC=True)[0] + + return result + +def approximate_normalizer_inference(training_proportion, + training_betahat, + selected_beta_refit, + selected_signs, + selected_hessian, + selected_feature_weights, + alternatives=None, + level=0.9): + + nselect = selected_hessian.shape[0] + pi_s = training_proportion + ratio = (1 - pi_s) / pi_s + + target_cov = np.linalg.inv(selected_hessian) + cond_precision = selected_hessian / ratio + cond_cov = target_cov * ratio + cond_cov = cond_cov * selected_signs[None, :] * selected_signs[:, None] + selected_signs[np.isnan(selected_signs)] = 1 # for unpenalized + + logdens_linear = target_cov * selected_signs[:,None] + cond_mean = selected_beta_refit * selected_signs - logdens_linear.dot( + selected_feature_weights * + selected_signs) + linear_part = -np.identity(nselect) + offset = np.zeros(nselect) + + target_score_cov = -np.identity(nselect) + observed_target = selected_beta_refit + + inverse_info = selective_MLE(observed_target, + target_cov, + target_score_cov, + training_betahat * selected_signs, + cond_mean, + cond_cov, + logdens_linear, + linear_part, + offset, + level=level, + useC=True)[1] + + G = approximate_grid_inference(observed_target, + target_cov, + target_score_cov, + inverse_info, + training_betahat * selected_signs, + cond_mean, + cond_cov, + logdens_linear, + linear_part, + offset) + + return G.summary(alternatives=alternatives, + level=level) + From 8b595e18acd20e124f00ed4ae31e60b0ca7a8b04 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 15 Mar 2021 17:54:01 -0700 Subject: [PATCH 078/187] signature of logistic instance --- selectinf/tests/instance.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/selectinf/tests/instance.py b/selectinf/tests/instance.py index 15826a148..0c5662b21 100644 --- a/selectinf/tests/instance.py +++ b/selectinf/tests/instance.py @@ -140,7 +140,11 @@ def _noise(n, df=np.inf): return X, Y, beta * sigma, np.nonzero(active)[0], sigma, sigmaX -def logistic_instance(n=100, p=200, s=7, rho=0.3, signal=14, +def logistic_instance(n=100, + p=200, + s=7, + rho=0.3, + signal=14, random_signs=False, scale=True, center=True, From cffdb0f28da4b4241c64c45ff06f81996a0385fa Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 15 Mar 2021 
18:03:44 -0700 Subject: [PATCH 079/187] fixing approxiamte reference tests --- .../randomized/tests/test_approx_reference.py | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index fbf57dd13..f2572884b 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -50,6 +50,10 @@ def test_summary(n=500, nonzero, dispersion=dispersion) + inverse_info = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[1] + S = conv.approximate_grid_inference(observed_target, cov_target, cov_target_score, @@ -105,10 +109,16 @@ def test_approx_pivot(n=500, cov_target, cov_target_score)[1] - approximate_grid_inf = approximate_grid_inference(conv, - observed_target, + approximate_grid_inf = approximate_grid_inference(observed_target, cov_target, - cov_target_score) + cov_target_score, + inverse_info, + conv.observed_opt_state, + conv.sampler.affine_con.mean, + conv.sampler.affine_con.covariance, + conv.sampler.logdens_transform[0], + conv.sampler.affine_con.linear_part, + conv.sampler.affine_con.offset) pivot = approximate_grid_inf._approx_pivots(beta_target) @@ -170,10 +180,16 @@ def test_approx_ci(n=500, scale_ = np.max(_scale) ngrid = int(2 * scale_/0.1) - approximate_grid_inf = approximate_grid_inference(conv, - observed_target, + approximate_grid_inf = approximate_grid_inference(observed_target, cov_target, - cov_target_score) + cov_target_score, + inverse_info, + conv.observed_opt_state, + conv.sampler.affine_con.mean, + conv.sampler.affine_con.covariance, + conv.sampler.logdens_transform[0], + conv.sampler.affine_con.linear_part, + conv.sampler.affine_con.offset) lci, uci = approximate_grid_inf._approx_intervals(level) From 13b8d2be6d1bf5db2f96c969be6be03cf4b73efb Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 15 Mar 2021 18:06:40 -0700 Subject: [PATCH 080/187] changing docstring --- selectinf/randomized/tests/test_standalone_lasso_mle.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/selectinf/randomized/tests/test_standalone_lasso_mle.py b/selectinf/randomized/tests/test_standalone_lasso_mle.py index c01879f4f..0d9b13e1e 100644 --- a/selectinf/randomized/tests/test_standalone_lasso_mle.py +++ b/selectinf/randomized/tests/test_standalone_lasso_mle.py @@ -16,7 +16,8 @@ def test_standalone_inference(n=2000, approx=True, MLE=True): """ - Compare to R randomized lasso + Check that standalone functions reproduce same p-values + as methods of `selectinf.randomized.lasso` """ signal = np.sqrt(signal_fac * np.log(p)) / np.sqrt(n) From 66fda017500809e47e90afcf7dbc99c3c39203d1 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 15 Mar 2021 18:12:52 -0700 Subject: [PATCH 081/187] fix nan signs --- selectinf/randomized/tests/test_standalone_lasso_mle.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selectinf/randomized/tests/test_standalone_lasso_mle.py b/selectinf/randomized/tests/test_standalone_lasso_mle.py index 0d9b13e1e..4151fa8a4 100644 --- a/selectinf/randomized/tests/test_standalone_lasso_mle.py +++ b/selectinf/randomized/tests/test_standalone_lasso_mle.py @@ -112,8 +112,8 @@ def approximate_mle_inference(training_proportion, target_cov = np.linalg.inv(selected_hessian) cond_precision = selected_hessian / ratio cond_cov = target_cov * ratio - cond_cov = cond_cov * selected_signs[None, :] * selected_signs[:, None] 
selected_signs[np.isnan(selected_signs)] = 1 # for unpenalized + cond_cov = cond_cov * selected_signs[None, :] * selected_signs[:, None] logdens_linear = target_cov * selected_signs[:,None] cond_mean = selected_beta_refit * selected_signs - logdens_linear.dot( @@ -155,8 +155,8 @@ def approximate_normalizer_inference(training_proportion, target_cov = np.linalg.inv(selected_hessian) cond_precision = selected_hessian / ratio cond_cov = target_cov * ratio - cond_cov = cond_cov * selected_signs[None, :] * selected_signs[:, None] selected_signs[np.isnan(selected_signs)] = 1 # for unpenalized + cond_cov = cond_cov * selected_signs[None, :] * selected_signs[:, None] logdens_linear = target_cov * selected_signs[:,None] cond_mean = selected_beta_refit * selected_signs - logdens_linear.dot( From 7926ad4b428cfa4d4f1098788324b7ea5ec7292a Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 16 Mar 2021 12:33:21 -0700 Subject: [PATCH 082/187] ensuring Gaussian mle is scale invariant; testing mle for other families; add Cox instance generator --- selectinf/randomized/lasso.py | 24 +- selectinf/randomized/query.py | 4 +- selectinf/randomized/selective_MLE_utils.pyx | 10 +- .../tests/test_selective_MLE_high.py | 537 +++++++++++++++++- selectinf/tests/instance.py | 201 ++++++- 5 files changed, 727 insertions(+), 49 deletions(-) diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index e24243386..35051b321 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -285,7 +285,7 @@ def gaussian(X, \beta \mapsto \frac{1}{2} \|Y-X\beta\|^2_2 + \sum_{i=1}^p \lambda_i |\beta_i| where $\lambda$ is `feature_weights`. The ridge term - is determined by the Hessian and `np.std(Y)` by default, + is determined by the Hessian by default, as is the randomizer scale. Parameters @@ -333,10 +333,10 @@ def gaussian(X, mean_diag = np.mean((X ** 2).sum(0)) if ridge_term is None: - ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1) + ridge_term = np.sqrt(mean_diag) / (np.sqrt(n - 1) * sigma**2) if randomizer_scale is None: - randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.)) + randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y, ddof=1) randomizer = randomization.isotropic_gaussian((p,), randomizer_scale) @@ -409,7 +409,7 @@ def logistic(X, mean_diag = np.mean((X ** 2).sum(0)) if ridge_term is None: - ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1) + ridge_term = np.sqrt(mean_diag) / np.sqrt(n - 1) if randomizer_scale is None: randomizer_scale = np.sqrt(mean_diag) * 0.5 @@ -481,6 +481,7 @@ def coxph(X, L : `selection.randomized.lasso.lasso` """ + n, p = X.shape loglike = rr.glm.cox(X, times, status, quadratic=quadratic) # scale for randomization seems kind of meaningless here... 
@@ -488,7 +489,7 @@ def coxph(X, mean_diag = np.mean((X ** 2).sum(0)) if ridge_term is None: - ridge_term = np.std(times) * np.sqrt(mean_diag) / np.sqrt(n - 1) + ridge_term = np.sqrt(mean_diag) / np.sqrt(n - 1) if randomizer_scale is None: randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.)) @@ -559,7 +560,7 @@ def poisson(X, mean_diag = np.mean((X ** 2).sum(0)) if ridge_term is None: - ridge_term = np.std(counts) * np.sqrt(mean_diag) / np.sqrt(n - 1) + ridge_term = np.sqrt(mean_diag) / np.sqrt(n - 1) if randomizer_scale is None: randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(counts) * np.sqrt(n / (n - 1.)) @@ -694,7 +695,7 @@ def selected_targets(loglike, features, sign_info={}, dispersion=None, - solve_args={'tol': 1.e-12, 'min_its': 50}, + solve_args={'tol': 1.e-12, 'min_its': 100}, hessian=None): X, y = loglike.data @@ -727,7 +728,8 @@ def full_targets(loglike, W, features, dispersion=None, - solve_args={'tol': 1.e-12, 'min_its': 50}): + solve_args={'tol': 1.e-12, 'min_its': 50}, + hessian=None): X, y = loglike.data n, p = X.shape @@ -738,6 +740,11 @@ def full_targets(loglike, # target is one-step estimator Qfull = X.T.dot(W[:, None] * X) + if hessian is None: + Qfull = X.T.dot(W[:, None] * X) + else: + Qfull = hessian + Qfull_inv = np.linalg.inv(Qfull) full_estimator = loglike.solve(**solve_args) cov_target = Qfull_inv[features][:, features] @@ -1131,6 +1138,7 @@ def coxph(X, L : `selection.randomized.lasso.lasso` """ + n, p = X.shape loglike = rr.glm.cox(X, times, status, quadratic=quadratic) return split_lasso(loglike, diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 6d1bbecd7..c5ab43ff6 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -1486,7 +1486,7 @@ def _solve_barrier_affine_py(conjugate_arg, min_its=200, tol=1.e-10): - scaling = np.sqrt(np.diag(con_linear.dot(precision).dot(con_linear.T))) + scaling = 1 / np.sqrt(np.diag(con_linear.dot(precision).dot(con_linear.T))) if feasible_point is None: feasible_point = 1. / scaling @@ -1555,7 +1555,7 @@ def _solve_barrier_nonneg(conjugate_arg, nstep=1000, tol=1.e-8): - scaling = np.sqrt(np.diag(precision)) + scaling = 1 / np.sqrt(np.diag(precision)) if feasible_point is None: feasible_point = 1. 
/ scaling diff --git a/selectinf/randomized/selective_MLE_utils.pyx b/selectinf/randomized/selective_MLE_utils.pyx index 2aabbc365..363399a25 100644 --- a/selectinf/randomized/selective_MLE_utils.pyx +++ b/selectinf/randomized/selective_MLE_utils.pyx @@ -114,8 +114,8 @@ def solve_barrier_nonneg(conjugate_arg, gradient = np.zeros_like(conjugate_arg) opt_variable = np.asarray(feasible_point) opt_proposed = opt_variable.copy() - scaling = np.sqrt(np.diag(precision)) - + scaling = 1 / np.sqrt(np.diag(precision)) + return barrier_solve_(gradient, opt_variable, opt_proposed, @@ -143,7 +143,8 @@ def solve_barrier_affine(conjugate_arg, affine_term = np.zeros_like(offset) A = linear_term - scaling = np.sqrt(np.diag(A.dot(precision).dot(A.T))) + scaling = 1 / np.sqrt(np.diag(A.dot(precision).dot(A.T))) + linear_term_fortran = np.asfortranarray(linear_term) value, opt_variable, hess = barrier_solve_affine_(gradient, @@ -158,6 +159,7 @@ def solve_barrier_affine(conjugate_arg, step, max_iter=max_iter, min_iter=min_iter, - value_tol=tol) + value_tol=tol + ) return value, opt_variable, hess \ No newline at end of file diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 7ea737021..30faa1767 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -1,8 +1,18 @@ import numpy as np import nose.tools as nt -from selectinf.randomized.lasso import lasso, full_targets, selected_targets, debiased_targets -from selectinf.tests.instance import gaussian_instance +import regreg.api as rr + + +from ..lasso import (lasso, + split_lasso, + full_targets, + selected_targets, + debiased_targets) +from ...tests.instance import (gaussian_instance, + logistic_instance, + poisson_instance, + cox_instance) def test_full_targets(n=200, p=1000, @@ -12,7 +22,7 @@ def test_full_targets(n=200, randomizer_scale=0.5, full_dispersion=False): """ - Compare to R randomized lasso + Run approx MLE with full targets on Gaussian data """ inst, const = gaussian_instance, lasso.gaussian @@ -82,7 +92,6 @@ def test_full_targets(n=200, coverage = (beta[nonzero] > intervals[:, 0]) * (beta[nonzero] < intervals[:, 1]) return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals - def test_selected_targets(n=2000, p=200, signal_fac=10., @@ -92,7 +101,7 @@ def test_selected_targets(n=2000, randomizer_scale=1, full_dispersion=True): """ - Compare to R randomized lasso + Run approx MLE with selected targets on Gaussian data """ inst, const = gaussian_instance, lasso.gaussian @@ -152,6 +161,513 @@ def test_selected_targets(n=2000, coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals +def test_logistic(n=2000, + p=200, + signal_fac=10., + s=5, + rho=0.4, + randomizer_scale=1): + """ + Run approx MLE with selected targets on binomial data + """ + + inst, const = logistic_instance, lasso.logistic + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + randomizer_scale=randomizer_scale * sigma_) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + + (observed_target, + 
cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=1) + + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + estimate = result['MLE'] + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals + +def test_logistic_split(n=2000, + p=200, + signal_fac=10., + s=5, + rho=0.4, + randomizer_scale=1): + """ + Run approx MLE with selected targets on binomial data with data splitting + """ + + inst, const = logistic_instance, split_lasso.logistic + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + proportion=0.7) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=1) + + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + estimate = result['MLE'] + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals + +def test_poisson(n=2000, + p=200, + signal_fac=10., + s=5, + rho=0.4, + randomizer_scale=1): + """ + Run approx MLE with selected targets on Poisson data + """ + + inst, const = poisson_instance, lasso.poisson + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + randomizer_scale=randomizer_scale * sigma_) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=1) + + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + estimate = result['MLE'] + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals + +def test_poisson_split(n=2000, + p=200, + signal_fac=10., + s=5, + rho=0.4, + randomizer_scale=1): + """ + Run approx MLE with selected targets on Poisson data with data splitting + """ + + inst, const = poisson_instance, split_lasso.poisson + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + proportion=0.7) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=1) + + result = conv.selective_MLE(observed_target, + cov_target, + 
cov_target_score)[0] + estimate = result['MLE'] + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals + +def test_cox(n=2000, + p=200, + signal_fac=10., + s=5, + rho=0.4, + randomizer_scale=1): + """ + Run approx MLE with selected targets on survival data + """ + + inst, const = cox_instance, lasso.coxph + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + X, T, S, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:4] + + n, p = X.shape + + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) + + conv = const(X, + T, + S, + W, + randomizer_scale=randomizer_scale) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + + cox_full = rr.glm.cox(X, T, S) + full_hess = cox_full.hessian(conv.initial_soln) + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + None, + nonzero, + hessian=full_hess, + dispersion=1) + + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + estimate = result['MLE'] + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals + +def test_cox_split(n=2000, + p=200, + signal_fac=10., + s=5, + rho=0.4, + randomizer_scale=1): + """ + Run approx MLE with selected targets on survival data with data splitting + """ + + inst, const = cox_instance, split_lasso.coxph + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + X, T, S, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:4] + + n, p = X.shape + + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) + + conv = const(X, + T, + S, + W, + proportion=0.7) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + + cox_full = rr.glm.cox(X, T, S) + full_hess = cox_full.hessian(conv.initial_soln) + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + None, + nonzero, + hessian=full_hess, + dispersion=1) + + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + estimate = result['MLE'] + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals + +def test_scale_invariant_split(n=200, + p=20, + signal_fac=10., + s=5, + sigma=3, + rho=0.4, + randomizer_scale=1, + full_dispersion=True, + seed=2): + """ + Confirm Gaussian version is appropriately scale invariant with data splitting + """ + + inst, const = gaussian_instance, split_lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + results = [] + + scales = [1, 5] + for scale in scales: + + np.random.seed(seed) + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + Y *= scale; beta *= scale + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + print('W', W[0]/scale) + conv = const(X, + Y, + W, + proportion=0.7) + + signs = conv.fit() + nonzero = signs != 0 + print('nonzero', np.where(nonzero)[0]) + print('feature_weights', conv.feature_weights[0] / scale) + dispersion = np.linalg.norm(Y - 
X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + + print('dispersion', dispersion/scale**2) + print('target', observed_target[0]/scale) + print('cov_target', cov_target[0,0]/scale**2) + print('cov_target_score', cov_target_score[0,0]/scale**2) + + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + + print(result['MLE'] / scale) + results.append(result) + + assert np.allclose(results[0]['MLE'] / scales[0], + results[1]['MLE'] / scales[1]) + assert np.allclose(results[0]['SE'] / scales[0], + results[1]['SE'] / scales[1]) + assert np.allclose(results[0]['upper_confidence'] / scales[0], + results[1]['upper_confidence'] / scales[1]) + assert np.allclose(results[0]['lower_confidence'] / scales[0], + results[1]['lower_confidence'] / scales[1]) + assert np.allclose(results[0]['Zvalue'], + results[1]['Zvalue']) + assert np.allclose(results[0]['pvalue'], + results[1]['pvalue']) + +def test_scale_invariant(n=200, + p=20, + signal_fac=10., + s=5, + sigma=3, + rho=0.4, + randomizer_scale=1, + full_dispersion=True, + seed=2): + """ + Confirm Gaussian version is appropriately scale invariant + """ + + inst, const = gaussian_instance, lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + results = [] + + scales = [1, 5] + for scale in scales: + + np.random.seed(seed) + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + Y *= scale; beta *= scale + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + print('W', W[0]/scale) + conv = const(X, + Y, + W, + randomizer_scale=randomizer_scale * sigma_) + + signs = conv.fit() + nonzero = signs != 0 + print('nonzero', np.where(nonzero)[0]) + print('feature_weights', conv.feature_weights[0] / scale) + print('perturb', conv._initial_omega[0] / scale) + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + + print('dispersion', dispersion/scale**2) + print('target', observed_target[0]/scale) + print('cov_target', cov_target[0,0]/scale**2) + print('cov_target_score', cov_target_score[0,0]/scale**2) + + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + + print(result['MLE'] / scale) + results.append(result) + + assert np.allclose(results[0]['MLE'] / scales[0], + results[1]['MLE'] / scales[1]) + assert np.allclose(results[0]['SE'] / scales[0], + results[1]['SE'] / scales[1]) + assert np.allclose(results[0]['upper_confidence'] / scales[0], + results[1]['upper_confidence'] / scales[1]) + assert np.allclose(results[0]['lower_confidence'] / scales[0], + results[1]['lower_confidence'] / scales[1]) + assert np.allclose(results[0]['Zvalue'], + results[1]['Zvalue']) + assert np.allclose(results[0]['pvalue'], + results[1]['pvalue']) + def main(nsim=500, full=False): P0, PA, cover, length_int = [], [], [], [] @@ -222,14 +738,3 @@ def test_instance(): return coverage -def main(nsim=500): - - cover = [] - for i in range(nsim): - - cover_ = test_instance() - cover.extend(cover_) - print(np.mean(cover), 'coverage so far ') - -if __name__ == "__main__": - main(nsim=500) diff --git a/selectinf/tests/instance.py b/selectinf/tests/instance.py index 
0c5662b21..5035518c6 100644 --- a/selectinf/tests/instance.py +++ b/selectinf/tests/instance.py @@ -31,9 +31,16 @@ def AR1(rho, p): X = np.random.standard_normal((n, p)).dot(cholX.T) return X, sigmaX, cholX -def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0., signal=7, - random_signs=False, df=np.inf, - scale=True, center=True, +def gaussian_instance(n=100, + p=200, + s=7, + sigma=5, + rho=0., + signal=7, + random_signs=False, + df=np.inf, + scale=True, + center=True, equicorrelated=True): @@ -61,14 +68,13 @@ def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0., signal=7, sigma : float Noise level - rho : float - Equicorrelation value (must be in interval [0,1]) + rho : float + Correlation parameter. Must be in interval [0,1] for + equicorrelated, [-1,1] for AR(1). signal : float or (float, float) Sizes for the coefficients. If a tuple -- then coefficients are equally spaced between these values using np.linspace. - Note: the size of signal is for a "normalized" design, where np.diag(X.T.dot(X)) == np.ones(p). - If scale=False, this signal is divided by np.sqrt(n), otherwise it is unchanged. random_signs : bool If true, assign random signs to coefficients. @@ -77,9 +83,15 @@ def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0., signal=7, df : int Degrees of freedom for noise (from T distribution). - equicorrelated: bool - If true, design in equi-correlated, - Else design is AR. + scale : bool + Scale columns of design matrix? + + center : bool + Center columns of design matrix? + + equicorrelated : bool + Should columns of design be equi-correlated + or AR(1)? Returns ------- @@ -101,6 +113,13 @@ def gaussian_instance(n=100, p=200, s=7, sigma=5, rho=0., signal=7, sigmaX : np.ndarray((p,p)) Row covariance. + + Notes + ----- + + The size of signal is for a "normalized" design, where np.diag(X.T.dot(X)) == np.ones(p). + If scale=False, this signal is divided by np.sqrt(n), otherwise it is unchanged. + """ X, sigmaX = _design(n, p, rho, equicorrelated)[:2] @@ -166,19 +185,28 @@ def logistic_instance(n=100, s : int True sparsity - rho : float - Equicorrelation value (must be in interval [0,1]) + rho : float + Correlation parameter. Must be in interval [0,1] for + equicorrelated, [-1,1] for AR(1). signal : float or (float, float) Sizes for the coefficients. If a tuple -- then coefficients are equally spaced between these values using np.linspace. - Note: the size of signal is for a "normalized" design, where np.diag(X.T.dot(X)) == np.ones(p). - If scale=False, this signal is divided by np.sqrt(n), otherwise it is unchanged. random_signs : bool If true, assign random signs to coefficients. Else they are all positive. + scale : bool + Scale columns of design matrix? + + center : bool + Center columns of design matrix? + + equicorrelated : bool + Should columns of design be equi-correlated + or AR(1)? + Returns ------- @@ -197,6 +225,11 @@ def logistic_instance(n=100, sigmaX : np.ndarray((p,p)) Row covariance. + Notes + ----- + + The size of signal is for a "normalized" design, where np.diag(X.T.dot(X)) == np.ones(p). + If scale=False, this signal is divided by np.sqrt(n), otherwise it is unchanged. 
""" X, sigmaX = _design(n, p, rho, equicorrelated)[:2] @@ -230,7 +263,11 @@ def logistic_instance(n=100, Y = np.random.binomial(1, pi) return X, Y, beta, np.nonzero(active)[0], sigmaX -def poisson_instance(n=100, p=200, s=7, rho=0.3, signal=4, +def poisson_instance(n=100, + p=200, + s=7, + rho=0.3, + signal=4, random_signs=False, scale=True, center=True, @@ -252,19 +289,28 @@ def poisson_instance(n=100, p=200, s=7, rho=0.3, signal=4, s : int True sparsity - rho : float - Equicorrelation value (must be in interval [0,1]) + rho : float + Correlation parameter. Must be in interval [0,1] for + equicorrelated, [-1,1] for AR(1). signal : float or (float, float) Sizes for the coefficients. If a tuple -- then coefficients are equally spaced between these values using np.linspace. - Note: the size of signal is for a "normalized" design, where np.diag(X.T.dot(X)) == np.ones(p). - If scale=False, this signal is divided by np.sqrt(n), otherwise it is unchanged. random_signs : bool If true, assign random signs to coefficients. Else they are all positive. + scale : bool + Scale columns of design matrix? + + center : bool + Center columns of design matrix? + + equicorrelated : bool + Should columns of design be equi-correlated + or AR(1)? + Returns ------- @@ -283,6 +329,11 @@ def poisson_instance(n=100, p=200, s=7, rho=0.3, signal=4, sigmaX : np.ndarray((p,p)) Row covariance. + Notes + ----- + + The size of signal is for a "normalized" design, where np.diag(X.T.dot(X)) == np.ones(p). + If scale=False, this signal is divided by np.sqrt(n), otherwise it is unchanged. """ X, sigmaX = _design(n, p, rho, equicorrelated)[:2] @@ -316,6 +367,118 @@ def poisson_instance(n=100, p=200, s=7, rho=0.3, signal=4, Y = np.random.poisson(mu) return X, Y, beta, np.nonzero(active)[0], sigmaX +def cox_instance(n=100, + p=200, + s=7, + rho=0.3, + signal=4, + random_signs=False, + scale=True, + center=True, + p_censor=0.1, + equicorrelated=True): + """A testing instance for the LASSO. + Design is equi-correlated in the population, + normalized to have columns of norm 1. + + Parameters + ---------- + + n : int + Sample size + + p : int + Number of features + + s : int + True sparsity + + rho : float + Correlation parameter. Must be in interval [0,1] for + equicorrelated, [-1,1] for AR(1). + + signal : float or (float, float) + Sizes for the coefficients. If a tuple -- then coefficients + are equally spaced between these values using np.linspace. + + random_signs : bool + If true, assign random signs to coefficients. + Else they are all positive. + + scale : bool + Scale columns of design matrix? + + center : bool + Center columns of design matrix? + + equicorrelated : bool + Should columns of design be equi-correlated + or AR(1)? + + p_censor : float + Probability of right-censorship. + + Returns + ------- + + X : np.float((n,p)) + Design matrix. + + T : np.float(n) + Response vector of times. + + S : np.bool(n) + Right-censoring status. + + beta : np.float(p) + True coefficients. + + active : np.int(s) + Non-zero pattern. + + sigmaX : np.ndarray((p,p)) + Row covariance. + + Notes + ----- + + The size of signal is for a "normalized" design, where np.diag(X.T.dot(X)) == np.ones(p). + If scale=False, this signal is divided by np.sqrt(n), otherwise it is unchanged. 
+ + """ + + X, sigmaX = _design(n, p, rho, equicorrelated)[:2] + + if center: + X -= X.mean(0)[None,:] + + beta = np.zeros(p) + signal = np.atleast_1d(signal) + if signal.shape == (1,): + beta[:s] = signal[0] + else: + beta[:s] = np.linspace(signal[0], signal[1], s) + if random_signs: + beta[:s] *= (2 * np.random.binomial(1, 0.5, size=(s,)) - 1.) + np.random.shuffle(beta) + beta /= np.sqrt(n) + + if scale: + scaling = X.std(0) * np.sqrt(n) + X /= scaling[None, :] + beta *= np.sqrt(n) + sigmaX = sigmaX / np.multiply.outer(scaling, scaling) + + active = np.zeros(p, np.bool) + active[beta != 0] = True + + eta = linpred = np.dot(X, beta) + mu = np.exp(eta) + + T = np.random.exponential(mu) + S = np.random.choice([0,1], n, p=[p_censor,1-p_censor]) + return X, T, S, beta, np.nonzero(active)[0], sigmaX + def HIV_NRTI(drug='3TC', standardize=True, datafile=None, From 812d4e1ea77eb6e0fe24a8cbf2ae3be4bfce828a Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 19 Apr 2021 13:55:29 -0400 Subject: [PATCH 083/187] new pivots based on exact reference --- selectinf/randomized/exact_reference.py | 272 ++++++++++++++++++ .../randomized/tests/test_exact_reference.py | 92 ++++++ 2 files changed, 364 insertions(+) create mode 100644 selectinf/randomized/exact_reference.py create mode 100644 selectinf/randomized/tests/test_exact_reference.py diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py new file mode 100644 index 000000000..7626ea182 --- /dev/null +++ b/selectinf/randomized/exact_reference.py @@ -0,0 +1,272 @@ +from __future__ import division, print_function + +import numpy as np, pandas as pd +from scipy.interpolate import interp1d +from scipy.stats import norm as ndist + +from ..distributions.discrete_family import discrete_family + +class exact_grid_inference(object): + + def __init__(self, + query, + observed_target, + target_cov, + target_score_cov, + solve_args={'tol': 1.e-12}): + + """ + Produce p-values and confidence intervals for targets + of model including selected features + + Parameters + ---------- + + query : `gaussian_query` + A Gaussian query which has information + to describe implied Gaussian. + + observed_target : ndarray + Observed estimate of target. + + target_cov : ndarray + Estimated covaraince of target. + + target_score_cov : ndarray + Estimated covariance of target and score of randomized query. + + solve_args : dict, optional + Arguments passed to solver. + + """ + + self.solve_args = solve_args + + result, inverse_info = query.selective_MLE(observed_target, + target_cov, + target_score_cov, + solve_args=solve_args)[:2] + mle = result['MLE'] + + self.linear_part = query.sampler.affine_con.linear_part + self.offset = query.sampler.affine_con.offset + + self.logdens_linear = query.sampler.logdens_transform[0] + self.cond_mean = query.cond_mean + self.prec_opt = np.linalg.inv(query.cond_cov) + self.cond_cov = query.cond_cov + + self.observed_target = observed_target + self.target_score_cov = target_score_cov + self.target_cov = target_cov + + self.init_soln = query.observed_opt_state + + self.ntarget = ntarget = target_cov.shape[0] + _scale = 4. * np.sqrt(np.diag(inverse_info)) + ngrid = 60 + + self.stat_grid = np.zeros((ntarget, ngrid)) + for j in range(ntarget): + self.stat_grid[j, :] = np.linspace(observed_target[j] - 1. * _scale[j], + observed_target[j] + 1. 
* _scale[j], + num=ngrid) + + def summary(self, + alternatives=None, + parameter=None, + level=0.9): + """ + Produce p-values and confidence intervals for targets + of model including selected features + + Parameters + ---------- + + alternatives : [str], optional + Sequence of strings describing the alternatives, + should be values of ['twosided', 'less', 'greater'] + + parameter : np.array + Hypothesized value for parameter -- defaults to 0. + + level : float + Confidence level. + + """ + + if parameter is not None: + pivots = self.approx_pivots(parameter, + alternatives=alternatives) + else: + pivots = None + + pvalues = self._approx_pivots(np.zeros_like(self.observed_target), + alternatives=alternatives) + lower, upper = self._approx_intervals(level=level) + + result = pd.DataFrame({'target': self.observed_target, + 'pvalue': pvalues, + 'lower_confidence': lower, + 'upper_confidence': upper}) + + if not np.all(parameter == 0): + result.insert(4, 'pivot', pivots) + result.insert(5, 'parameter', parameter) + + return result + + def log_reference(self, + observed_target, + target_cov, + target_score_cov, + grid): + + if np.asarray(observed_target).shape in [(), (0,)]: + raise ValueError('no target specified') + + prec_target = np.linalg.inv(target_cov) + target_lin = - self.logdens_linear.dot(target_score_cov.T.dot(prec_target)) + + ref_hat = [] + + for k in range(grid.shape[0]): + # in the usual D = N + Gamma theta.hat, + # target_lin is "something" times Gamma, + # where "something" comes from implied Gaussian + # cond_mean is "something" times D + # Gamma is target_score_cov.T.dot(prec_target) + + num_opt = self.prec_opt.shape[0] + num_con = self.linear_part.shape[0] + + cond_mean_grid = (target_lin.dot(np.atleast_1d(grid[k] - observed_target)) + + self.cond_mean) + + #direction for decomposing o + + eta = -self.prec_opt.dot(self.logdens_linear.dot(target_score_cov.T)) + + implied_mean = np.asscalar(eta.T.dot(cond_mean_grid)) + implied_cov = np.asscalar(eta.T.dot(self.cond_cov).dot(eta)) + implied_prec = 1./implied_cov + + _A = self.cond_cov.dot(eta) * implied_prec + A = self.linear_part.dot(_A).reshape((-1,)) + b = self.linear_part.dot((-np.identity(num_opt) + _A.dot(eta.T)).dot(self.init_soln)) + + neg_indx = np.asarray([j for j in range(num_con) if A[j] < 0.]) + pos_indx = np.asarray([j for j in range(num_con) if A[j] > 0.]) + + trunc_ = (self.offset + b) / A + + if pos_indx.shape[0]>0 and neg_indx.shape[0]>0: + + trunc_lower = np.max(trunc_[neg_indx]) + trunc_upper = np.min(trunc_[pos_indx]) + + lower_limit = (trunc_lower - implied_mean) * implied_prec + upper_limit = (trunc_upper - implied_mean) * implied_prec + + ref_hat.append(np.log(ndist.cdf(upper_limit) - ndist.cdf(lower_limit))) + + elif pos_indx.shape[0] == num_con: + + trunc_upper = np.min(trunc_[pos_indx]) + + upper_limit = (trunc_upper - implied_mean) * implied_prec + + ref_hat.append(np.log(ndist.cdf(upper_limit))) + + else: + + trunc_lower = np.max(trunc_[neg_indx]) + + lower_limit = (trunc_lower - implied_mean) * implied_prec + + ref_hat.append(np.log(1. 
- ndist.cdf(lower_limit))) + + return np.asarray(ref_hat) + + def _construct_families(self): + + self._families = [] + for m in range(self.ntarget): + p = self.target_score_cov.shape[1] + observed_target_uni = (self.observed_target[m]).reshape((1,)) + target_cov_uni = (np.diag(self.target_cov)[m]).reshape((1, 1)) + var_target = target_cov_uni[0, 0] + target_score_cov_uni = self.target_score_cov[m, :].reshape((1, p)) + + log_ref = self.log_reference(observed_target_uni, + target_cov_uni, + target_score_cov_uni, + self.stat_grid[m]) + + grid_approx_fn = interp1d(self.stat_grid[m], + log_ref, + kind='quadratic', + bounds_error=False, + fill_value='extrapolate') + + grid = np.linspace(self.stat_grid[m].min(), self.stat_grid[m].max(), 1000) + logW = (grid_approx_fn(grid) - + 0.5 * (grid - self.observed_target[m]) ** 2 / var_target) + logW -= logW.max() + + # construction of families follows `selectinf.learning.core` + + self._families.append(discrete_family(grid, + np.exp(logW))) + + def _pivots(self, + mean_parameter, + alternatives=None): + + if not hasattr(self, "_families"): + self._construct_families() + + if alternatives is None: + alternatives = ['twosided'] * self.ntarget + else: + alternatives = [alternatives] *self.ntarget + pivot = [] + + for m in range(self.ntarget): + family = self._families[m] + observed_target = self.observed_target[m] + var_target = self.target_cov[m, m] + + # construction of pivot from families follows `selectinf.learning.core` + + _cdf = family.cdf((mean_parameter[m] - observed_target) / var_target, + x=observed_target) + if alternatives[m] == 'twosided': + pivot.append(2 * min(_cdf, 1 - _cdf)) + elif alternatives[m] == 'greater': + pivot.append(1 - _cdf) + elif alternatives[m] == 'less': + pivot.append(_cdf) + else: + raise ValueError('alternative should be in ["twosided", "less", "greater"]') + return pivot + + def _intervals(self, + level=0.9): + + if not hasattr(self, "_families"): + self._construct_families() + + lower, upper = [], [] + + for m in range(self.ntarget): + # construction of intervals from families follows `selectinf.learning.core` + family = self._families[m] + observed_target = self.observed_target[m] + l, u = family.equal_tailed_interval(observed_target, + alpha=1 - level) + var_target = self.target_cov[m, m] + lower.append(l * var_target + observed_target) + upper.append(u * var_target + observed_target) + + return np.asarray(lower), np.asarray(upper) \ No newline at end of file diff --git a/selectinf/randomized/tests/test_exact_reference.py b/selectinf/randomized/tests/test_exact_reference.py new file mode 100644 index 000000000..b8835ecb2 --- /dev/null +++ b/selectinf/randomized/tests/test_exact_reference.py @@ -0,0 +1,92 @@ +import numpy as np + +from ...tests.instance import gaussian_instance +from ..lasso import lasso, selected_targets +from ..exact_reference import exact_grid_inference + +def test_approx_pivot(n=500, + p=100, + signal_fac=1., + s=5, + sigma=2., + rho=0.4, + randomizer_scale=1.): + + inst, const = gaussian_instance, lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + + W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * np.sqrt(dispersion) + + conv = const(X, + Y, + W, + randomizer_scale=randomizer_scale * dispersion) + + signs = conv.fit() + nonzero 
= signs != 0 + print("size of selected set ", nonzero.sum()) + + if nonzero.sum()>0: + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + + exact_grid_inf = exact_grid_inference(conv, + observed_target, + cov_target, + cov_target_score) + + pivot = exact_grid_inf._pivots(beta_target) + + return pivot + + + +def main(nsim=300): + + import matplotlib as mpl + mpl.use('tkagg') + import matplotlib.pyplot as plt + from statsmodels.distributions.empirical_distribution import ECDF + + _pivot = [] + for i in range(nsim): + _pivot.extend(test_approx_pivot(n=500, + p=100, + signal_fac=0.5, + s=5, + sigma=2., + rho=0.50, + randomizer_scale=1.)) + + print("iteration completed ", i) + + plt.clf() + ecdf_pivot = ECDF(np.asarray(_pivot)) + grid = np.linspace(0, 1, 101) + plt.plot(grid, ecdf_pivot(grid), c='blue', marker='^') + plt.plot(grid, grid, 'k--') + plt.show() + +if __name__ == "__main__": + main(nsim=10) \ No newline at end of file From 71bf5f1fc23e7ab8e1941dac0d0496164342dce5 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 19 Apr 2021 15:48:35 -0400 Subject: [PATCH 084/187] added test for ci --- selectinf/randomized/exact_reference.py | 2 +- .../randomized/tests/test_exact_reference.py | 123 +++++++++++++++--- 2 files changed, 104 insertions(+), 21 deletions(-) diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index 7626ea182..5e5c43db8 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -64,7 +64,7 @@ def __init__(self, self.ntarget = ntarget = target_cov.shape[0] _scale = 4. * np.sqrt(np.diag(inverse_info)) - ngrid = 60 + ngrid = 40 self.stat_grid = np.zeros((ntarget, ngrid)) for j in range(ntarget): diff --git a/selectinf/randomized/tests/test_exact_reference.py b/selectinf/randomized/tests/test_exact_reference.py index b8835ecb2..23a091b70 100644 --- a/selectinf/randomized/tests/test_exact_reference.py +++ b/selectinf/randomized/tests/test_exact_reference.py @@ -60,33 +60,116 @@ def test_approx_pivot(n=500, return pivot +def test_approx_ci(n=500, + p=100, + signal_fac=1., + s=5, + sigma=2., + rho=0.4, + randomizer_scale=1., + level=0.9): + inst, const = gaussian_instance, lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + + W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * np.sqrt(dispersion) + + conv = const(X, + Y, + W, + randomizer_scale=randomizer_scale * dispersion) -def main(nsim=300): + signs = conv.fit() + nonzero = signs != 0 + + if nonzero.sum()>0: + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + + result, inverse_info = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[:2] + + exact_grid_inf = exact_grid_inference(conv, + observed_target, + cov_target, + cov_target_score) + + lci, uci = exact_grid_inf._intervals(level) + + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + coverage = (lci < beta_target) * (uci > beta_target) + length = uci - lci + + return np.mean(coverage), np.mean(length), np.mean(length-(3.3 
* np.sqrt(np.diag(inverse_info)))) + +def main(nsim=300, CI=False): import matplotlib as mpl mpl.use('tkagg') import matplotlib.pyplot as plt from statsmodels.distributions.empirical_distribution import ECDF - _pivot = [] - for i in range(nsim): - _pivot.extend(test_approx_pivot(n=500, - p=100, - signal_fac=0.5, - s=5, - sigma=2., - rho=0.50, - randomizer_scale=1.)) - - print("iteration completed ", i) - - plt.clf() - ecdf_pivot = ECDF(np.asarray(_pivot)) - grid = np.linspace(0, 1, 101) - plt.plot(grid, ecdf_pivot(grid), c='blue', marker='^') - plt.plot(grid, grid, 'k--') - plt.show() + if CI is False: + _pivot = [] + for i in range(nsim): + _pivot.extend(test_approx_pivot(n=500, + p=100, + signal_fac=0.5, + s=5, + sigma=3., + rho=0.50, + randomizer_scale=0.7)) + + print("iteration completed ", i) + + plt.clf() + ecdf_pivot = ECDF(np.asarray(_pivot)) + grid = np.linspace(0, 1, 101) + plt.plot(grid, ecdf_pivot(grid), c='blue', marker='^') + plt.plot(grid, grid, 'k--') + plt.show() + + if CI is True: + coverage_ = 0. + length_ = 0. + length_diff_ = 0. + for n in range(nsim): + cov, len, len_diff = test_approx_ci(n=500, + p=100, + signal_fac=1., + s=5, + sigma=3., + rho=0.50, + randomizer_scale=1.) + + coverage_ += cov + length_ += len + length_diff_ += len_diff + print("coverage so far ", coverage_ / (n + 1.)) + print("lengths so far ", length_ / (n + 1.), length_diff_/(n+1.)) + print("iteration completed ", n + 1) + if __name__ == "__main__": - main(nsim=10) \ No newline at end of file + main(nsim=50, CI=True) \ No newline at end of file From c8d31900d0f11192b26b25b0a2bfed11137ca467 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 28 Apr 2021 09:59:51 -0400 Subject: [PATCH 085/187] adding level argument for approximate reference --- selectinf/randomized/query.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index c5ab43ff6..b45f59978 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -393,6 +393,7 @@ def approximate_grid_inference(self, target_cov, target_score_cov, alternatives=None, + level=0.9, solve_args={'tol': 1.e-12}): """ @@ -413,6 +414,9 @@ def approximate_grid_inference(self, Sequence of strings describing the alternatives, should be values of ['twosided', 'less', 'greater'] + level : float, optional + Confidence level. + solve_args : dict, optional Arguments passed to solver. 
@@ -435,7 +439,8 @@ def approximate_grid_inference(self, self.sampler.affine_con.offset, solve_args=solve_args) - return G.summary(alternatives=alternatives) + return G.summary(alternatives=alternatives, + level=level) class multiple_queries(object): From 80c0cd840b307f2e67b3bb066e9a56bc62fd492b Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Thu, 29 Apr 2021 09:22:44 -0400 Subject: [PATCH 086/187] tests for new equicorrelated instance --- .../randomized/tests/test_exact_reference.py | 31 +++---- .../tests/test_selective_MLE_high.py | 81 +++++++++++++++++-- 2 files changed, 93 insertions(+), 19 deletions(-) diff --git a/selectinf/randomized/tests/test_exact_reference.py b/selectinf/randomized/tests/test_exact_reference.py index 23a091b70..c023b0d65 100644 --- a/selectinf/randomized/tests/test_exact_reference.py +++ b/selectinf/randomized/tests/test_exact_reference.py @@ -17,23 +17,26 @@ def test_approx_pivot(n=500, X, Y, beta = inst(n=n, p=p, - signal=signal, + signal=0, s=s, - equicorrelated=False, + equicorrelated=True, rho=rho, sigma=sigma, - random_signs=True)[:3] + random_signs=False)[:3] n, p = X.shape sigma_ = np.std(Y) - dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + #dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + dispersion = sigma_ ** 2 - W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * np.sqrt(dispersion) + #W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * np.sqrt(dispersion) + eps = np.random.standard_normal((n, 2000)) * Y.std() + lam_theory = 0.7 * np.median(np.abs(X.T.dot(eps)).max(1)) conv = const(X, Y, - W, + lam_theory * np.ones(p), randomizer_scale=randomizer_scale * dispersion) signs = conv.fit() @@ -49,7 +52,7 @@ def test_approx_pivot(n=500, alternatives) = selected_targets(conv.loglike, conv._W, nonzero, - dispersion=dispersion) + dispersion=None) exact_grid_inf = exact_grid_inference(conv, observed_target, @@ -133,12 +136,12 @@ def main(nsim=300, CI=False): if CI is False: _pivot = [] for i in range(nsim): - _pivot.extend(test_approx_pivot(n=500, - p=100, - signal_fac=0.5, - s=5, - sigma=3., - rho=0.50, + _pivot.extend(test_approx_pivot(n=100, + p=400, + signal_fac=1., + s=0, + sigma=1., + rho=0.30, randomizer_scale=0.7)) print("iteration completed ", i) @@ -172,4 +175,4 @@ def main(nsim=300, CI=False): if __name__ == "__main__": - main(nsim=50, CI=True) \ No newline at end of file + main(nsim=50, CI=False) \ No newline at end of file diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index e38bde4fa..17ecb0423 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -204,6 +204,77 @@ def test_instance(): # cover.extend(cover_) # print(np.mean(cover), 'coverage so far ') + +def test_selected_targets_disperse(n=500, + p=100, + signal_fac=1., + s=5, + sigma=1., + rho=0.4, + randomizer_scale=1, + full_dispersion=True): + """ + Compare to R randomized lasso + """ + + inst, const = gaussian_instance, lasso.gaussian + signal = 1. + + while True: + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + idx = np.arange(p) + sigmaX = rho ** np.abs(np.subtract.outer(idx, idx)) + print("snr", beta.T.dot(sigmaX).dot(beta) / ((sigma ** 2.) 
* n)) + + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + randomizer_scale=randomizer_scale * sigma_) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + dispersion = None + if full_dispersion: + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) + + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + + coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals + + def main(nsim=500, full=False): P0, PA, cover, length_int = [], [], [], [] from statsmodels.distributions import ECDF @@ -220,17 +291,17 @@ def main(nsim=500, full=False): avg_length = intervals[:, 1] - intervals[:, 0] else: full_dispersion = True - p0, pA, cover_, intervals = test_selected_targets(n=n, p=p, s=s, + p0, pA, cover_, intervals = test_selected_targets_disperse(n=n, p=p, s=int(p/2), full_dispersion=full_dispersion) avg_length = intervals[:, 1] - intervals[:, 0] cover.extend(cover_) P0.extend(p0) PA.extend(pA) - print( - np.array(PA) < 0.1, np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover), - np.mean(avg_length), 'null pvalue + power + length') - + # print( + # np.array(PA) < 0.1, np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover), + # np.mean(avg_length), 'null pvalue + power + length') + print("coverage and lengths ", np.mean(cover), np.mean(avg_length)) if __name__ == "__main__": main(nsim=100) From 8cd202bff7dffac9b829abdc5496c87056d0bd2a Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Sat, 15 May 2021 22:41:57 -0400 Subject: [PATCH 087/187] commit changes --- selectinf/randomized/lasso.py | 2 +- .../randomized/tests/test_approx_reference.py | 57 +++++++++++++------ 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index f06e837eb..12d153ddb 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -321,7 +321,7 @@ def gaussian(X, ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1) if randomizer_scale is None: - randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.)) + randomizer_scale = np.sqrt(mean_diag) * 0.7 * np.std(Y) * np.sqrt(n / (n - 1.)) randomizer = randomization.isotropic_gaussian((p,), randomizer_scale) diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index 1832b7cbe..4a9276741 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -70,36 +70,54 @@ def test_approx_pivot(n=500, p=p, signal=signal, s=s, - equicorrelated=False, + equicorrelated=True, rho=rho, sigma=sigma, - random_signs=True)[:3] + random_signs=False)[:3] n, p = X.shape sigma_ = np.std(Y) - dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + if n>p: + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / 
(n - p) + else: + dispersion = sigma_ ** 2 - W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + #W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + eps = np.random.standard_normal((n, 2000)) * Y.std() + lam_theory = 0.7 * np.median(np.abs(X.T.dot(eps)).max(1)) + W = lam_theory * np.ones(p) conv = const(X, Y, W, - randomizer_scale=randomizer_scale * dispersion) + ridge_term = 0.) + #randomizer_scale=randomizer_scale * dispersion) signs = conv.fit() nonzero = signs != 0 + print("number of selected ", nonzero.sum()) if nonzero.sum()>0: - beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=dispersion) + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + if n>p: + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + + else: + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=sigma ** 2) inverse_info = conv.selective_MLE(observed_target, cov_target, @@ -191,17 +209,20 @@ def test_approx_ci(n=500, def main(nsim=300, CI = False): + import matplotlib as mpl + mpl.use('tkagg') import matplotlib.pyplot as plt from statsmodels.distributions.empirical_distribution import ECDF + if CI is False: _pivot = [] for i in range(nsim): - _pivot.extend(test_approx_pivot(n=200, - p=100, + _pivot.extend(test_approx_pivot(n=100, + p=400, signal_fac=1., - s=5, - sigma=3., - rho=0.20, + s=10, + sigma=5., + rho=0.30, randomizer_scale=1.)) print("iteration completed ", i) From 18033ae85a5679791115b433b3a04711f9a6589e Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Tue, 18 May 2021 10:35:54 -0400 Subject: [PATCH 088/187] test to compare unbiased estimates --- selectinf/randomized/lasso.py | 2 +- .../tests/test_unbiased_estimates.py | 150 ++++++++++++++++++ 2 files changed, 151 insertions(+), 1 deletion(-) create mode 100644 selectinf/randomized/tests/test_unbiased_estimates.py diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index 12d153ddb..ca7d133db 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -210,7 +210,7 @@ def signed_basis_vector(p, j, s): unpenalized_directions) opt_offset = self.initial_subgrad - + self.opt_linear = opt_linear # now make the constraints and implied gaussian self._setup = True diff --git a/selectinf/randomized/tests/test_unbiased_estimates.py b/selectinf/randomized/tests/test_unbiased_estimates.py new file mode 100644 index 000000000..1a9918e72 --- /dev/null +++ b/selectinf/randomized/tests/test_unbiased_estimates.py @@ -0,0 +1,150 @@ +import numpy as np + +from ..lasso import lasso, selected_targets +from ...tests.instance import gaussian_instance + +def UMVU(query, + X, + Y, + nonzero, + feat, + dispersion): + + n, p = X.shape + + nopt = nonzero.sum() + + _, randomizer_prec = query.randomizer.cov_prec + + implied_precision = np.zeros((n + nopt, n + nopt)) + + implied_precision[:n][:, :n] = (1. 
/ dispersion) * (np.identity(n)) + (X.dot(X.T) * randomizer_prec) + + implied_precision[n:][:, :n] = -query.opt_linear.T.dot(X.T) * randomizer_prec + + implied_precision[:n][:, n:] = implied_precision[n:][:, :n].T + + implied_precision[n:][:, n:] = query.opt_linear.T.dot(query.opt_linear) * randomizer_prec + + implied_cov = np.linalg.inv(implied_precision) + + _prec = np.linalg.inv(implied_cov[:n][:, :n]) + + linear_coef = (np.linalg.pinv(X[:, feat]).dot(_prec)) + offset = -np.linalg.pinv(X[:, feat]).dot(X.dot(query.initial_subgrad) + - _prec.dot(implied_cov[:n][:, n:]).dot(query.opt_linear.T.dot(query.initial_subgrad))) * (randomizer_prec) + + linear_coef *= dispersion + offset *= dispersion + UMVU = linear_coef.dot(Y) + offset + + return UMVU + +def EST(query, + X, + Y, + nonzero, + feat, + dispersion): + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(query.loglike, + query._W, + feat, + dispersion=dispersion) + + _, randomizer_prec = query.randomizer.cov_prec + cond_cov = query.cond_cov + logdens_linear = query.sampler.logdens_transform[0] + cond_mean = query.cond_mean + + prec_target = np.linalg.inv(cov_target) + prec_opt = np.linalg.inv(cond_cov) + + target_linear = cov_target_score.T.dot(prec_target) + target_offset = (-X.T.dot(Y) + query.initial_subgrad) - target_linear.dot(observed_target) + + target_lin = - logdens_linear.dot(target_linear) + target_off = cond_mean - target_lin.dot(observed_target) + + _prec = prec_target + (target_linear.T.dot(target_linear) * randomizer_prec) - target_lin.T.dot( + prec_opt).dot(target_lin) + _P = target_linear.T.dot(target_offset) * randomizer_prec + + linear_coef = cov_target.dot(_prec) + offset = cov_target.dot(_P - target_lin.T.dot(prec_opt).dot(target_off)) + est = linear_coef.dot(observed_target) + offset + + return est + +def test_UMVU(n=500, + p=100, + signal_fac=1., + s=5, + sigma=3., + rho=0.7, + randomizer_scale=np.sqrt(0.5)): + + + inst, const = gaussian_instance, lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=True, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + sigma_ = np.std(Y) + W = 0.8 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + #ridge_term=0., + randomizer_scale=randomizer_scale * sigma) + + signs = conv.fit() + nonzero = signs != 0 + + if nonzero.sum() > 0: + #dispersion = sigma ** 2 + if p > n/2: + dispersion = np.std(Y) ** 2 + else: + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + + feat = nonzero.copy() + feat[-5:] = 1 + dispersion = np.linalg.norm(Y - X[:, feat].dot(np.linalg.pinv(X[:, feat]).dot(Y))) ** 2 / (n - feat.sum()) + + umvu = UMVU(conv, + X, + Y, + nonzero, + feat, + dispersion) + + est = EST(conv, + X, + Y, + nonzero, + feat, + dispersion) + + print("check ", np.allclose(est, umvu, atol=1e-04), umvu, est) + + return umvu, est + +def main(): + + test_UMVU(n=400, p=100, s=5) + +if __name__ == "__main__": + main() From b133174867104102d681aa5671dc97ff89abd8fc Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 24 May 2021 09:22:42 -0400 Subject: [PATCH 089/187] updates to test for unbiased est --- selectinf/randomized/tests/test_unbiased_estimates.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selectinf/randomized/tests/test_unbiased_estimates.py b/selectinf/randomized/tests/test_unbiased_estimates.py index 1a9918e72..eb8beac0d 100644 --- 
a/selectinf/randomized/tests/test_unbiased_estimates.py +++ b/selectinf/randomized/tests/test_unbiased_estimates.py @@ -138,13 +138,13 @@ def test_UMVU(n=500, feat, dispersion) - print("check ", np.allclose(est, umvu, atol=1e-04), umvu, est) + print("check ", np.allclose(est-umvu, np.zeros(est.shape[0]), atol=1e-03), est-umvu) return umvu, est def main(): - test_UMVU(n=400, p=100, s=5) + test_UMVU(n=100, p=400, s=5) if __name__ == "__main__": main() From 93d1a67d07f1f9903fe7b9cbfb0140654cc22024 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 24 May 2021 09:37:46 -0400 Subject: [PATCH 090/187] commit changes before switch --- selectinf/randomized/approx_reference.py | 10 +- selectinf/randomized/lasso.py | 2 +- .../randomized/tests/test_approx_reference.py | 139 +++++++++--------- 3 files changed, 76 insertions(+), 75 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index af8b936c8..0041cccb7 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -64,9 +64,7 @@ def __init__(self, self.ntarget = ntarget = target_cov.shape[0] _scale = 4 * np.sqrt(np.diag(inverse_info)) - ngrid = 40 - - scale_ = 4 * np.max(np.sqrt(np.diag(inverse_info))) + ngrid = 60 self.stat_grid = np.zeros((ntarget, ngrid)) for j in range(ntarget): @@ -181,13 +179,14 @@ def _construct_families(self): grid = np.linspace(self.stat_grid[m].min(), self.stat_grid[m].max(), 1000) logW = (approx_fn(grid) - - 0.5 * (grid - self.observed_target[m])**2 / var_target) + 0.5 * (grid - self.observed_target[m])**2 / var_target) logW -= logW.max() + weights = np.exp(logW) # construction of families follows `selectinf.learning.core` self._families.append(discrete_family(grid, - np.exp(logW))) + weights)) # logG = - 0.5 * grid**2 / var_target # logG -= logG.max() @@ -223,6 +222,7 @@ def _approx_pivots(self, _cdf = family.cdf((mean_parameter[m] - observed_target) / var_target, x=observed_target) + #_cdf = family.cdf(mean_parameter[m]/var_target, x=observed_target) if alternatives[m] == 'twosided': pivot.append(2 * min(_cdf, 1 - _cdf)) elif alternatives[m] == 'greater': diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index ca7d133db..477b5b75c 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -321,7 +321,7 @@ def gaussian(X, ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1) if randomizer_scale is None: - randomizer_scale = np.sqrt(mean_diag) * 0.7 * np.std(Y) * np.sqrt(n / (n - 1.)) + randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.)) randomizer = randomization.isotropic_gaussian((p,), randomizer_scale) diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index 4a9276741..aaf2544c4 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -63,75 +63,76 @@ def test_approx_pivot(n=500, rho=0.4, randomizer_scale=1.): - inst, const = gaussian_instance, lasso.gaussian - signal = np.sqrt(signal_fac * 2 * np.log(p)) - - X, Y, beta = inst(n=n, - p=p, - signal=signal, - s=s, - equicorrelated=True, - rho=rho, - sigma=sigma, - random_signs=False)[:3] - - n, p = X.shape - - sigma_ = np.std(Y) - if n>p: - dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) - else: - dispersion = sigma_ ** 2 - - #W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ - eps = 
np.random.standard_normal((n, 2000)) * Y.std() - lam_theory = 0.7 * np.median(np.abs(X.T.dot(eps)).max(1)) - W = lam_theory * np.ones(p) - - conv = const(X, - Y, - W, - ridge_term = 0.) - #randomizer_scale=randomizer_scale * dispersion) + while True: - signs = conv.fit() - nonzero = signs != 0 - print("number of selected ", nonzero.sum()) + inst, const = gaussian_instance, lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) - if nonzero.sum()>0: + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=True, + rho=rho, + sigma=sigma, + random_signs=True)[:3] - beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) - if n>p: - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=dispersion) + n, p = X.shape + sigma_ = np.std(Y) + if n > p: + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) else: - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=sigma ** 2) - - inverse_info = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[1] - - approximate_grid_inf = approximate_grid_inference(conv, - observed_target, - cov_target, - cov_target_score) - - pivot = approximate_grid_inf._approx_pivots(beta_target) - - return pivot - + dispersion = sigma_ ** 2 + + # W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + eps = np.random.standard_normal((n, 2000)) * Y.std() + lam_theory = 0.7 * np.median(np.abs(X.T.dot(eps)).max(1)) + W = lam_theory * np.ones(p) + + conv = const(X, + Y, + W, + ridge_term=0.) + # randomizer_scale=randomizer_scale * dispersion) + + signs = conv.fit() + nonzero = signs != 0 + print("number of selected ", nonzero.sum()) + + if nonzero.sum() > 0: + + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + if n > p: + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + + else: + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=sigma ** 2) + + inverse_info = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[1] + + approximate_grid_inf = approximate_grid_inference(conv, + observed_target, + cov_target, + cov_target_score) + + pivot = approximate_grid_inf._approx_pivots(beta_target) + + return pivot def test_approx_ci(n=500, p=100, @@ -219,9 +220,9 @@ def main(nsim=300, CI = False): for i in range(nsim): _pivot.extend(test_approx_pivot(n=100, p=400, - signal_fac=1., - s=10, - sigma=5., + signal_fac=0.5, + s=0, + sigma=1., rho=0.30, randomizer_scale=1.)) @@ -243,7 +244,7 @@ def main(nsim=300, CI = False): signal_fac=1., s=5, sigma=3., - rho=0.4, + rho=0.3, randomizer_scale=1.) 
coverage_ += cov @@ -253,4 +254,4 @@ def main(nsim=300, CI = False): print("iteration completed ", n + 1) if __name__ == "__main__": - main(nsim=40, CI = False) + main(nsim=50, CI = False) From a4c47ba9791b49473a28b7a955eaefdef58aedf5 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 7 Jun 2021 12:18:31 -0400 Subject: [PATCH 091/187] MLE code updated --- selectinf/randomized/query.py | 807 ++++++------------ .../tests/test_selective_MLE_high.py | 56 +- 2 files changed, 305 insertions(+), 558 deletions(-) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index aa1cbd8a6..05afbcd8e 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -16,42 +16,30 @@ from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C from .approx_reference import approximate_grid_inference -class query(object): +class query(object): r""" This class is the base of randomized selective inference based on convex programs. - The main mechanism is to take an initial penalized program - .. math:: - \text{minimize}_B \ell(B) + {\cal P}(B) - and add a randomization and small ridge term yielding - .. math:: - - \text{minimize}_B \ell(B) + {\cal P}(B) - + \text{minimize}_B \ell(B) + {\cal P}(B) - \langle \omega, B \rangle + \frac{\epsilon}{2} \|B\|^2_2 - """ def __init__(self, randomization, perturb=None): """ - Parameters ---------- - randomization : `selection.randomized.randomization.randomization` - Instance of a randomization scheme. + Instance of a randomization scheme. Describes the law of $\omega$. - perturb : ndarray, optional Value of randomization vector, an instance of $\omega$. - - """ self.randomization = randomization self.perturb = perturb @@ -64,21 +52,17 @@ def __init__(self, randomization, perturb=None): def randomize(self, perturb=None): """ - The actual randomization step. - Parameters ---------- - perturb : ndarray, optional Value of randomization vector, an instance of $\omega$. 
- """ if not self._randomized: - (self.randomized_loss, - self._initial_omega) = self.randomization.randomize(self.loss, - self.epsilon, + (self.randomized_loss, + self._initial_omega) = self.randomization.randomize(self.loss, + self.epsilon, perturb=perturb) self._randomized = True @@ -97,8 +81,8 @@ def solve(self): raise NotImplementedError('abstract method') -class gaussian_query(query): +class gaussian_query(query): useC = True """ @@ -118,7 +102,7 @@ def fit(self, perturb=None): # Private methods - def _setup_sampler(self, + def _setup_sampler(self, linear_part, offset, opt_linear, @@ -131,10 +115,10 @@ def _setup_sampler(self, if not np.all(A.dot(self.observed_opt_state) - b <= 0): raise ValueError('constraints not satisfied') - (cond_mean, - cond_cov, - cond_precision, - logdens_linear) = self._setup_implied_gaussian(opt_linear, + (cond_mean, + cond_cov, + cond_precision, + logdens_linear) = self._setup_implied_gaussian(opt_linear, opt_offset, dispersion) @@ -146,12 +130,13 @@ def log_density(logdens_linear, offset, cond_prec, opt, score): arg = opt + mean_term return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1) - log_density = functools.partial(log_density, - logdens_linear, - opt_offset, + log_density = functools.partial(log_density, + logdens_linear, + opt_offset, cond_precision) - self.cond_mean, self.cond_cov = cond_mean, cond_cov + _, randomizer_prec = self.randomizer.cov_prec + self.cond_mean, self.cond_cov, self.randomizer_prec = cond_mean, cond_cov, randomizer_prec affine_con = constraints(A, b, @@ -163,17 +148,18 @@ def log_density(logdens_linear, offset, cond_prec, opt, score): self.observed_score_state, log_density, (logdens_linear, opt_offset), + self.randomizer_prec, selection_info=self.selection_variable, useC=self.useC) - def _setup_implied_gaussian(self, - opt_linear, + def _setup_implied_gaussian(self, + opt_linear, opt_offset, # optional dispersion parameter # for covariance of randomization dispersion=1): - _, prec = self.randomizer.cov_prec + _, prec = self.randomizer.cov_prec prec = prec / dispersion if np.asarray(prec).shape in [(), (0,)]: @@ -190,9 +176,9 @@ def _setup_implied_gaussian(self, return cond_mean, cond_cov, cond_precision, logdens_linear def summary(self, - observed_target, - target_cov, - target_score_cov, + observed_target, + target_cov, + target_score_cov, alternatives, opt_sample=None, target_sample=None, @@ -204,38 +190,27 @@ def summary(self, """ Produce p-values and confidence intervals for targets of model including selected features - Parameters ---------- - observed_target : ndarray Observed estimate of target. - target_cov : ndarray Estimated covaraince of target. - target_score_cov : ndarray Estimated covariance of target and score of randomized query. - alternatives : [str], optional Sequence of strings describing the alternatives, should be values of ['twosided', 'less', 'greater'] - parameter : np.array Hypothesized value for parameter -- defaults to 0. - level : float Confidence level. - ndraw : int (optional) Defaults to 1000. - burnin : int (optional) Defaults to 1000. - compute_intervals : bool Compute confidence intervals? - dispersion : float (optional) Use a known value for dispersion, or Pearson's X^2? 
""" @@ -246,7 +221,7 @@ def summary(self, if opt_sample is None: opt_sample, logW = self.sampler.sample(ndraw, burnin) else: - if len(opt_sample) == 1: # only a sample, so weights are 1s + if len(opt_sample) == 1: # only a sample, so weights are 1s opt_sample = opt_sample[0] logW = np.zeros(ndraw) else: @@ -272,17 +247,16 @@ def summary(self, else: pvalues = pivots - result = pd.DataFrame({'target':observed_target, - 'pvalue':pvalues}) + result = pd.DataFrame({'target': observed_target, + 'pvalue': pvalues}) if compute_intervals: - MLE = self.selective_MLE(observed_target, target_cov, target_score_cov)[0] MLE_intervals = np.asarray(MLE[['lower_confidence', 'upper_confidence']]) - intervals = self.sampler.confidence_intervals( + intervals = self.sampler.confidence_intervals( observed_target, target_cov, target_score_cov, @@ -291,8 +265,8 @@ def summary(self, initial_guess=MLE_intervals, level=level) - result.insert(2, 'lower_confidence', intervals[:,0]) - result.insert(3, 'upper_confidence', intervals[:,1]) + result.insert(2, 'lower_confidence', intervals[:, 0]) + result.insert(3, 'upper_confidence', intervals[:, 1]) if not np.all(parameter == 0): result.insert(4, 'pivot', pivots) @@ -301,33 +275,26 @@ def summary(self, return result def selective_MLE(self, - observed_target, - target_cov, - target_score_cov, + observed_target, + target_cov, + target_score_cov, level=0.9, - solve_args={'tol':1.e-12}): + solve_args={'tol': 1.e-12}): """ - Parameters ---------- - observed_target : ndarray Observed estimate of target. - target_cov : ndarray Estimated covaraince of target. - target_score_cov : ndarray Estimated covariance of target and score of randomized query. - level : float, optional Confidence level. - solve_args : dict, optional Arguments passed to solver. - """ - + return self.sampler.selective_MLE(observed_target, target_cov, target_score_cov, @@ -336,50 +303,43 @@ def selective_MLE(self, solve_args=solve_args) def posterior(self, - observed_target, - target_cov, - target_score_cov, + observed_target, + target_cov, + target_score_cov, prior=None, dispersion=None, - solve_args={'tol':1.e-12}): + solve_args={'tol': 1.e-12}): """ - Parameters ---------- - observed_target : ndarray Observed estimate of target. - target_cov : ndarray Estimated covaraince of target. - target_score_cov : ndarray Estimated covariance of target and score of randomized query. - prior : callable A callable object that takes a single argument `parameter` of the same shape as `observed_target` and returns (value of log prior, gradient of log prior) - dispersion : float, optional Dispersion parameter for log-likelihood. - solve_args : dict, optional Arguments passed to solver. - """ - + if dispersion is None: dispersion = 1 print('Using dispersion parameter 1...') - + if prior is None: Di = 1. / (200 * np.diag(target_cov)) + def prior(target_parameter): grad_prior = -target_parameter * Di - log_prior = -0.5 * np.sum(target_parameter**2 * Di) + log_prior = -0.5 * np.sum(target_parameter ** 2 * Di) return log_prior, grad_prior - + return posterior(self, observed_target, target_cov, @@ -396,26 +356,19 @@ def approximate_grid_inference(self, solve_args={'tol': 1.e-12}): """ - Parameters ---------- - observed_target : ndarray Observed estimate of target. - target_cov : ndarray Estimated covaraince of target. - target_score_cov : ndarray Estimated covariance of target and score of randomized query. 
- alternatives : [str], optional Sequence of strings describing the alternatives, should be values of ['twosided', 'less', 'greater'] - solve_args : dict, optional Arguments passed to solver. - """ G = approximate_grid_inference(self, @@ -425,8 +378,8 @@ def approximate_grid_inference(self, solve_args=solve_args) return G.summary(alternatives=alternatives) -class multiple_queries(object): +class multiple_queries(object): ''' Combine several queries of a given data through randomized algorithms. @@ -434,16 +387,12 @@ class multiple_queries(object): def __init__(self, objectives): ''' - Parameters ---------- - objectives : sequence A sequences of randomized objective functions. - Notes ----- - Each element of `objectives` must have a `setup_sampler` method that returns a description of the distribution of the @@ -454,10 +403,8 @@ def __init__(self, objectives): `form_covariances` to linearly decompose each score in terms of a target and an asymptotically independent piece. - Returns ------- - None ''' @@ -470,9 +417,9 @@ def fit(self): def summary(self, observed_target, - opt_sampling_info, # a sequence of (target_cov, score_cov) - # objects in theory all target_cov - # should be about the same... + opt_sampling_info, # a sequence of (target_cov, score_cov) + # objects in theory all target_cov + # should be about the same... alternatives=None, parameter=None, level=0.9, @@ -483,32 +430,23 @@ def summary(self, """ Produce p-values and confidence intervals for targets of model including selected features - Parameters ---------- - observed_target : ndarray Observed estimate of target. - alternatives : [str], optional Sequence of strings describing the alternatives, should be values of ['twosided', 'less', 'greater'] - parameter : np.array Hypothesized value for parameter -- defaults to 0. - level : float Confidence level. - ndraw : int (optional) Defaults to 1000. - burnin : int (optional) Defaults to 1000. - compute_intervals : bool Compute confidence intervals? - """ if parameter is None: @@ -525,10 +463,10 @@ def summary(self, if opt_sampling_info[i][0] is None or opt_sampling_info[i][1] is None: raise ValueError("did not input target and score covariance info") opt_sample, opt_logW = self.objectives[i].sampler.sample(ndraw, burnin) - self.opt_sampling_info.append((self.objectives[i].sampler, - opt_sample, + self.opt_sampling_info.append((self.objectives[i].sampler, + opt_sample, opt_logW, - opt_sampling_info[i][0], + opt_sampling_info[i][0], opt_sampling_info[i][1])) pivots = self.coefficient_pvalues(observed_target, @@ -547,10 +485,10 @@ def summary(self, intervals = self.confidence_intervals(observed_target, level) - result = pd.DataFrame({'target':observed_target, - 'pvalue':pvalues, - 'lower_confidence':intervals[:,0], - 'upper_confidence':intervals[:,1]}) + result = pd.DataFrame({'target': observed_target, + 'pvalue': pvalues, + 'lower_confidence': intervals[:, 0], + 'upper_confidence': intervals[:, 1]}) if not np.all(parameter == 0): result.insert(4, 'pivot', pivots) @@ -567,30 +505,23 @@ def coefficient_pvalues(self, ''' Construct selective p-values for each parameter of the target. - Parameters ---------- - observed_target : ndarray Observed estimate of target. - parameter : ndarray (optional) A vector of parameters with shape `self.shape` at which to evaluate p-values. Defaults to `np.zeros(self.shape)`. - sample_args : sequence Arguments to `self.sample` if sample is not found for a given objective. 
- alternatives : [str], optional Sequence of strings describing the alternatives, should be values of ['twosided', 'less', 'greater'] - Returns ------- pvalues : ndarray - ''' for i in range(len(self.objectives)): @@ -599,10 +530,10 @@ def coefficient_pvalues(self, self.opt_sampling_info[i][1] = _sample self.opt_sampling_info[i][2] = _logW - ndraw = self.opt_sampling_info[0][1].shape[0] # nsample for normal samples taken from the 1st objective + ndraw = self.opt_sampling_info[0][1].shape[0] # nsample for normal samples taken from the 1st objective - _intervals = optimization_intervals(self.opt_sampling_info, - observed_target, + _intervals = optimization_intervals(self.opt_sampling_info, + observed_target, ndraw) pvals = [] @@ -614,7 +545,6 @@ def coefficient_pvalues(self, return np.array(pvals) - def confidence_intervals(self, observed_target, sample_args=(), @@ -623,25 +553,19 @@ def confidence_intervals(self, ''' Construct selective confidence intervals for each parameter of the target. - Parameters ---------- - observed_target : ndarray Observed estimate of target. - sample_args : sequence Arguments to `self.sample` if sample is not found for a given objective. - level : float Confidence level. - Returns ------- limits : ndarray Confidence intervals for each target. - ''' for i in range(len(self.objectives)): @@ -650,10 +574,10 @@ def confidence_intervals(self, self.opt_sampling_info[i][1] = _sample self.opt_sampling_info[i][2] = _logW - ndraw = self.opt_sampling_info[0][1].shape[0] # nsample for normal samples taken from the 1st objective + ndraw = self.opt_sampling_info[0][1].shape[0] # nsample for normal samples taken from the 1st objective - _intervals = optimization_intervals(self.opt_sampling_info, - observed_target, + _intervals = optimization_intervals(self.opt_sampling_info, + observed_target, ndraw) limits = [] @@ -663,7 +587,7 @@ def confidence_intervals(self, keep[i] = 1. limits.append(_intervals.confidence_interval(keep, level=level)) - return np.array(limits) + return np.array(limits) class optimization_sampler(object): @@ -698,36 +622,27 @@ def hypothesis_test(self, using sampler with gradient map `self.gradient` and projection map `self.projection`. - Parameters ---------- - test_stat : callable Test statistic to evaluate on sample from selective distribution. - observed_value : float Observed value of test statistic. Used in p-value calculation. - sample_args : sequence Arguments to `self.sample` if sample is None. - sample : np.array (optional) If not None, assumed to be a sample of shape (-1,) + `self.shape` representing a sample of the target from parameters. Allows reuse of the same sample for construction of confidence intervals, hypothesis tests, etc. If not None, `ndraw, burnin, stepsize` are ignored. - parameter : np.float (optional) - alternative : ['greater', 'less', 'twosided'] What alternative to use. - Returns ------- - pvalue : float ''' @@ -767,59 +682,50 @@ def confidence_intervals(self, level=0.9, initial_guess=None): ''' - Parameters ---------- - + observed : np.float A vector of parameters with shape `self.shape`, representing coordinates of the target. - sample_args : sequence Arguments to `self.sample` if sample is None. - sample : np.array (optional) If not None, assumed to be a sample of shape (-1,) + `self.shape` representing a sample of the target from parameters `self.reference`. Allows reuse of the same sample for construction of confidence intervals, hypothesis tests, etc. - level : float (optional) Specify the confidence level. 
- initial_guess : np.float Initial guesses at upper and lower limits, optional. - Notes ----- - Construct selective confidence intervals for each parameter of the target. - Returns ------- - intervals : [(float, float)] List of confidence intervals. ''' if sample is None: sample, logW = self.sample(*sample_args) - sample = np.vstack([sample]*5) # why times 5? - logW = np.hstack([logW]*5) + sample = np.vstack([sample] * 5) # why times 5? + logW = np.hstack([logW] * 5) else: sample, logW = sample ndraw = sample.shape[0] - _intervals = optimization_intervals([(self, - sample, + _intervals = optimization_intervals([(self, + sample, logW, - target_cov, + target_cov, score_cov)], - observed_target, - ndraw, + observed_target, + ndraw, normal_sample=normal_sample) limits = [] @@ -848,35 +754,27 @@ def coefficient_pvalues(self, ''' Construct selective p-values for each parameter of the target. - Parameters ---------- - observed : np.float A vector of parameters with shape `self.shape`, representing coordinates of the target. - parameter : np.float (optional) A vector of parameters with shape `self.shape` at which to evaluate p-values. Defaults to `np.zeros(self.shape)`. - sample_args : sequence Arguments to `self.sample` if sample is None. - sample : np.array (optional) If not None, assumed to be a sample of shape (-1,) + `self.shape` representing a sample of the target from parameters `self.reference`. Allows reuse of the same sample for construction of confidence intervals, hypothesis tests, etc. - alternatives : list of ['greater', 'less', 'twosided'] What alternative to use. - Returns ------- pvalues : np.float - ''' if alternatives is None: @@ -891,21 +789,21 @@ def coefficient_pvalues(self, if parameter is None: parameter = np.zeros(observed_target.shape[0]) - _intervals = optimization_intervals([(self, - sample, + _intervals = optimization_intervals([(self, + sample, logW, - target_cov, + target_cov, score_cov)], - observed_target, - ndraw, + observed_target, + ndraw, normal_sample=normal_sample) pvals = [] for i in range(observed_target.shape[0]): keep = np.zeros_like(observed_target) keep[i] = 1. - pvals.append(_intervals.pivot(keep, - candidate=parameter[i], + pvals.append(_intervals.pivot(keep, + candidate=parameter[i], alternative=alternatives[i])) return np.array(pvals) @@ -916,14 +814,14 @@ def _reconstruct_score_from_target(self, if transform is not None: direction, nuisance = transform score_sample = (np.multiply.outer(target_sample, - direction) + + direction) + nuisance[None, :]) else: score_sample = target_sample return score_sample -class affine_gaussian_sampler(optimization_sampler): +class affine_gaussian_sampler(optimization_sampler): ''' Sample from an affine truncated Gaussian ''' @@ -933,42 +831,36 @@ def __init__(self, initial_point, observed_score_state, log_cond_density, - logdens_transform, # described how score enters log_density. + logdens_transform, # described how score enters log_density. + randomizer_prec, selection_info=None, useC=False): ''' Parameters ---------- - affine_con : `selection.constraints.affine.constraints` Affine constraints - initial_point : ndarray Feasible point for affine constraints. - observed_score_state : ndarray Observed score of convex loss (slightly modified). - Essentially (asymptotically) equivalent - to $\nabla \ell(\beta^*) + + Essentially (asymptotically) equivalent + to $\nabla \ell(\beta^*) + Q(\beta^*)\beta^*$ where $\beta^*$ is population minimizer. For linear regression, it is always $-X^Ty$. 
- log_cond_density : callable Density of optimization variables given score - logdens_transform : tuple Description of how conditional mean of optimization variables depends on score. - selection_info : optional Function of optimization variables that will be conditioned on. - useC : bool, optional Use python or C solver. - + ''' self.affine_con = affine_con @@ -982,6 +874,7 @@ def __init__(self, self._log_cond_density = log_cond_density self.logdens_transform = logdens_transform self.useC = useC + self.randomizer_prec = randomizer_prec def log_cond_density(self, opt_sample, @@ -990,9 +883,9 @@ def log_cond_density(self, if transform is not None: direction, nuisance = transform - return self._log_density_ray(0, # candidate - # has been added to - # target + return self._log_density_ray(0, # candidate + # has been added to + # target direction, nuisance, target_sample, @@ -1012,16 +905,12 @@ def sample(self, ndraw, burnin): using projected Langevin sampler with gradient map `self.gradient` and projection map `self.projection`. - Parameters ---------- - ndraw : int How long a chain to return? - burnin : int How many samples to discard? - ''' _sample = sample_from_constraints(self.affine_con, @@ -1030,63 +919,58 @@ def sample(self, ndraw, burnin): burnin=burnin) return _sample, np.zeros(_sample.shape[0]) - def selective_MLE(self, - observed_target, - target_cov, - target_score_cov, - # initial (observed) value of optimization variables -- + def selective_MLE(self, + observed_target, + target_cov, + target_score_cov, + # initial (observed) value of optimization variables -- # used as a feasible point. - # precise value used only for independent estimator - init_soln, - solve_args={'tol':1.e-12}, + # precise value used only for independent estimator + init_soln, + solve_args={'tol': 1.e-12}, level=0.9): """ Selective MLE based on approximation of CGF. - Parameters ---------- - observed_target : ndarray Observed estimate of target. - target_cov : ndarray Estimated covaraince of target. - target_score_cov : ndarray Estimated covariance of target and score of randomized query. - init_soln : ndarray Feasible point for optimization problem. - level : float, optional Confidence level. - solve_args : dict, optional Arguments passed to solver. - """ + score_offset = self.observed_score_state + self.logdens_transform[1] - return selective_MLE(observed_target, - target_cov, - target_score_cov, - init_soln, + return selective_MLE(observed_target, + target_cov, + target_score_cov, + init_soln, self.mean, self.covariance, self.logdens_transform[0], self.affine_con.linear_part, self.affine_con.offset, + self.randomizer_prec, + score_offset, solve_args=solve_args, level=level, useC=self.useC) - def reparam_map(self, - parameter_target, - observed_target, - target_cov, - target_score_cov, - init_soln, - solve_args={'tol':1.e-12}, + def reparam_map(self, + parameter_target, + observed_target, + target_cov, + target_score_cov, + init_soln, + solve_args={'tol': 1.e-12}, useC=True): prec_target = np.linalg.inv(target_cov) @@ -1107,19 +991,23 @@ def reparam_map(self, solver = _solve_barrier_affine_py val, soln, hess = solver(conjugate_arg, - prec_opt, # JT: I think this quadratic is wrong should involve target_cov and target_lin too? + prec_opt, + # JT: I think this quadratic is wrong should involve target_cov and target_lin too? 
init_soln, self.affine_con.linear_part, self.affine_con.offset, **solve_args) - + inter_map = target_cov.dot(target_lin.T.dot(prec_opt)) param_map = parameter_target + inter_map.dot(mean_param - soln) - log_normalizer_map = ((parameter_target.T.dot(prec_target + target_lin.T.dot(prec_opt).dot(target_lin)).dot(parameter_target))/2. - - parameter_target.T.dot(target_lin.T).dot(prec_opt.dot(soln)) - target_offset.T.dot(prec_opt).dot(target_offset)/2. - + val - (param_map.T.dot(prec_target).dot(param_map))/2.) + log_normalizer_map = ((parameter_target.T.dot(prec_target + target_lin.T.dot(prec_opt).dot(target_lin)).dot( + parameter_target)) / 2. + - parameter_target.T.dot(target_lin.T).dot(prec_opt.dot(soln)) - target_offset.T.dot( + prec_opt).dot(target_offset) / 2. + + val - (param_map.T.dot(prec_target).dot(param_map)) / 2.) - jacobian_map = (np.identity(ndim) + inter_map.dot(target_lin)) - inter_map.dot(hess).dot(prec_opt.dot(target_lin)) + jacobian_map = (np.identity(ndim) + inter_map.dot(target_lin)) - inter_map.dot(hess).dot( + prec_opt.dot(target_lin)) return param_map, log_normalizer_map, jacobian_map @@ -1132,24 +1020,24 @@ def _log_density_ray(self, # implicitly caching (opt_sample, gaussian_sample) ? - if (not hasattr(self, "_direction") or not - np.all(self._direction == direction)): + if (not hasattr(self, "_direction") or not + np.all(self._direction == direction)): logdens_lin, logdens_offset = self.logdens_transform if opt_sample.shape[1] == 1: prec = 1. / self.covariance[0, 0] - quadratic_term = logdens_lin.dot(direction)**2 * prec - arg = (logdens_lin.dot(nuisance + logdens_offset) + + quadratic_term = logdens_lin.dot(direction) ** 2 * prec + arg = (logdens_lin.dot(nuisance + logdens_offset) + logdens_lin.dot(direction) * gaussian_sample + - opt_sample[:,0]) + opt_sample[:, 0]) linear_term = logdens_lin.dot(direction) * prec * arg - constant_term = arg**2 * prec + constant_term = arg ** 2 * prec - self._cache = {'linear_term':linear_term, - 'quadratic_term':quadratic_term, - 'constant_term':constant_term} + self._cache = {'linear_term': linear_term, + 'quadratic_term': quadratic_term, + 'constant_term': constant_term} else: self._direction = direction.copy() @@ -1169,82 +1057,83 @@ def _log_density_ray(self, logdens_lin, logdens_offset = self.logdens_transform cov = self.covariance prec = np.linalg.inv(cov) - linear_part = logdens_lin.dot(direction) # A gamma + linear_part = logdens_lin.dot(direction) # A gamma if 1 in opt_sample.shape: - pass # stop3 what's this for? + pass # stop3 what's this for? 
cov = self.covariance quadratic_term = linear_part.T.dot(prec).dot(linear_part) arg1 = opt_sample.T - arg2 = logdens_lin.dot(np.multiply.outer(direction, gaussian_sample) + - (nuisance + logdens_offset)[:,None]) + arg2 = logdens_lin.dot(np.multiply.outer(direction, gaussian_sample) + + (nuisance + logdens_offset)[:, None]) arg = arg1 + arg2 linear_term = linear_part.T.dot(prec).dot(arg) constant_term = np.sum(prec.dot(arg) * arg, 0) - self._cache = {'linear_term':linear_term, - 'quadratic_term':quadratic_term, - 'constant_term':constant_term} - (linear_term, + self._cache = {'linear_term': linear_term, + 'quadratic_term': quadratic_term, + 'constant_term': constant_term} + (linear_term, quadratic_term, - constant_term) = (self._cache['linear_term'], + constant_term) = (self._cache['linear_term'], self._cache['quadratic_term'], self._cache['constant_term']) - return (-0.5 * candidate**2 * quadratic_term - - candidate * linear_term - 0.5 * constant_term) + return (-0.5 * candidate ** 2 * quadratic_term - + candidate * linear_term - 0.5 * constant_term) + class optimization_intervals(object): def __init__(self, - opt_sampling_info, # a sequence of - # (opt_sampler, - # opt_sample, - # opt_logweights, - # target_cov, - # score_cov) objects - # in theory all target_cov - # should be about the same... + opt_sampling_info, # a sequence of + # (opt_sampler, + # opt_sample, + # opt_logweights, + # target_cov, + # score_cov) objects + # in theory all target_cov + # should be about the same... observed, - nsample, # how large a normal sample + nsample, # how large a normal sample target_cov=None, normal_sample=None): - # not all opt_samples will be of the same size as nsample + # not all opt_samples will be of the same size as nsample # let's repeat them as necessary - + tiled_sampling_info = [] - for (opt_sampler, - opt_sample, + for (opt_sampler, + opt_sample, opt_logW, - t_cov, - t_score_cov) in opt_sampling_info: + t_cov, + t_score_cov) in opt_sampling_info: if opt_sample is not None: if opt_sample.shape[0] < nsample: if opt_sample.ndim == 1: - tiled_opt_sample = np.tile(opt_sample, - int(np.ceil(nsample / - opt_sample.shape[0])))[:nsample] + tiled_opt_sample = np.tile(opt_sample, + int(np.ceil(nsample / + opt_sample.shape[0])))[:nsample] tiled_opt_logW = np.tile(opt_logW, - int(np.ceil(nsample / + int(np.ceil(nsample / opt_logW.shape[0])))[:nsample] else: - tiled_opt_sample = np.tile(opt_sample, - (int(np.ceil(nsample / - opt_sample.shape[0])), 1))[:nsample] + tiled_opt_sample = np.tile(opt_sample, + (int(np.ceil(nsample / + opt_sample.shape[0])), 1))[:nsample] tiled_opt_logW = np.tile(opt_logW, - (int(np.ceil(nsample / + (int(np.ceil(nsample / opt_logW.shape[0])), 1))[:nsample] else: tiled_opt_sample = opt_sample[:nsample] tiled_opt_logW = opt_logW[:nsample] else: tiled_sample = None - tiled_sampling_info.append((opt_sampler, - tiled_opt_sample, + tiled_sampling_info.append((opt_sampler, + tiled_opt_sample, tiled_opt_logW, - t_cov, + t_cov, t_score_cov)) self.opt_sampling_info = tiled_sampling_info @@ -1252,14 +1141,14 @@ def __init__(self, for opt_sampler, opt_sample, opt_logW, _, _ in opt_sampling_info: self._logden += opt_sampler.log_cond_density( - opt_sample, - opt_sampler.observed_score_state, - transform=None) + opt_sample, + opt_sampler.observed_score_state, + transform=None) self._logden -= opt_logW if opt_sample.shape[0] < nsample: - self._logden = np.tile(self._logden, - int(np.ceil(nsample / - opt_sample.shape[0])))[:nsample] + self._logden = np.tile(self._logden, + 
int(np.ceil(nsample / + opt_sample.shape[0])))[:nsample] # this is our observed unpenalized estimator self.observed = observed.copy() @@ -1274,9 +1163,9 @@ def __init__(self, if normal_sample is None: self._normal_sample = np.random.multivariate_normal( - mean=np.zeros(self.target_cov.shape[0]), - cov=self.target_cov, - size=(nsample,)) + mean=np.zeros(self.target_cov.shape[0]), + cov=self.target_cov, + size=(nsample,)) else: self._normal_sample = normal_sample @@ -1303,12 +1192,11 @@ def pivot(self, nuisance = [] translate_dirs = [] - for (opt_sampler, - opt_sample, - _, - _, + for (opt_sampler, + opt_sample, + _, + _, target_score_cov) in self.opt_sampling_info: - cur_score_cov = linear_func.dot(target_score_cov) # cur_nuisance is in the view's score coordinates @@ -1316,10 +1204,10 @@ def pivot(self, nuisance.append(cur_nuisance) translate_dirs.append(cur_score_cov / target_cov) - weights = self._weights(sample_stat, # normal sample - candidate, # candidate value - nuisance, # nuisance sufficient stats for each view - translate_dirs) # points will be moved like sample * target_score_cov + weights = self._weights(sample_stat, # normal sample + candidate, # candidate value + nuisance, # nuisance sufficient stats for each view + translate_dirs) # points will be moved like sample * target_score_cov pivot = np.mean((sample_stat + candidate <= observed_stat) * weights) / np.mean(weights) @@ -1330,19 +1218,20 @@ def pivot(self, else: return 1 - pivot - def confidence_interval(self, - linear_func, - level=0.90, + def confidence_interval(self, + linear_func, + level=0.90, how_many_sd=20, guess=None): sample_stat = self._normal_sample.dot(linear_func) observed_stat = self.observed.dot(linear_func) - + def _rootU(gamma): return self.pivot(linear_func, observed_stat + gamma, alternative='less') - (1 - level) / 2. + def _rootL(gamma): return self.pivot(linear_func, observed_stat + gamma, @@ -1352,10 +1241,10 @@ def _rootL(gamma): grid_min, grid_max = -how_many_sd * np.std(sample_stat), how_many_sd * np.std(sample_stat) upper = bisect(_rootU, grid_min, grid_max) lower = bisect(_rootL, grid_min, grid_max) - + else: delta = 0.5 * (guess[1] - guess[0]) - + # find interval bracketing upper solution count = 0 while True: @@ -1383,14 +1272,14 @@ def _rootL(gamma): # Private methods - def _weights(self, + def _weights(self, stat_sample, candidate, nuisance, translate_dirs): # Here we should loop through the views - # and move the score of each view + # and move the score of each view # for each projected (through linear_func) normal sample # using the linear decomposition @@ -1421,21 +1310,18 @@ def _weights(self, return np.exp(_logratio) + def naive_confidence_intervals(diag_cov, observed, level=0.9): """ Compute naive Gaussian based confidence intervals for target. Parameters ---------- - diag_cov : diagonal of a covariance matrix - observed : np.float A vector of observed data of shape `target.shape` - alpha : float (optional) 1 - confidence level. 
- Returns ------- intervals : np.float @@ -1444,223 +1330,72 @@ def naive_confidence_intervals(diag_cov, observed, level=0.9): alpha = 1 - level diag_cov = np.asarray(diag_cov) p = diag_cov.shape[0] - quantile = - ndist.ppf(alpha/2) + quantile = - ndist.ppf(alpha / 2) LU = np.zeros((2, p)) for j in range(p): sigma = np.sqrt(diag_cov[j]) - LU[0,j] = observed[j] - sigma * quantile - LU[1,j] = observed[j] + sigma * quantile + LU[0, j] = observed[j] - sigma * quantile + LU[1, j] = observed[j] + sigma * quantile return LU.T + def naive_pvalues(diag_cov, observed, parameter): diag_cov = np.asarray(diag_cov) p = diag_cov.shape[0] pvalues = np.zeros(p) for j in range(p): sigma = np.sqrt(diag_cov[j]) - pval = ndist.cdf((observed[j] - parameter[j])/sigma) - pvalues[j] = 2 * min(pval, 1-pval) + pval = ndist.cdf((observed[j] - parameter[j]) / sigma) + pvalues[j] = 2 * min(pval, 1 - pval) return pvalues -# private function - -def _solve_barrier_affine_py(conjugate_arg, - precision, - feasible_point, - con_linear, - con_offset, - step=1, - nstep=1000, - min_its=200, - tol=1.e-10): - - scaling = np.sqrt(np.diag(con_linear.dot(precision).dot(con_linear.T))) - - if feasible_point is None: - feasible_point = 1. / scaling - - objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u)/2. \ - + np.log(1.+ 1./((con_offset - con_linear.dot(u))/ scaling)).sum() - grad = lambda u: -conjugate_arg + precision.dot(u) - con_linear.T.dot(1./(scaling + con_offset - con_linear.dot(u)) - - 1./(con_offset - con_linear.dot(u))) - barrier_hessian = lambda u: con_linear.T.dot(np.diag(-1./((scaling + con_offset-con_linear.dot(u))**2.) - + 1./((con_offset-con_linear.dot(u))**2.))).dot(con_linear) - - current = feasible_point - current_value = np.inf - - for itercount in range(nstep): - cur_grad = grad(current) - - # make sure proposal is feasible - - count = 0 - while True: - count += 1 - proposal = current - step * cur_grad - if np.all(con_offset-con_linear.dot(proposal) > 0): - break - step *= 0.5 - if count >= 40: - raise ValueError('not finding a feasible point') - - # make sure proposal is a descent - - count = 0 - while True: - count += 1 - proposal = current - step * cur_grad - proposed_value = objective(proposal) - if proposed_value <= current_value: - break - step *= 0.5 - if count >= 20: - if not (np.isnan(proposed_value) or np.isnan(current_value)): - break - else: - raise ValueError('value is NaN: %f, %f' % (proposed_value, current_value)) - - # stop if relative decrease is small - - if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value) and itercount >= min_its: - current = proposal - current_value = proposed_value - break - - current = proposal - current_value = proposed_value - - if itercount % 4 == 0: - step *= 2 - - hess = np.linalg.inv(precision + barrier_hessian(current)) - return current_value, current, hess - -def _solve_barrier_nonneg(conjugate_arg, - precision, - feasible_point=None, - step=1, - nstep=1000, - tol=1.e-8): - - scaling = np.sqrt(np.diag(precision)) - - if feasible_point is None: - feasible_point = 1. / scaling - - objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u)/2. + np.log(1.+ 1./(u / scaling)).sum() - grad = lambda u: -conjugate_arg + precision.dot(u) + (1./(scaling + u) - 1./u) - barrier_hessian = lambda u: (-1./((scaling + u)**2.) 
+ 1./(u**2.)) - - current = feasible_point - current_value = np.inf - - for itercount in range(nstep): - cur_grad = grad(current) - - # make sure proposal is feasible - - count = 0 - while True: - count += 1 - proposal = current - step * cur_grad - if np.all(proposal > 0): - break - step *= 0.5 - if count >= 40: - raise ValueError('not finding a feasible point') - - # make sure proposal is a descent - - count = 0 - while True: - proposal = current - step * cur_grad - proposed_value = objective(proposal) - if proposed_value <= current_value: - break - step *= 0.5 - if count >= 20: - if not (np.isnan(proposed_value) or np.isnan(current_value)): - break - else: - raise ValueError('value is NaN: %f, %f' % (proposed_value, current_value)) - - # stop if relative decrease is small - - if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value): - current = proposal - current_value = proposed_value - break - - current = proposal - current_value = proposed_value - - if itercount % 4 == 0: - step *= 2 - - hess = np.linalg.inv(precision + np.diag(barrier_hessian(current))) - return current_value, current, hess - - -def selective_MLE(observed_target, - target_cov, - target_score_cov, - init_soln, # initial (observed) value of - # optimization variables -- used as a - # feasible point. precise value used - # only for independent estimator +def selective_MLE(observed_target, + target_cov, + target_score_cov, + init_soln, # initial (observed) value of + # optimization variables -- used as a + # feasible point. precise value used + # only for independent estimator cond_mean, cond_cov, logdens_linear, linear_part, offset, - solve_args={'tol':1.e-12}, + randomizer_prec, + score_offset, + solve_args={'tol': 1.e-12}, level=0.9, useC=False): """ Selective MLE based on approximation of CGF. - Parameters ---------- - observed_target : ndarray Observed estimate of target. - target_cov : ndarray Estimated covaraince of target. - target_score_cov : ndarray Estimated covariance of target and score of randomized query. - init_soln : ndarray Feasible point for optimization problem. - cond_mean : ndarray Conditional mean of optimization variables given target. - cond_cov : ndarray Conditional covariance of optimization variables given target. - logdens_linear : ndarray Describes how conditional mean of optimization variables varies with target. - linear_part : ndarray Linear part of affine constraints: $\{o:Ao \leq b\}$ - offset : ndarray Offset part of affine constraints: $\{o:Ao \leq b\}$ - solve_args : dict, optional Arguments passed to solver. - level : float, optional Confidence level. - useC : bool, optional Use python or C solver. 
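    A sketch of the call and return signature as modified in this patch (the inputs
    would normally be assembled by `affine_gaussian_sampler.selective_MLE` above;
    the variable names here are placeholders):

    ```python
    result, observed_info_mean, log_ref = selective_MLE(observed_target,
                                                        target_cov,
                                                        target_score_cov,
                                                        init_soln,
                                                        cond_mean,
                                                        cond_cov,
                                                        logdens_linear,
                                                        linear_part,
                                                        offset,
                                                        randomizer_prec,
                                                        score_offset)
    # `result` is a pd.DataFrame with columns 'MLE', 'SE', 'Zvalue', 'pvalue',
    # 'lower_confidence', 'upper_confidence' and 'unbiased'.
    mle, se = result['MLE'], result['SE']
    ```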
- """ if np.asarray(observed_target).shape in [(), (0,)]: @@ -1669,25 +1404,37 @@ def selective_MLE(observed_target, observed_target = np.atleast_1d(observed_target) prec_target = np.linalg.inv(target_cov) + prec_opt = np.linalg.inv(cond_cov) + # target_lin determines how the conditional mean of optimization variables # vary with target # logdens_linear determines how the argument of the optimization density # depends on the score, not how the mean depends on score, hence the minus sign - target_lin = - logdens_linear.dot(target_score_cov.T.dot(prec_target)) - target_offset = cond_mean - target_lin.dot(observed_target) + target_linear = target_score_cov.T.dot(prec_target) + target_offset = score_offset - target_linear.dot(observed_target) - prec_opt = np.linalg.inv(cond_cov) + target_lin = - logdens_linear.dot(target_linear) + target_off = cond_mean - target_lin.dot(observed_target) + + if np.asarray(randomizer_prec).shape in [(), (0,)]: + _P = target_linear.T.dot(target_offset) * randomizer_prec + _prec = prec_target + (target_linear.T.dot(target_linear) * randomizer_prec) - target_lin.T.dot(prec_opt).dot( + target_lin) + else: + _P = target_linear.T.dot(randomizer_prec).dot(target_offset) + _prec = prec_target + (target_linear.T.dot(randomizer_prec).dot(target_linear)) - target_lin.T.dot( + prec_opt).dot(target_lin) + + C = target_cov.dot(_P - target_lin.T.dot(prec_opt).dot(target_off)) conjugate_arg = prec_opt.dot(cond_mean) - useC= False - print("useC", useC) if useC: solver = solve_barrier_affine_C else: solver = _solve_barrier_affine_py - + val, soln, hess = solver(conjugate_arg, prec_opt, init_soln, @@ -1695,35 +1442,43 @@ def selective_MLE(observed_target, offset, **solve_args) - final_estimator = observed_target + target_cov.dot(target_lin.T.dot(prec_opt.dot(cond_mean - soln))) - ind_unbiased_estimator = observed_target + target_cov.dot(target_lin.T.dot(prec_opt.dot(cond_mean - - init_soln))) + final_estimator = target_cov.dot(_prec).dot(observed_target) \ + + target_cov.dot(target_lin.T.dot(prec_opt.dot(cond_mean - soln))) + C + + unbiased_estimator = target_cov.dot(_prec).dot(observed_target) + target_cov.dot( + _P - target_lin.T.dot(prec_opt).dot(target_off)) L = target_lin.T.dot(prec_opt) - observed_info_natural = prec_target + L.dot(target_lin) - L.dot(hess.dot(L.T)) + observed_info_natural = _prec + L.dot(target_lin) - L.dot(hess.dot(L.T)) + observed_info_mean = target_cov.dot(observed_info_natural.dot(target_cov)) Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) + pvalues = ndist.cdf(Z_scores) + pvalues = 2 * np.minimum(pvalues, 1 - pvalues) alpha = 1 - level quantile = ndist.ppf(1 - alpha / 2.) - intervals = np.vstack([final_estimator - + + intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), - final_estimator + + final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T - log_ref = val + conjugate_arg.T.dot(cond_cov).dot(conjugate_arg)/2. - result = pd.DataFrame({'MLE':final_estimator, - 'SE':np.sqrt(np.diag(observed_info_mean)), - 'Zvalue':Z_scores, - 'pvalue':pvalues, - 'lower_confidence':intervals[:,0], - 'upper_confidence':intervals[:,1], - 'unbiased':ind_unbiased_estimator}) + log_ref = val + conjugate_arg.T.dot(cond_cov).dot(conjugate_arg) / 2. 
+ + result = pd.DataFrame({'MLE': final_estimator, + 'SE': np.sqrt(np.diag(observed_info_mean)), + 'Zvalue': Z_scores, + 'pvalue': pvalues, + 'lower_confidence': intervals[:, 0], + 'upper_confidence': intervals[:, 1], + 'unbiased': unbiased_estimator}) return result, observed_info_mean, log_ref + def normalizing_constant(target_parameter, observed_target, target_cov, @@ -1735,49 +1490,38 @@ def normalizing_constant(target_parameter, linear_part, offset, useC=False): - """ - Approximation of normalizing constant in affine constrained Gaussian. - Parameters ---------- - observed_target : ndarray Observed estimate of target. - target_cov : ndarray Estimated covaraince of target. - + target_score_cov : ndarray Estimated covariance of target and score of randomized query. - + init_soln : ndarray Feasible point for optimization problem. - cond_mean : ndarray Conditional mean of optimization variables given target. - cond_cov : ndarray Conditional covariance of optimization variables given target. - + logdens_linear : ndarray Describes how conditional mean of optimization variables varies with target. - + linear_part : ndarray Linear part of affine constraints: $\{o:Ao \leq b\}$ - offset : ndarray Offset part of affine constraints: $\{o:Ao \leq b\}$ - solve_args : dict, optional Arguments passed to solver. - level : float, optional Confidence level. - useC : bool, optional Use python or C solver. """ @@ -1799,25 +1543,25 @@ def normalizing_constant(target_parameter, nopt = cond_cov.shape[0] full_Q = np.zeros((ntarget + nopt, ntarget + nopt)) - full_Q[:ntarget][:,:ntarget] = (prec_target + target_linear.T.dot(cond_precision.dot(target_linear))) - full_Q[:ntarget][:,ntarget:] = -target_linear.dot(cond_precision) - full_Q[ntarget:][:,:ntarget] = (-target_linear.dot(cond_precision)).T - full_Q[ntarget:][:,ntarget:] = cond_precision + full_Q[:ntarget][:, :ntarget] = (prec_target + target_linear.T.dot(cond_precision.dot(target_linear))) + full_Q[:ntarget][:, ntarget:] = -target_linear.dot(cond_precision) + full_Q[ntarget:][:, :ntarget] = (-target_linear.dot(cond_precision)).T + full_Q[ntarget:][:, ntarget:] = cond_precision - linear_term = np.hstack([-prec_target.dot(target_parameter) + - corrected_mean.dot(cond_precision).dot(target_linear), - -cond_precision.dot(corrected_mean)]) + linear_term = np.hstack([-prec_target.dot(target_parameter) + + corrected_mean.dot(cond_precision).dot(target_linear), + -cond_precision.dot(corrected_mean)]) constant_term = 0.5 * (np.sum(target_parameter * prec_target.dot(target_parameter)) + np.sum(corrected_mean * cond_precision.dot(corrected_mean))) full_con_linear = np.zeros((linear_part.shape[0], ntarget + nopt)) - full_con_linear[:,ntarget:] = linear_part + full_con_linear[:, ntarget:] = linear_part full_feasible = np.zeros(ntarget + nopt) full_feasible[ntarget:] = feasible_point - solve_args={'tol':1.e-12} + solve_args = {'tol': 1.e-12} if useC: solver = solve_barrier_affine_C @@ -1825,34 +1569,33 @@ def normalizing_constant(target_parameter, solver = _solve_barrier_affine_py value, soln, hess = solver(-linear_term, - full_Q, - full_feasible, - full_con_linear, - offset, - **solve_args) - return (-value + 0.5 * np.sum(target_parameter * prec_target.dot(target_parameter)), - soln[:ntarget], - hess[:ntarget][:,:ntarget]) + full_Q, + full_feasible, + full_con_linear, + offset, + **solve_args) + return (-value + 0.5 * np.sum(target_parameter * prec_target.dot(target_parameter)), + soln[:ntarget], + hess[:ntarget][:, :ntarget]) def _bisect(f, lb, ub, min_iter=20, 
max_iter=100, tol=1.e-3): - while True: sign_l = np.sign(f(lb)) sign_u = np.sign(f(ub)) mid = 0.5 * (lb + ub) f_mid = f(mid) if sign_l == 1: - if f_mid > 0: # we should move closer to upper + if f_mid > 0: # we should move closer to upper lb = mid else: ub = mid else: - if f_mid > 0: # we should move closer to lower + if f_mid > 0: # we should move closer to lower ub = mid else: lb = mid - + if np.fabs(f_mid) < tol: break - return mid + return mid \ No newline at end of file diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 17ecb0423..e846f60e3 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -4,12 +4,14 @@ from selectinf.randomized.lasso import lasso, full_targets, selected_targets, debiased_targets from selectinf.tests.instance import gaussian_instance -def test_full_targets(n=200, - p=1000, - signal_fac=0.5, - s=5, sigma=3, - rho=0.4, - randomizer_scale=0.5, + +def test_full_targets(n=200, + p=1000, + signal_fac=0.5, + s=5, + sigma=3, + rho=0.4, + randomizer_scale=0.7, full_dispersion=False): """ Compare to R randomized lasso @@ -22,7 +24,7 @@ def test_full_targets(n=200, p=p, signal=signal, s=s, - equicorrelated=False, + equicorrelated=True, rho=rho, sigma=sigma, random_signs=True)[:3] @@ -51,7 +53,7 @@ def test_full_targets(n=200, else: dispersion = None - if n>p: + if n > p: (observed_target, cov_target, cov_target_score, @@ -81,13 +83,13 @@ def test_full_targets(n=200, return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals -def test_selected_targets(n=2000, - p=200, - signal_fac=1., - s=5, - sigma=3, - rho=0.4, - randomizer_scale=1, +def test_selected_targets(n=2000, + p=200, + signal_fac=1.2, + s=5, + sigma=2, + rho=0.7, + randomizer_scale=1., full_dispersion=True): """ Compare to R randomized lasso @@ -101,7 +103,7 @@ def test_selected_targets(n=2000, p=p, signal=signal, s=s, - equicorrelated=False, + equicorrelated=True, rho=rho, sigma=sigma, random_signs=True)[:3] @@ -113,11 +115,12 @@ def test_selected_targets(n=2000, n, p = X.shape sigma_ = np.std(Y) - W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + W = 0.8 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ conv = const(X, Y, W, + ridge_term=0., randomizer_scale=randomizer_scale * sigma_) signs = conv.fit() @@ -134,7 +137,7 @@ def test_selected_targets(n=2000, cov_target_score, alternatives) = selected_targets(conv.loglike, conv._W, - nonzero, + nonzero, dispersion=dispersion) result = conv.selective_MLE(observed_target, @@ -143,18 +146,17 @@ def test_selected_targets(n=2000, estimate = result['MLE'] pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) - + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) - print("observed_opt_state ", conv.observed_opt_state) # print("check ", np.asarray(result['MLE']), np.asarray(result['unbiased'])) return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals -def test_instance(): +def test_instance(): n, p, s = 500, 100, 5 X = np.random.standard_normal((n, p)) beta = np.zeros(p) @@ -169,6 +171,7 @@ def test_instance(): M = E.copy() M[-3:] = 1 + print("check ", M) dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) (observed_target, cov_target, @@ -191,10 +194,11 @@ def test_instance(): coverage = 
(beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) print("observed_opt_state ", L.observed_opt_state) - #print("check ", np.asarray(result['MLE']), np.asarray(result['unbiased'])) + # print("check ", np.asarray(result['MLE']), np.asarray(result['unbiased'])) return coverage + # def main(nsim=500): # # cover = [] @@ -279,7 +283,7 @@ def main(nsim=500, full=False): P0, PA, cover, length_int = [], [], [], [] from statsmodels.distributions import ECDF - n, p, s = 500, 100, 5 + n, p, s = 500, 100, 0 for i in range(nsim): if full: @@ -291,8 +295,7 @@ def main(nsim=500, full=False): avg_length = intervals[:, 1] - intervals[:, 0] else: full_dispersion = True - p0, pA, cover_, intervals = test_selected_targets_disperse(n=n, p=p, s=int(p/2), - full_dispersion=full_dispersion) + p0, pA, cover_, intervals = test_selected_targets(n=n, p=p, s=s, full_dispersion=full_dispersion) avg_length = intervals[:, 1] - intervals[:, 0] cover.extend(cover_) @@ -303,5 +306,6 @@ def main(nsim=500, full=False): # np.mean(avg_length), 'null pvalue + power + length') print("coverage and lengths ", np.mean(cover), np.mean(avg_length)) + if __name__ == "__main__": - main(nsim=100) + main(nsim=50) \ No newline at end of file From 24ab71c1fbc1213a8f85212b138d364faa58c0f3 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 7 Jun 2021 12:23:17 -0400 Subject: [PATCH 092/187] updated posterior inference --- selectinf/randomized/posterior_inference.py | 114 ++++++++++--------- selectinf/randomized/tests/test_posterior.py | 78 ++++++++----- 2 files changed, 109 insertions(+), 83 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 403a5a1f0..ef2d184a5 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -4,31 +4,25 @@ from scipy.stats import norm as ndist, invgamma from scipy.linalg import fractional_matrix_power -from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C +from ..algorithms.barrier_affine import solve_barrier_affine_py -class posterior(object): +class posterior(object): """ Parameters ---------- - observed_target : ndarray Observed estimate of target. - cov_target : ndarray Estimated covariance of target. - cov_target_score : ndarray Estimated covariance of target and score of randomized query. - prior : callable A callable object that takes a single argument `parameter` of the same shape as `observed_target` and returns (value of log prior, gradient of log prior) - dispersion : float, optional - A dispersion parameter for likelihood. - + A dispersion parameter for likelihood. solve_args : dict Arguments passed to solver of affine barrier problem. """ @@ -40,29 +34,34 @@ def __init__(self, cov_target_score, prior, dispersion=1, - solve_args={'tol':1.e-12}): + solve_args={'tol': 1.e-12}): self.solve_args = solve_args - + linear_part = query.sampler.affine_con.linear_part offset = query.sampler.affine_con.offset logdens_linear = query.sampler.logdens_transform[0] + _, randomizer_prec = query.randomizer.cov_prec + score_offset = query.observed_score_state + query.sampler.logdens_transform[1] result, self.inverse_info, log_ref = query.selective_MLE(observed_target, cov_target, cov_target_score) - + ### Note for an informative prior we might want to change this... 
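        # For instance, an informative Gaussian prior in the form expected by `prior`
        # (a sketch along the lines of test_flexible_prior2 below; the prior variance
        # here is arbitrary) would be:
        #
        #     prior_var = 0.05 ** 2
        #     def prior(target_parameter):
        #         grad_prior = -target_parameter / prior_var
        #         log_prior = -np.linalg.norm(target_parameter) ** 2 / (2. * prior_var)
        #         return log_prior, grad_prior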
- + self.ntarget = cov_target.shape[0] self.nopt = query.cond_cov.shape[0] self.cond_precision = np.linalg.inv(query.cond_cov) + self.cov_target = cov_target self.prec_target = np.linalg.inv(cov_target) self.observed_target = observed_target self.cov_target_score = cov_target_score self.logdens_linear = logdens_linear + self.randomizer_prec = randomizer_prec + self.score_offset = score_offset self.feasible_point = query.observed_opt_state self.cond_mean = query.cond_mean @@ -82,29 +81,24 @@ def log_posterior(self, sigma=1): """ - Parameters ---------- - target_parameter : ndarray Value of parameter at which to evaluate posterior and its gradient. - sigma : ndarray Noise standard deviation. - """ - sigmasq = sigma**2 - mean_marginal = self.linear_coef.dot(target_parameter) + self.offset_coef + sigmasq = sigma ** 2 + + target = self.S.dot(target_parameter) + self.r + + mean_marginal = self.linear_coef.dot(target) + self.offset_coef prec_marginal = self.prec_marginal conjugate_marginal = prec_marginal.dot(mean_marginal) - useC = True - if useC: - solver = solve_barrier_affine_C - else: - solver = _solve_barrier_affine_py + solver = solve_barrier_affine_py val, soln, hess = solver(conjugate_marginal, prec_marginal, @@ -113,19 +107,18 @@ def log_posterior(self, self.offset, **self.solve_args) - log_normalizer = -val - mean_marginal.T.dot(prec_marginal).dot(mean_marginal)/2. + log_normalizer = -val - mean_marginal.T.dot(prec_marginal).dot(mean_marginal) / 2. - log_lik = -(((self.observed_target - target_parameter).T.dot(self.prec_target).dot(self.observed_target - target_parameter)) / 2. - - log_normalizer) + log_lik = -(((self.observed_target - target).T.dot(self._prec).dot( + self.observed_target - target)) / 2. - log_normalizer) - grad_lik = (self.prec_target.dot(self.observed_target) - - self.prec_target.dot(target_parameter) \ - - self.linear_coef.T.dot(prec_marginal.dot(soln)- conjugate_marginal)) + grad_lik = self.S.T.dot(self._prec.dot(self.observed_target) - self._prec.dot(target) - self.linear_coef.T.dot( + prec_marginal.dot(soln) - conjugate_marginal)) log_prior, grad_prior = self.prior(target_parameter) return (self.dispersion * (log_lik - self.log_ref) / sigmasq + log_prior, - self.dispersion * grad_lik/sigmasq + grad_prior) + self.dispersion * grad_lik / sigmasq + grad_prior) ### Private method @@ -136,25 +129,37 @@ def _set_marginal_parameters(self): of randomization as well how to compute implied mean as a function of the true parameters. 
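        Concretely, in the notation used by `log_posterior`: the target is
        reparametrized as `S.dot(target_parameter) + r`, the implied mean of the
        optimization variables at that value is
        `linear_coef.dot(S.dot(target_parameter) + r) + offset_coef`, and
        `prec_marginal` is the corresponding marginal precision used for the
        constrained (truncated) Gaussian.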
""" - target_linear = -self.logdens_linear.dot(self.cov_target_score.T.dot(self.prec_target)) - implied_precision = np.zeros((self.ntarget + self.nopt, self.ntarget + self.nopt)) - implied_precision[:self.ntarget][:,:self.ntarget] = (self.prec_target + - target_linear.T.dot(self.cond_precision.dot(target_linear))) - implied_precision[:self.ntarget][:,self.ntarget:] = -target_linear.T.dot(self.cond_precision) - implied_precision[self.ntarget:][:,:self.ntarget] = (-target_linear.T.dot(self.cond_precision)).T - implied_precision[self.ntarget:][:,self.ntarget:] = self.cond_precision + target_linear = self.cov_target_score.T.dot(self.prec_target) + target_offset = self.score_offset - target_linear.dot(self.observed_target) + + target_lin = -self.logdens_linear.dot(target_linear) + target_off = self.cond_mean - target_lin.dot(self.observed_target) + + self.linear_coef = target_lin + self.offset_coef = self.cond_mean - target_lin.dot(self.observed_target) + + if np.asarray(self.randomizer_prec).shape in [(), (0,)]: + _prec = self.prec_target + (target_linear.T.dot(target_linear) * self.randomizer_prec) \ + - target_lin.T.dot(self.cond_precision).dot(target_lin) + _P = target_linear.T.dot(target_offset) * self.randomizer_prec + else: + _prec = self.prec_target + (target_linear.T.dot(self.randomizer_prec).dot(target_linear)) \ + - target_lin.T.dot(self.cond_precision).dot(target_lin) + _P = target_linear.T.dot(self.randomizer_prec).dot(target_offset) + + _Q = np.linalg.inv(_prec + target_lin.T.dot(self.cond_precision).dot(target_lin)) + self.prec_marginal = self.cond_precision - self.cond_precision.dot(target_lin).dot(_Q).dot(target_lin.T).dot( + self.cond_precision) - implied_cov = np.linalg.inv(implied_precision) - self.linear_coef = implied_cov[self.ntarget:][:,:self.ntarget].dot(self.prec_target) + r = np.linalg.inv(_prec).dot(target_lin.T.dot(self.cond_precision).dot(target_off) - _P) + S = np.linalg.inv(_prec).dot(self.prec_target) - target_offset = self.cond_mean - target_linear.dot(self.observed_target) - M = implied_cov[self.ntarget:][:,self.ntarget:].dot(self.cond_precision.dot(target_offset)) - N = -target_linear.T.dot(self.cond_precision).dot(target_offset) - self.offset_coef = implied_cov[self.ntarget:][:,:self.ntarget].dot(N) + M + self.r = r + self.S = S + # print("check parameters for selected+lasso ", np.allclose(np.diag(S), np.ones(S.shape[0])), np.allclose(r, np.zeros(r.shape[0]))) + self._prec = _prec - self.cov_marginal = implied_cov[self.ntarget:][:,self.ntarget:] - self.prec_marginal = np.linalg.inv(self.cov_marginal) ### sampling methods @@ -163,7 +168,6 @@ def langevin_sampler(selective_posterior, nburnin=100, proposal_scale=None, step=1.): - state = selective_posterior.initial_estimate stepsize = 1. / (step * selective_posterior.ntarget) @@ -180,20 +184,20 @@ def langevin_sampler(selective_posterior, for i, sample in enumerate(sampler): sampler.scaling = np.sqrt(selective_posterior.dispersion) - samples[i,:] = sample.copy() + samples[i, :] = sample.copy() if i == nsample - 1: break return samples[nburnin:, :] + def gibbs_sampler(selective_posterior, nsample=2000, nburnin=100, proposal_scale=None, step=1.): - state = selective_posterior.initial_estimate - stepsize = 1./(step*selective_posterior.ntarget) + stepsize = 1. 
/ (step * selective_posterior.ntarget) if proposal_scale is None: proposal_scale = selective_posterior.inverse_info @@ -207,20 +211,20 @@ def gibbs_sampler(selective_posterior, scale_samples = np.zeros(nsample) scale_update = np.sqrt(selective_posterior.dispersion) for i in range(nsample): - sample = sampler.__next__() samples[i, :] = sample scale_update_sq = invgamma.rvs(a=(0.1 + - selective_posterior.ntarget + - selective_posterior.ntarget/2), - scale=0.1-((scale_update**2)*sampler.posterior_[0]), + selective_posterior.ntarget + + selective_posterior.ntarget / 2), + scale=0.1 - ((scale_update ** 2) * sampler.posterior_[0]), size=1) scale_samples[i] = np.sqrt(scale_update_sq) sampler.scaling = np.sqrt(scale_update_sq) return samples[nburnin:, :], scale_samples[nburnin:] + class langevin(object): def __init__(self, @@ -254,7 +258,7 @@ def __next__(self): while True: self.posterior_ = self.gradient_map(self.state, self.scaling) candidate = (self.state + self.stepsize * self.proposal_scale.dot(self.posterior_[1]) - + np.sqrt(2.)* (self.proposal_sqrt.dot(self._noise.rvs(self._shape))) * self._sqrt_step) + + np.sqrt(2.) * (self.proposal_sqrt.dot(self._noise.rvs(self._shape))) * self._sqrt_step) if not np.all(np.isfinite(self.gradient_map(candidate, self.scaling)[1])): self.stepsize *= 0.5 diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index c9e3fc118..1b369c351 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -7,6 +7,7 @@ from ..posterior_inference import (langevin_sampler, gibbs_sampler) + def test_Langevin(n=500, p=100, signal_fac=1., @@ -16,7 +17,6 @@ def test_Langevin(n=500, randomizer_scale=1., nsample=1500, nburnin=100): - inst, const = gaussian_instance, lasso.gaussian signal = np.sqrt(signal_fac * 2 * np.log(p)) @@ -39,6 +39,7 @@ def test_Langevin(n=500, conv = const(X, Y, W, + ridge_term=0., randomizer_scale=randomizer_scale * dispersion) signs = conv.fit() @@ -74,12 +75,32 @@ def test_Langevin(n=500, return np.mean(coverage), np.mean(length) -def test_instance(nsample=100, nburnin=50): +def test_coverage(nsim=100): + cov, len = 0., 0. + + for i in range(nsim): + cov_, len_ = test_Langevin(n=500, + p=100, + signal_fac=1., + s=5, + sigma=3., + rho=0.2, + randomizer_scale=1., + nsample=1500, + nburnin=100) + + cov += cov_ + len += len_ + + print("coverage and lengths ", i, cov / (i + 1.), len / (i + 1.)) + + +def test_instance(nsample=100, nburnin=50): n, p, s = 500, 100, 5 X = np.random.standard_normal((n, p)) beta = np.zeros(p) - #beta[:s] = np.sqrt(2 * np.log(p) / n) + # beta[:s] = np.sqrt(2 * np.log(p) / n) Y = X.dot(beta) + np.random.standard_normal(n) scale_ = np.std(Y) @@ -115,7 +136,6 @@ def test_instance(nsample=100, nburnin=50): lci = np.percentile(samples, 5, axis=0) uci = np.percentile(samples, 95, axis=0) - beta_target = np.linalg.pinv(X[:, M]).dot(X.dot(beta)) coverage = (lci < beta_target) * (uci > beta_target) length = uci - lci @@ -124,12 +144,11 @@ def test_instance(nsample=100, nburnin=50): def test_flexible_prior1(nsample=100, nburnin=50): - np.random.seed(0) n, p, s = 500, 100, 5 X = np.random.standard_normal((n, p)) beta = np.zeros(p) - #beta[:s] = np.sqrt(2 * np.log(p) / n) + # beta[:s] = np.sqrt(2 * np.log(p) / n) Y = X.dot(beta) + np.random.standard_normal(n) scale_ = np.std(Y) @@ -150,9 +169,10 @@ def test_flexible_prior1(nsample=100, nburnin=50): dispersion=dispersion) Di = 1. 
/ (200 * np.diag(cov_target)) + def prior(target_parameter): grad_prior = -target_parameter * Di - log_prior = -np.sum(target_parameter**2 * Di) + log_prior = -np.sum(target_parameter ** 2 * Di) return log_prior, grad_prior seed_state = np.random.get_state() @@ -181,14 +201,13 @@ def prior(target_parameter): np.testing.assert_equal(Z1, Z2) np.testing.assert_equal(W1, W2) np.testing.assert_allclose(samples1, samples2, rtol=1.e-3) - -def test_flexible_prior2(nsample=1000, nburnin=50): +def test_flexible_prior2(nsample=1000, nburnin=50): n, p, s = 500, 100, 5 X = np.random.standard_normal((n, p)) beta = np.zeros(p) - #beta[:s] = np.sqrt(2 * np.log(p) / n) + # beta[:s] = np.sqrt(2 * np.log(p) / n) Y = X.dot(beta) + np.random.standard_normal(n) scale_ = np.std(Y) @@ -208,10 +227,11 @@ def test_flexible_prior2(nsample=1000, nburnin=50): M, dispersion=dispersion) - prior_var = 0.05**2 + prior_var = 0.05 ** 2 + def prior(target_parameter): grad_prior = -target_parameter / prior_var - log_prior = -np.linalg.norm(target_parameter)**2 /(2. * prior_var) + log_prior = -np.linalg.norm(target_parameter) ** 2 / (2. * prior_var) return log_prior, grad_prior posterior_inf = L.posterior(observed_target, @@ -220,19 +240,19 @@ def prior(target_parameter): dispersion=dispersion, prior=prior) adaptive_proposal = np.linalg.inv(np.linalg.inv(posterior_inf.inverse_info) + - np.identity(posterior_inf.inverse_info.shape[0]) / 0.05**2) + np.identity(posterior_inf.inverse_info.shape[0]) / 0.05 ** 2) samples = langevin_sampler(posterior_inf, nsample=nsample, proposal_scale=adaptive_proposal, nburnin=nburnin) return samples - + + def test_hiv_data(nsample=10000, nburnin=500, level=0.90, split_proportion=0.50, - seedn = 1): - + seedn=1): np.random.seed(seedn) alpha = (1 - level) / 2 @@ -242,7 +262,7 @@ def test_hiv_data(nsample=10000, Y *= 15 n, p = X.shape X /= np.sqrt(n) - + ols_fit = np.linalg.pinv(X).dot(Y) _sigma = np.linalg.norm(Y - X.dot(ols_fit)) / np.sqrt(n - p - 1) @@ -272,7 +292,7 @@ def test_hiv_data(nsample=10000, cov_target, cov_target_score, level=level, - solve_args={'tol':1.e-12})[:2] + solve_args={'tol': 1.e-12})[:2] approx_inf = conv.approximate_grid_inference(observed_target, cov_target, @@ -288,15 +308,15 @@ def test_hiv_data(nsample=10000, nburnin=nburnin, step=1.) 
- lower_langevin = np.percentile(samples_langevin, int(alpha*100), axis=0) - upper_langevin = np.percentile(samples_langevin, int((1-alpha)*100), axis=0) + lower_langevin = np.percentile(samples_langevin, int(alpha * 100), axis=0) + upper_langevin = np.percentile(samples_langevin, int((1 - alpha) * 100), axis=0) samples_gibbs, scale_gibbs = gibbs_sampler(posterior_inf, nsample=nsample, nburnin=nburnin) - lower_gibbs = np.percentile(samples_gibbs, int(alpha* 100), axis=0) - upper_gibbs = np.percentile(samples_gibbs, int((1-alpha)*100), axis=0) + lower_gibbs = np.percentile(samples_gibbs, int(alpha * 100), axis=0) + upper_gibbs = np.percentile(samples_gibbs, int((1 - alpha) * 100), axis=0) naive_est = np.linalg.pinv(X[:, nonzero]).dot(Y) naive_cov = dispersion * np.linalg.inv(X[:, nonzero].T.dot(X[:, nonzero])) @@ -313,16 +333,16 @@ def test_hiv_data(nsample=10000, print("lengths: adjusted intervals Langevin, Gibbs, MLE1, MLE2, approx ", np.mean(upper_langevin - lower_langevin), np.mean(upper_gibbs - lower_gibbs), - np.mean((2*Z_quantile)*np.sqrt(np.diag(posterior_inf.inverse_info))), + np.mean((2 * Z_quantile) * np.sqrt(np.diag(posterior_inf.inverse_info))), np.mean(mle['upper_confidence'] - mle['lower_confidence']), np.mean(approx_inf['upper_confidence'] - approx_inf['lower_confidence']) - ) + ) - print("lengths: naive intervals ", np.mean(naive_intervals[:,1]-naive_intervals[:,0])) + print("lengths: naive intervals ", np.mean(naive_intervals[:, 1] - naive_intervals[:, 0])) print("lengths: split intervals ", np.mean(split_intervals[:, 1] - split_intervals[:, 0])) - scale_interval = np.percentile(scale_gibbs, [alpha*100, (1-alpha)*100]) + scale_interval = np.percentile(scale_gibbs, [alpha * 100, (1 - alpha) * 100]) output = pd.DataFrame({'Langevin_lower_credible': lower_langevin, 'Langevin_upper_credible': upper_langevin, 'Gibbs_lower_credible': lower_gibbs, @@ -331,7 +351,7 @@ def test_hiv_data(nsample=10000, 'MLE_upper_confidence': mle['upper_confidence'], 'approx_lower_confidence': approx_inf['lower_confidence'], 'approx_upper_confidence': approx_inf['upper_confidence'], - 'Split_lower_confidence': split_intervals[:,0], + 'Split_lower_confidence': split_intervals[:, 0], 'Split_upper_confidence': split_intervals[:, 1], 'Naive_lower_confidence': naive_intervals[:, 0], 'Naive_upper_confidence': naive_intervals[:, 1] @@ -339,7 +359,9 @@ def test_hiv_data(nsample=10000, return output, scale_interval, _sigma + if __name__ == "__main__": - test_hiv_data(split_proportion=0.50) + # test_hiv_data(split_proportion=0.50) + test_coverage(nsim=100) From 7062cd31d76f911ce5fb61a08ce0bcb0e1b1768e Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 7 Jun 2021 12:27:02 -0400 Subject: [PATCH 093/187] updates to approx_reference --- selectinf/randomized/approx_reference.py | 135 +++++++++++------- .../randomized/tests/test_approx_reference.py | 57 +++----- 2 files changed, 107 insertions(+), 85 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index 0041cccb7..a706b6789 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -3,8 +3,9 @@ import numpy as np, pandas as pd from scipy.interpolate import interp1d -from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C from ..distributions.discrete_family import discrete_family +from ..algorithms.barrier_affine import solve_barrier_affine_py + class approximate_grid_inference(object): @@ -13,31 +14,24 @@ def __init__(self, 
observed_target, target_cov, target_score_cov, - solve_args={'tol':1.e-12}): + solve_args={'tol': 1.e-12}): """ Produce p-values and confidence intervals for targets of model including selected features - Parameters ---------- - query : `gaussian_query` A Gaussian query which has information to describe implied Gaussian. - observed_target : ndarray Observed estimate of target. - target_cov : ndarray Estimated covaraince of target. - target_score_cov : ndarray Estimated covariance of target and score of randomized query. - solve_args : dict, optional Arguments passed to solver. - """ self.solve_args = solve_args @@ -46,8 +40,7 @@ def __init__(self, target_cov, target_score_cov, solve_args=solve_args)[:2] - mle = result['MLE'] - + self.linear_part = query.sampler.affine_con.linear_part self.offset = query.sampler.affine_con.offset @@ -62,15 +55,20 @@ def __init__(self, self.init_soln = query.observed_opt_state + self.randomizer_prec = query.sampler.randomizer_prec + self.score_offset = query.observed_score_state + query.sampler.logdens_transform[1] + self.ntarget = ntarget = target_cov.shape[0] _scale = 4 * np.sqrt(np.diag(inverse_info)) ngrid = 60 self.stat_grid = np.zeros((ntarget, ngrid)) for j in range(ntarget): - self.stat_grid[j,:] = np.linspace(observed_target[j] - 1.5*_scale[j], - observed_target[j] + 1.5*_scale[j], - num=ngrid) + self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], + observed_target[j] + 1.5 * _scale[j], + num=ngrid) + + self.opt_linear = query.opt_linear def summary(self, alternatives=None, @@ -79,20 +77,15 @@ def summary(self, """ Produce p-values and confidence intervals for targets of model including selected features - Parameters ---------- - alternatives : [str], optional Sequence of strings describing the alternatives, should be values of ['twosided', 'less', 'greater'] - parameter : np.array Hypothesized value for parameter -- defaults to 0. - level : float Confidence level. - """ if parameter is not None: @@ -102,13 +95,13 @@ def summary(self, pivots = None pvalues = self._approx_pivots(np.zeros_like(self.observed_target), - alternatives=alternatives) + alternatives=alternatives) lower, upper = self._approx_intervals(level=level) - result = pd.DataFrame({'target':self.observed_target, - 'pvalue':pvalues, - 'lower_confidence':lower, - 'upper_confidence':upper}) + result = pd.DataFrame({'target': self.observed_target, + 'pvalue': pvalues, + 'lower_confidence': lower, + 'upper_confidence': upper}) if not np.all(parameter == 0): result.insert(4, 'pivot', pivots) @@ -117,31 +110,30 @@ def summary(self, return result def _approx_log_reference(self, - observed_target, - target_cov, - target_score_cov, - grid): + observed_target, + target_cov, + target_score_cov, + grid): """ Approximate the log of the reference density on a grid. 
- """ if np.asarray(observed_target).shape in [(), (0,)]: - raise ValueError('no target specified') + raise ValueError('no target specified') prec_target = np.linalg.inv(target_cov) target_lin = - self.logdens_linear.dot(target_score_cov.T.dot(prec_target)) ref_hat = [] - solver = solve_barrier_affine_C + solver = solve_barrier_affine_py for k in range(grid.shape[0]): # in the usual D = N + Gamma theta.hat, # target_lin is "something" times Gamma, # where "something" comes from implied Gaussian # cond_mean is "something" times D # Gamma is target_score_cov.T.dot(prec_target) - - cond_mean_grid = (target_lin.dot(np.atleast_1d(grid[k] - observed_target)) + + + cond_mean_grid = (target_lin.dot(np.atleast_1d(grid[k] - observed_target)) + self.cond_mean) conjugate_arg = self.prec_opt.dot(cond_mean_grid) @@ -158,14 +150,19 @@ def _approx_log_reference(self, def _construct_families(self): + self._construct_density() + self._families = [] + for m in range(self.ntarget): p = self.target_score_cov.shape[1] observed_target_uni = (self.observed_target[m]).reshape((1,)) + target_cov_uni = (np.diag(self.target_cov)[m]).reshape((1, 1)) - var_target = target_cov_uni[0, 0] target_score_cov_uni = self.target_score_cov[m, :].reshape((1, p)) + var_target = 1. / ((self.precs[m])[0, 0]) + approx_log_ref = self._approx_log_reference(observed_target_uni, target_cov_uni, target_score_cov_uni, @@ -179,15 +176,14 @@ def _construct_families(self): grid = np.linspace(self.stat_grid[m].min(), self.stat_grid[m].max(), 1000) logW = (approx_fn(grid) - - 0.5 * (grid - self.observed_target[m])**2 / var_target) + 0.5 * (grid - self.observed_target[m]) ** 2 / var_target) logW -= logW.max() - weights = np.exp(logW) # construction of families follows `selectinf.learning.core` - + self._families.append(discrete_family(grid, - weights)) - + np.exp(logW))) + # logG = - 0.5 * grid**2 / var_target # logG -= logG.max() # import matplotlib.pyplot as plt @@ -207,22 +203,24 @@ def _approx_pivots(self, if not hasattr(self, "_families"): self._construct_families() - + if alternatives is None: alternatives = ['twosided'] * self.ntarget pivot = [] + p = self.target_score_cov.shape[1] for m in range(self.ntarget): + family = self._families[m] - observed_target = self.observed_target[m] - var_target = self.target_cov[m, m] + var_target = 1. / ((self.precs[m])[0, 0]) + mean = self.S[m].dot(mean_parameter[m].reshape((1,))) + self.r[m] + #print("mean ", np.allclose(mean[0], mean_parameter[m]), self.r[m], self.S[m]) # construction of pivot from families follows `selectinf.learning.core` - _cdf = family.cdf((mean_parameter[m] - observed_target) / var_target, - x=observed_target) - #_cdf = family.cdf(mean_parameter[m]/var_target, x=observed_target) + _cdf = family.cdf((mean[0] - self.observed_target[m]) / var_target, x=self.observed_target[m]) + if alternatives[m] == 'twosided': pivot.append(2 * min(_cdf, 1 - _cdf)) elif alternatives[m] == 'greater': @@ -238,18 +236,57 @@ def _approx_intervals(self, if not hasattr(self, "_families"): self._construct_families() - + lower, upper = [], [] for m in range(self.ntarget): # construction of intervals from families follows `selectinf.learning.core` family = self._families[m] observed_target = self.observed_target[m] + l, u = family.equal_tailed_interval(observed_target, - alpha=1-level) - var_target = self.target_cov[m, m] - lower.append(l * var_target + observed_target) + alpha=1 - level) + + var_target = 1. 
/ ((self.precs[m])[0, 0]) + + lower.append(l * var_target + observed_target) upper.append(u * var_target + observed_target) return np.asarray(lower), np.asarray(upper) + ### Private method + def _construct_density(self): + + precs = {} + S = {} + r = {} + + p = self.target_score_cov.shape[1] + + for m in range(self.ntarget): + observed_target_uni = (self.observed_target[m]).reshape((1,)) + target_cov_uni = (np.diag(self.target_cov)[m]).reshape((1, 1)) + prec_target = 1. / target_cov_uni + target_score_cov_uni = self.target_score_cov[m, :].reshape((1, p)) + + target_linear = target_score_cov_uni.T.dot(prec_target) + target_offset = (self.score_offset - target_linear.dot(observed_target_uni)).reshape( + (target_linear.shape[0],)) + + target_lin = -self.logdens_linear.dot(target_linear) + target_off = (self.cond_mean - target_lin.dot(observed_target_uni)).reshape((target_lin.shape[0],)) + + _prec = prec_target + (target_linear.T.dot(target_linear) * self.randomizer_prec) - target_lin.T.dot( + self.prec_opt).dot(target_lin) + + _P = target_linear.T.dot(target_offset) * self.randomizer_prec + _r = (1. / _prec).dot(target_lin.T.dot(self.prec_opt).dot(target_off) - _P) + _S = np.linalg.inv(_prec).dot(prec_target) + + S[m] = _S + r[m] = _r + precs[m] = _prec + + self.precs = precs + self.S = S + self.r = r \ No newline at end of file diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index aaf2544c4..62c83f7fa 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -63,10 +63,10 @@ def test_approx_pivot(n=500, rho=0.4, randomizer_scale=1.): - while True: + inst, const = gaussian_instance, lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) - inst, const = gaussian_instance, lasso.gaussian - signal = np.sqrt(signal_fac * 2 * np.log(p)) + while True: X, Y, beta = inst(n=n, p=p, @@ -75,55 +75,39 @@ def test_approx_pivot(n=500, equicorrelated=True, rho=rho, sigma=sigma, - random_signs=True)[:3] + random_signs=False)[:3] n, p = X.shape sigma_ = np.std(Y) - if n > p: + if n > (2 * p): dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) else: dispersion = sigma_ ** 2 - # W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ eps = np.random.standard_normal((n, 2000)) * Y.std() - lam_theory = 0.7 * np.median(np.abs(X.T.dot(eps)).max(1)) + lam_theory = 0.6 * np.median(np.abs(X.T.dot(eps)).max(1)) W = lam_theory * np.ones(p) conv = const(X, Y, W, - ridge_term=0.) 
- # randomizer_scale=randomizer_scale * dispersion) + ridge_term=0., + randomizer_scale=randomizer_scale * dispersion) signs = conv.fit() nonzero = signs != 0 - print("number of selected ", nonzero.sum()) if nonzero.sum() > 0: - beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) - if n > p: - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=dispersion) - - else: - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=sigma ** 2) - - inverse_info = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[1] + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) approximate_grid_inf = approximate_grid_inference(conv, observed_target, @@ -134,6 +118,7 @@ def test_approx_pivot(n=500, return pivot + def test_approx_ci(n=500, p=100, signal_fac=1., @@ -218,9 +203,9 @@ def main(nsim=300, CI = False): if CI is False: _pivot = [] for i in range(nsim): - _pivot.extend(test_approx_pivot(n=100, - p=400, - signal_fac=0.5, + _pivot.extend(test_approx_pivot(n=400, + p=100, + signal_fac=1., s=0, sigma=1., rho=0.30, @@ -244,7 +229,7 @@ def main(nsim=300, CI = False): signal_fac=1., s=5, sigma=3., - rho=0.3, + rho=0.4, randomizer_scale=1.) coverage_ += cov From 93e808f64533a3019ee57dd09cf10e4aaa5e883f Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Sat, 12 Jun 2021 23:54:28 -0400 Subject: [PATCH 094/187] removed interp1d for now to compute reference on a grid --- selectinf/randomized/approx_reference.py | 1 - selectinf/randomized/exact_reference.py | 130 ++++++---- .../randomized/tests/test_approx_reference.py | 16 +- .../randomized/tests/test_exact_reference.py | 224 ++++++------------ 4 files changed, 161 insertions(+), 210 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index a706b6789..62ab28f56 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -208,7 +208,6 @@ def _approx_pivots(self, alternatives = ['twosided'] * self.ntarget pivot = [] - p = self.target_score_cov.shape[1] for m in range(self.ntarget): diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index 5e5c43db8..96fab032e 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -18,35 +18,25 @@ def __init__(self, """ Produce p-values and confidence intervals for targets of model including selected features - Parameters ---------- - query : `gaussian_query` A Gaussian query which has information to describe implied Gaussian. - observed_target : ndarray Observed estimate of target. - target_cov : ndarray Estimated covaraince of target. - target_score_cov : ndarray Estimated covariance of target and score of randomized query. - solve_args : dict, optional Arguments passed to solver. 
- """ - self.solve_args = solve_args - result, inverse_info = query.selective_MLE(observed_target, target_cov, target_score_cov, solve_args=solve_args)[:2] - mle = result['MLE'] self.linear_part = query.sampler.affine_con.linear_part self.offset = query.sampler.affine_con.offset @@ -62,16 +52,21 @@ def __init__(self, self.init_soln = query.observed_opt_state + self.randomizer_prec = query.sampler.randomizer_prec + self.score_offset = query.observed_score_state + query.sampler.logdens_transform[1] + self.ntarget = ntarget = target_cov.shape[0] - _scale = 4. * np.sqrt(np.diag(inverse_info)) - ngrid = 40 + _scale = 4 * np.sqrt(np.diag(inverse_info)) + ngrid = 1000 self.stat_grid = np.zeros((ntarget, ngrid)) for j in range(ntarget): - self.stat_grid[j, :] = np.linspace(observed_target[j] - 1. * _scale[j], - observed_target[j] + 1. * _scale[j], + self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], + observed_target[j] + 1.5 * _scale[j], num=ngrid) + self.opt_linear = query.opt_linear + def summary(self, alternatives=None, parameter=None, @@ -79,31 +74,26 @@ def summary(self, """ Produce p-values and confidence intervals for targets of model including selected features - Parameters ---------- - alternatives : [str], optional Sequence of strings describing the alternatives, should be values of ['twosided', 'less', 'greater'] - parameter : np.array Hypothesized value for parameter -- defaults to 0. - level : float Confidence level. - """ if parameter is not None: - pivots = self.approx_pivots(parameter, + pivots = self._pivots(parameter, alternatives=alternatives) else: pivots = None - pvalues = self._approx_pivots(np.zeros_like(self.observed_target), + pvalues = self._pivots(np.zeros_like(self.observed_target), alternatives=alternatives) - lower, upper = self._approx_intervals(level=level) + lower, upper = self._intervals(level=level) result = pd.DataFrame({'target': self.observed_target, 'pvalue': pvalues, @@ -152,21 +142,23 @@ def log_reference(self, implied_prec = 1./implied_cov _A = self.cond_cov.dot(eta) * implied_prec + R = np.identity(num_opt) - _A.dot(eta.T) + A = self.linear_part.dot(_A).reshape((-1,)) - b = self.linear_part.dot((-np.identity(num_opt) + _A.dot(eta.T)).dot(self.init_soln)) + b = -self.linear_part.dot(R).dot(self.init_soln) + + trunc_ = np.true_divide((self.offset + b), A) neg_indx = np.asarray([j for j in range(num_con) if A[j] < 0.]) pos_indx = np.asarray([j for j in range(num_con) if A[j] > 0.]) - trunc_ = (self.offset + b) / A - if pos_indx.shape[0]>0 and neg_indx.shape[0]>0: trunc_lower = np.max(trunc_[neg_indx]) trunc_upper = np.min(trunc_[pos_indx]) - lower_limit = (trunc_lower - implied_mean) * implied_prec - upper_limit = (trunc_upper - implied_mean) * implied_prec + lower_limit = (trunc_lower - implied_mean) * np.sqrt(implied_prec) + upper_limit = (trunc_upper - implied_mean) * np.sqrt(implied_prec) ref_hat.append(np.log(ndist.cdf(upper_limit) - ndist.cdf(lower_limit))) @@ -174,7 +166,7 @@ def log_reference(self, trunc_upper = np.min(trunc_[pos_indx]) - upper_limit = (trunc_upper - implied_mean) * implied_prec + upper_limit = (trunc_upper - implied_mean) * np.sqrt(implied_prec) ref_hat.append(np.log(ndist.cdf(upper_limit))) @@ -182,7 +174,7 @@ def log_reference(self, trunc_lower = np.max(trunc_[neg_indx]) - lower_limit = (trunc_lower - implied_mean) * implied_prec + lower_limit = (trunc_lower - implied_mean) * np.sqrt(implied_prec) ref_hat.append(np.log(1. 
- ndist.cdf(lower_limit))) @@ -190,33 +182,30 @@ def log_reference(self, def _construct_families(self): + self._construct_density() + self._families = [] + for m in range(self.ntarget): p = self.target_score_cov.shape[1] observed_target_uni = (self.observed_target[m]).reshape((1,)) + target_cov_uni = (np.diag(self.target_cov)[m]).reshape((1, 1)) - var_target = target_cov_uni[0, 0] target_score_cov_uni = self.target_score_cov[m, :].reshape((1, p)) + var_target = 1. / ((self.precs[m])[0, 0]) + log_ref = self.log_reference(observed_target_uni, target_cov_uni, target_score_cov_uni, self.stat_grid[m]) - grid_approx_fn = interp1d(self.stat_grid[m], - log_ref, - kind='quadratic', - bounds_error=False, - fill_value='extrapolate') - - grid = np.linspace(self.stat_grid[m].min(), self.stat_grid[m].max(), 1000) - logW = (grid_approx_fn(grid) - - 0.5 * (grid - self.observed_target[m]) ** 2 / var_target) + logW = (log_ref - 0.5 * (self.stat_grid[m] - self.observed_target[m]) ** 2 / var_target) logW -= logW.max() # construction of families follows `selectinf.learning.core` - self._families.append(discrete_family(grid, + self._families.append(discrete_family(self.stat_grid[m], np.exp(logW))) def _pivots(self, @@ -228,19 +217,18 @@ def _pivots(self, if alternatives is None: alternatives = ['twosided'] * self.ntarget - else: - alternatives = [alternatives] *self.ntarget + pivot = [] for m in range(self.ntarget): + family = self._families[m] - observed_target = self.observed_target[m] - var_target = self.target_cov[m, m] + var_target = 1. / ((self.precs[m])[0, 0]) + + mean = self.S[m].dot(mean_parameter[m].reshape((1,))) + self.r[m] - # construction of pivot from families follows `selectinf.learning.core` + _cdf = family.cdf((mean[0] - self.observed_target[m]) / var_target, x=self.observed_target[m]) - _cdf = family.cdf((mean_parameter[m] - observed_target) / var_target, - x=observed_target) if alternatives[m] == 'twosided': pivot.append(2 * min(_cdf, 1 - _cdf)) elif alternatives[m] == 'greater': @@ -263,10 +251,54 @@ def _intervals(self, # construction of intervals from families follows `selectinf.learning.core` family = self._families[m] observed_target = self.observed_target[m] + l, u = family.equal_tailed_interval(observed_target, alpha=1 - level) - var_target = self.target_cov[m, m] + + var_target = 1. / ((self.precs[m])[0, 0]) + lower.append(l * var_target + observed_target) upper.append(u * var_target + observed_target) - return np.asarray(lower), np.asarray(upper) \ No newline at end of file + return np.asarray(lower), np.asarray(upper) + + ### Private method + def _construct_density(self): + + precs = {} + S = {} + r = {} + + p = self.target_score_cov.shape[1] + + for m in range(self.ntarget): + observed_target_uni = (self.observed_target[m]).reshape((1,)) + target_cov_uni = (np.diag(self.target_cov)[m]).reshape((1, 1)) + prec_target = 1. / target_cov_uni + target_score_cov_uni = self.target_score_cov[m, :].reshape((1, p)) + + target_linear = target_score_cov_uni.T.dot(prec_target) + target_offset = (self.score_offset - target_linear.dot(observed_target_uni)).reshape( + (target_linear.shape[0],)) + + target_lin = -self.logdens_linear.dot(target_linear) + target_off = (self.cond_mean - target_lin.dot(observed_target_uni)).reshape((target_lin.shape[0],)) + + _prec = prec_target + (target_linear.T.dot(target_linear) * self.randomizer_prec) - target_lin.T.dot( + self.prec_opt).dot(target_lin) + + _P = target_linear.T.dot(target_offset) * self.randomizer_prec + _r = (1. 
/ _prec).dot(target_lin.T.dot(self.prec_opt).dot(target_off) - _P) + _S = np.linalg.inv(_prec).dot(prec_target) + + S[m] = _S + r[m] = _r + precs[m] = _prec + + self.precs = precs + self.S = S + self.r = r + + + + diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index 62c83f7fa..2c942f89d 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -93,10 +93,11 @@ def test_approx_pivot(n=500, Y, W, ridge_term=0., - randomizer_scale=randomizer_scale * dispersion) + randomizer_scale=randomizer_scale * sigma_) signs = conv.fit() nonzero = signs != 0 + print("no of variables selected ", nonzero.sum()) if nonzero.sum() > 0: beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) @@ -150,7 +151,7 @@ def test_approx_ci(n=500, conv = const(X, Y, W, - randomizer_scale=randomizer_scale * dispersion) + randomizer_scale=randomizer_scale * sigma_) signs = conv.fit() nonzero = signs != 0 @@ -165,7 +166,6 @@ def test_approx_ci(n=500, nonzero, dispersion=dispersion) - ntarget = observed_target.shape[0] result, inverse_info = conv.selective_MLE(observed_target, cov_target, cov_target_score)[:2] @@ -203,12 +203,12 @@ def main(nsim=300, CI = False): if CI is False: _pivot = [] for i in range(nsim): - _pivot.extend(test_approx_pivot(n=400, + _pivot.extend(test_approx_pivot(n=500, p=100, - signal_fac=1., + signal_fac=0.5, s=0, - sigma=1., - rho=0.30, + sigma=2., + rho=0.50, randomizer_scale=1.)) print("iteration completed ", i) @@ -239,4 +239,4 @@ def main(nsim=300, CI = False): print("iteration completed ", n + 1) if __name__ == "__main__": - main(nsim=50, CI = False) + main(nsim=20, CI = False) diff --git a/selectinf/randomized/tests/test_exact_reference.py b/selectinf/randomized/tests/test_exact_reference.py index c023b0d65..18a061344 100644 --- a/selectinf/randomized/tests/test_exact_reference.py +++ b/selectinf/randomized/tests/test_exact_reference.py @@ -12,167 +12,87 @@ def test_approx_pivot(n=500, rho=0.4, randomizer_scale=1.): - inst, const = gaussian_instance, lasso.gaussian - signal = np.sqrt(signal_fac * 2 * np.log(p)) - - X, Y, beta = inst(n=n, - p=p, - signal=0, - s=s, - equicorrelated=True, - rho=rho, - sigma=sigma, - random_signs=False)[:3] - - n, p = X.shape - - sigma_ = np.std(Y) - #dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) - dispersion = sigma_ ** 2 - - #W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * np.sqrt(dispersion) - eps = np.random.standard_normal((n, 2000)) * Y.std() - lam_theory = 0.7 * np.median(np.abs(X.T.dot(eps)).max(1)) - - conv = const(X, - Y, - lam_theory * np.ones(p), - randomizer_scale=randomizer_scale * dispersion) - - signs = conv.fit() - nonzero = signs != 0 - print("size of selected set ", nonzero.sum()) - - if nonzero.sum()>0: - beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) - - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=None) - - exact_grid_inf = exact_grid_inference(conv, - observed_target, - cov_target, - cov_target_score) - - pivot = exact_grid_inf._pivots(beta_target) - - return pivot - -def test_approx_ci(n=500, - p=100, - signal_fac=1., - s=5, - sigma=2., - rho=0.4, - randomizer_scale=1., - level=0.9): - - inst, const = gaussian_instance, lasso.gaussian - signal = np.sqrt(signal_fac * 2 * np.log(p)) - - X, Y, beta = inst(n=n, - p=p, - signal=signal, - s=s, - 
equicorrelated=False, - rho=rho, - sigma=sigma, - random_signs=True)[:3] - - n, p = X.shape - - sigma_ = np.std(Y) - dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) - - W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * np.sqrt(dispersion) - - conv = const(X, - Y, - W, - randomizer_scale=randomizer_scale * dispersion) - - signs = conv.fit() - nonzero = signs != 0 - - if nonzero.sum()>0: - - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=dispersion) - - result, inverse_info = conv.selective_MLE(observed_target, + while True: + + inst, const = gaussian_instance, lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=True, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + + if n > (2 * p): + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + else: + dispersion = sigma_ ** 2 + + eps = np.random.standard_normal((n, 2000)) * Y.std() + W = 0.7 * np.median(np.abs(X.T.dot(eps)).max(1)) + + conv = const(X, + Y, + W, + ridge_term=0.) + #randomizer_scale=randomizer_scale * np.sqrt(dispersion)) + + signs = conv.fit() + nonzero = signs != 0 + print("size of selected set ", nonzero.sum()) + + if nonzero.sum() > 0: + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + + exact_grid_inf = exact_grid_inference(conv, + observed_target, cov_target, - cov_target_score)[:2] + cov_target_score) - exact_grid_inf = exact_grid_inference(conv, - observed_target, - cov_target, - cov_target_score) + pivot = exact_grid_inf._pivots(beta_target) - lci, uci = exact_grid_inf._intervals(level) + return pivot - beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) - coverage = (lci < beta_target) * (uci > beta_target) - length = uci - lci - - return np.mean(coverage), np.mean(length), np.mean(length-(3.3 * np.sqrt(np.diag(inverse_info)))) - -def main(nsim=300, CI=False): +def main(nsim=300): import matplotlib as mpl mpl.use('tkagg') import matplotlib.pyplot as plt from statsmodels.distributions.empirical_distribution import ECDF - if CI is False: - _pivot = [] - for i in range(nsim): - _pivot.extend(test_approx_pivot(n=100, - p=400, - signal_fac=1., - s=0, - sigma=1., - rho=0.30, - randomizer_scale=0.7)) - - print("iteration completed ", i) - - plt.clf() - ecdf_pivot = ECDF(np.asarray(_pivot)) - grid = np.linspace(0, 1, 101) - plt.plot(grid, ecdf_pivot(grid), c='blue', marker='^') - plt.plot(grid, grid, 'k--') - plt.show() - - if CI is True: - coverage_ = 0. - length_ = 0. - length_diff_ = 0. - for n in range(nsim): - cov, len, len_diff = test_approx_ci(n=500, - p=100, - signal_fac=1., - s=5, - sigma=3., - rho=0.50, - randomizer_scale=1.) 
- - coverage_ += cov - length_ += len - length_diff_ += len_diff - print("coverage so far ", coverage_ / (n + 1.)) - print("lengths so far ", length_ / (n + 1.), length_diff_/(n+1.)) - print("iteration completed ", n + 1) - + _pivot = [] + for i in range(nsim): + _pivot.extend(test_approx_pivot(n=400, + p=100, + signal_fac=0.5, + s=0, + sigma=1., + rho=0.30, + randomizer_scale=1.)) + + print("iteration completed ", i) + + plt.clf() + ecdf_pivot = ECDF(np.asarray(_pivot)) + grid = np.linspace(0, 1, 101) + plt.plot(grid, ecdf_pivot(grid), c='blue', marker='^') + plt.plot(grid, grid, 'k--') + plt.show() if __name__ == "__main__": - main(nsim=50, CI=False) \ No newline at end of file + main(nsim=100) \ No newline at end of file From 43d78d2243a661e31a9e0bf451da81fc3815f893 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 14 Jun 2021 13:01:30 -0400 Subject: [PATCH 095/187] added option to use interp1d --- selectinf/randomized/approx_reference.py | 56 ++++++++++++++++-------- selectinf/randomized/exact_reference.py | 51 ++++++++++++++------- 2 files changed, 73 insertions(+), 34 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index 62ab28f56..4c14dfad8 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -14,7 +14,8 @@ def __init__(self, observed_target, target_cov, target_score_cov, - solve_args={'tol': 1.e-12}): + solve_args={'tol': 1.e-12}, + useIP=False): """ Produce p-values and confidence intervals for targets @@ -60,15 +61,24 @@ def __init__(self, self.ntarget = ntarget = target_cov.shape[0] _scale = 4 * np.sqrt(np.diag(inverse_info)) - ngrid = 60 - self.stat_grid = np.zeros((ntarget, ngrid)) - for j in range(ntarget): - self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], - observed_target[j] + 1.5 * _scale[j], - num=ngrid) + if useIP == False: + ngrid = 1000 + self.stat_grid = np.zeros((ntarget, ngrid)) + for j in range(ntarget): + self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], + observed_target[j] + 1.5 * _scale[j], + num=ngrid) + else: + ngrid = 60 + self.stat_grid = np.zeros((ntarget, ngrid)) + for j in range(ntarget): + self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], + observed_target[j] + 1.5 * _scale[j], + num=ngrid) self.opt_linear = query.opt_linear + self.useIP = useIP def summary(self, alternatives=None, @@ -168,21 +178,29 @@ def _construct_families(self): target_score_cov_uni, self.stat_grid[m]) - approx_fn = interp1d(self.stat_grid[m], - approx_log_ref, - kind='quadratic', - bounds_error=False, - fill_value='extrapolate') - grid = np.linspace(self.stat_grid[m].min(), self.stat_grid[m].max(), 1000) - logW = (approx_fn(grid) - - 0.5 * (grid - self.observed_target[m]) ** 2 / var_target) - logW -= logW.max() + if self.useIP == False: + logW = (approx_log_ref - 0.5 * (self.stat_grid[m] - self.observed_target[m]) ** 2 / var_target) + logW -= logW.max() + self._families.append(discrete_family(self.stat_grid[m], + np.exp(logW))) + else: + approx_fn = interp1d(self.stat_grid[m], + approx_log_ref, + kind='quadratic', + bounds_error=False, + fill_value='extrapolate') - # construction of families follows `selectinf.learning.core` + grid = np.linspace(self.stat_grid[m].min(), self.stat_grid[m].max(), 1000) + logW = (approx_fn(grid) - + 0.5 * (grid - self.observed_target[m]) ** 2 / var_target) - self._families.append(discrete_family(grid, - np.exp(logW))) + logW -= logW.max() + 
self._families.append(discrete_family(grid, + np.exp(logW))) + + + # construction of families follows `selectinf.learning.core` # logG = - 0.5 * grid**2 / var_target # logG -= logG.max() diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index 96fab032e..80169a9a0 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -13,7 +13,8 @@ def __init__(self, observed_target, target_cov, target_score_cov, - solve_args={'tol': 1.e-12}): + solve_args={'tol': 1.e-12}, + useIP=False): """ Produce p-values and confidence intervals for targets @@ -57,15 +58,24 @@ def __init__(self, self.ntarget = ntarget = target_cov.shape[0] _scale = 4 * np.sqrt(np.diag(inverse_info)) - ngrid = 1000 - self.stat_grid = np.zeros((ntarget, ngrid)) - for j in range(ntarget): - self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], - observed_target[j] + 1.5 * _scale[j], - num=ngrid) + if useIP == False: + ngrid = 1000 + self.stat_grid = np.zeros((ntarget, ngrid)) + for j in range(ntarget): + self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], + observed_target[j] + 1.5 * _scale[j], + num=ngrid) + else: + ngrid = 60 + self.stat_grid = np.zeros((ntarget, ngrid)) + for j in range(ntarget): + self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], + observed_target[j] + 1.5 * _scale[j], + num=ngrid) self.opt_linear = query.opt_linear + self.useIP = useIP def summary(self, alternatives=None, @@ -199,14 +209,25 @@ def _construct_families(self): target_cov_uni, target_score_cov_uni, self.stat_grid[m]) - - logW = (log_ref - 0.5 * (self.stat_grid[m] - self.observed_target[m]) ** 2 / var_target) - logW -= logW.max() - - # construction of families follows `selectinf.learning.core` - - self._families.append(discrete_family(self.stat_grid[m], - np.exp(logW))) + if self.useIP == False: + logW = (log_ref - 0.5 * (self.stat_grid[m] - self.observed_target[m]) ** 2 / var_target) + logW -= logW.max() + self._families.append(discrete_family(self.stat_grid[m], + np.exp(logW))) + else: + approx_fn = interp1d(self.stat_grid[m], + log_ref, + kind='quadratic', + bounds_error=False, + fill_value='extrapolate') + + grid = np.linspace(self.stat_grid[m].min(), self.stat_grid[m].max(), 1000) + logW = (approx_fn(grid) - + 0.5 * (grid - self.observed_target[m]) ** 2 / var_target) + + logW -= logW.max() + self._families.append(discrete_family(grid, + np.exp(logW))) def _pivots(self, mean_parameter, From 0b623f9feb7bbd5f48c10b05b70b60c50b6f56b8 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 14 Jun 2021 13:02:02 -0400 Subject: [PATCH 096/187] updated tests --- .../randomized/tests/test_approx_reference.py | 26 +++++++++++-------- .../randomized/tests/test_exact_reference.py | 19 +++++++++----- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index 2c942f89d..a7233a123 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -61,7 +61,9 @@ def test_approx_pivot(n=500, s=5, sigma=2., rho=0.4, - randomizer_scale=1.): + randomizer_scale=1., + equicorrelated=False, + useIP=False): inst, const = gaussian_instance, lasso.gaussian signal = np.sqrt(signal_fac * 2 * np.log(p)) @@ -72,7 +74,7 @@ def test_approx_pivot(n=500, p=p, signal=signal, s=s, - equicorrelated=True, + equicorrelated=equicorrelated, rho=rho, sigma=sigma, 
random_signs=False)[:3] @@ -86,14 +88,13 @@ def test_approx_pivot(n=500, dispersion = sigma_ ** 2 eps = np.random.standard_normal((n, 2000)) * Y.std() - lam_theory = 0.6 * np.median(np.abs(X.T.dot(eps)).max(1)) - W = lam_theory * np.ones(p) + W = 0.7 * np.median(np.abs(X.T.dot(eps)).max(1)) conv = const(X, Y, W, - ridge_term=0., - randomizer_scale=randomizer_scale * sigma_) + ridge_term=0.) + #randomizer_scale=randomizer_scale * sigma_) signs = conv.fit() nonzero = signs != 0 @@ -113,7 +114,8 @@ def test_approx_pivot(n=500, approximate_grid_inf = approximate_grid_inference(conv, observed_target, cov_target, - cov_target_score) + cov_target_score, + useIP=useIP) pivot = approximate_grid_inf._approx_pivots(beta_target) @@ -203,13 +205,15 @@ def main(nsim=300, CI = False): if CI is False: _pivot = [] for i in range(nsim): - _pivot.extend(test_approx_pivot(n=500, - p=100, + _pivot.extend(test_approx_pivot(n=100, + p=400, signal_fac=0.5, s=0, sigma=2., - rho=0.50, - randomizer_scale=1.)) + rho=0.30, + randomizer_scale=1., + equicorrelated=True, + useIP=True)) print("iteration completed ", i) diff --git a/selectinf/randomized/tests/test_exact_reference.py b/selectinf/randomized/tests/test_exact_reference.py index 18a061344..ddeb6cee4 100644 --- a/selectinf/randomized/tests/test_exact_reference.py +++ b/selectinf/randomized/tests/test_exact_reference.py @@ -10,7 +10,9 @@ def test_approx_pivot(n=500, s=5, sigma=2., rho=0.4, - randomizer_scale=1.): + randomizer_scale=1., + equicorrelated=False, + useIP=False): while True: @@ -21,7 +23,7 @@ def test_approx_pivot(n=500, p=p, signal=signal, s=s, - equicorrelated=True, + equicorrelated=equicorrelated, rho=rho, sigma=sigma, random_signs=True)[:3] @@ -62,7 +64,8 @@ def test_approx_pivot(n=500, exact_grid_inf = exact_grid_inference(conv, observed_target, cov_target, - cov_target_score) + cov_target_score, + useIP=useIP) pivot = exact_grid_inf._pivots(beta_target) @@ -77,20 +80,22 @@ def main(nsim=300): _pivot = [] for i in range(nsim): - _pivot.extend(test_approx_pivot(n=400, - p=100, + _pivot.extend(test_approx_pivot(n=100, + p=400, signal_fac=0.5, s=0, sigma=1., rho=0.30, - randomizer_scale=1.)) + randomizer_scale=1., + equicorrelated=True, + useIP=False)) print("iteration completed ", i) plt.clf() ecdf_pivot = ECDF(np.asarray(_pivot)) grid = np.linspace(0, 1, 101) - plt.plot(grid, ecdf_pivot(grid), c='blue', marker='^') + plt.plot(grid, ecdf_pivot(grid), c='blue') plt.plot(grid, grid, 'k--') plt.show() From 58792c80666943b755df3a93ad4247d68e981d26 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 14 Jun 2021 13:31:59 -0400 Subject: [PATCH 097/187] added barrier affine --- selectinf/algorithms/barrier_affine.py | 139 +++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 selectinf/algorithms/barrier_affine.py diff --git a/selectinf/algorithms/barrier_affine.py b/selectinf/algorithms/barrier_affine.py new file mode 100644 index 000000000..88812c278 --- /dev/null +++ b/selectinf/algorithms/barrier_affine.py @@ -0,0 +1,139 @@ +import numpy as np + +def solve_barrier_affine_py(conjugate_arg, + precision, + feasible_point, + con_linear, + con_offset, + step=1, + nstep=1000, + min_its=200, + tol=1.e-10): + + scaling = np.sqrt(np.diag(con_linear.dot(precision).dot(con_linear.T))) + + if feasible_point is None: + feasible_point = 1. / scaling + + objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u)/2. 
\ + + np.log(1.+ 1./((con_offset - con_linear.dot(u))/ scaling)).sum() + grad = lambda u: -conjugate_arg + precision.dot(u) - con_linear.T.dot(1./(scaling + con_offset - con_linear.dot(u)) - + 1./(con_offset - con_linear.dot(u))) + barrier_hessian = lambda u: con_linear.T.dot(np.diag(-1./((scaling + con_offset-con_linear.dot(u))**2.) + + 1./((con_offset-con_linear.dot(u))**2.))).dot(con_linear) + + current = feasible_point + current_value = np.inf + + for itercount in range(nstep): + cur_grad = grad(current) + + # make sure proposal is feasible + + count = 0 + while True: + count += 1 + proposal = current - step * cur_grad + if np.all(con_offset-con_linear.dot(proposal) > 0): + break + step *= 0.5 + if count >= 40: + raise ValueError('not finding a feasible point') + + # make sure proposal is a descent + + count = 0 + while True: + count += 1 + proposal = current - step * cur_grad + proposed_value = objective(proposal) + if proposed_value <= current_value: + break + step *= 0.5 + if count >= 20: + if not (np.isnan(proposed_value) or np.isnan(current_value)): + break + else: + raise ValueError('value is NaN: %f, %f' % (proposed_value, current_value)) + + # stop if relative decrease is small + + if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value) and itercount >= min_its: + current = proposal + current_value = proposed_value + break + + current = proposal + current_value = proposed_value + + if itercount % 4 == 0: + step *= 2 + + hess = np.linalg.inv(precision + barrier_hessian(current)) + return current_value, current, hess + +def solve_barrier_nonneg(conjugate_arg, + precision, + feasible_point=None, + step=1, + nstep=1000, + tol=1.e-8): + + scaling = np.sqrt(np.diag(precision)) + + if feasible_point is None: + feasible_point = 1. / scaling + + objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u) / 2. + np.log( + 1. + 1. / (u / scaling)).sum() + grad = lambda u: -conjugate_arg + precision.dot(u) + (1. / (scaling + u) - 1. / u) + barrier_hessian = lambda u: (-1. / ((scaling + u) ** 2.) + 1. 
/ (u ** 2.)) + + current = feasible_point + current_value = np.inf + + for itercount in range(nstep): + cur_grad = grad(current) + + # make sure proposal is feasible + + count = 0 + while True: + count += 1 + proposal = current - step * cur_grad + if np.all(proposal > 0): + break + step *= 0.5 + if count >= 40: + raise ValueError('not finding a feasible point') + + # make sure proposal is a descent + + count = 0 + while True: + proposal = current - step * cur_grad + proposed_value = objective(proposal) + if proposed_value <= current_value: + break + step *= 0.5 + if count >= 20: + if not (np.isnan(proposed_value) or np.isnan(current_value)): + break + else: + raise ValueError('value is NaN: %f, %f' % (proposed_value, current_value)) + + # stop if relative decrease is small + + if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value): + current = proposal + current_value = proposed_value + break + + current = proposal + current_value = proposed_value + + if itercount % 4 == 0: + step *= 2 + + hess = np.linalg.inv(precision + np.diag(barrier_hessian(current))) + return current_value, current, hess From 9011fcc59e4b160897a77cd90d4329a9f961608a Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Thu, 17 Jun 2021 09:20:56 -0400 Subject: [PATCH 098/187] fixed a sign --- selectinf/randomized/posterior_inference.py | 12 ++++++------ selectinf/randomized/query.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index ef2d184a5..85dc64b5e 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -109,8 +109,8 @@ def log_posterior(self, log_normalizer = -val - mean_marginal.T.dot(prec_marginal).dot(mean_marginal) / 2. - log_lik = -(((self.observed_target - target).T.dot(self._prec).dot( - self.observed_target - target)) / 2. - log_normalizer) + log_lik = -((self.observed_target - target).T.dot(self._prec).dot(self.observed_target - target)) / 2. 
\ + - log_normalizer grad_lik = self.S.T.dot(self._prec.dot(self.observed_target) - self._prec.dot(target) - self.linear_coef.T.dot( prec_marginal.dot(soln) - conjugate_marginal)) @@ -137,7 +137,7 @@ def _set_marginal_parameters(self): target_off = self.cond_mean - target_lin.dot(self.observed_target) self.linear_coef = target_lin - self.offset_coef = self.cond_mean - target_lin.dot(self.observed_target) + self.offset_coef = target_off if np.asarray(self.randomizer_prec).shape in [(), (0,)]: _prec = self.prec_target + (target_linear.T.dot(target_linear) * self.randomizer_prec) \ @@ -149,15 +149,14 @@ def _set_marginal_parameters(self): _P = target_linear.T.dot(self.randomizer_prec).dot(target_offset) _Q = np.linalg.inv(_prec + target_lin.T.dot(self.cond_precision).dot(target_lin)) - self.prec_marginal = self.cond_precision - self.cond_precision.dot(target_lin).dot(_Q).dot(target_lin.T).dot( - self.cond_precision) + self.prec_marginal = self.cond_precision - self.cond_precision.dot(target_lin).dot(_Q).dot(target_lin.T).dot(self.cond_precision) r = np.linalg.inv(_prec).dot(target_lin.T.dot(self.cond_precision).dot(target_off) - _P) S = np.linalg.inv(_prec).dot(self.prec_target) self.r = r self.S = S - # print("check parameters for selected+lasso ", np.allclose(np.diag(S), np.ones(S.shape[0])), np.allclose(r, np.zeros(r.shape[0]))) + #print("check parameters for selected+lasso ", np.allclose(np.diag(S), np.ones(S.shape[0])), np.allclose(r, np.zeros(r.shape[0]))) self._prec = _prec @@ -185,6 +184,7 @@ def langevin_sampler(selective_posterior, for i, sample in enumerate(sampler): sampler.scaling = np.sqrt(selective_posterior.dispersion) samples[i, :] = sample.copy() + #print("sample ", i, samples[i,:]) if i == nsample - 1: break diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 05afbcd8e..97f9b3e70 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -15,7 +15,7 @@ from .posterior_inference import posterior from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C from .approx_reference import approximate_grid_inference - +from ..algorithms.barrier_affine import solve_barrier_affine_py class query(object): r""" @@ -1433,7 +1433,7 @@ def selective_MLE(observed_target, if useC: solver = solve_barrier_affine_C else: - solver = _solve_barrier_affine_py + solver = solve_barrier_affine_py val, soln, hess = solver(conjugate_arg, prec_opt, From 9e47c0587483d143148f06f80f69f0d26fc02ed0 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Sun, 27 Jun 2021 14:14:01 -0400 Subject: [PATCH 099/187] update to test --- selectinf/randomized/query.py | 1 + selectinf/randomized/tests/test_exact_reference.py | 9 ++++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 97f9b3e70..8d6fb2da8 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -135,6 +135,7 @@ def log_density(logdens_linear, offset, cond_prec, opt, score): opt_offset, cond_precision) + _, randomizer_prec = self.randomizer.cov_prec self.cond_mean, self.cond_cov, self.randomizer_prec = cond_mean, cond_cov, randomizer_prec diff --git a/selectinf/randomized/tests/test_exact_reference.py b/selectinf/randomized/tests/test_exact_reference.py index ddeb6cee4..7cb49ff11 100644 --- a/selectinf/randomized/tests/test_exact_reference.py +++ b/selectinf/randomized/tests/test_exact_reference.py @@ -43,8 +43,7 @@ def test_approx_pivot(n=500, conv = const(X, Y, W, - 
ridge_term=0.) - #randomizer_scale=randomizer_scale * np.sqrt(dispersion)) + randomizer_scale=randomizer_scale * np.sqrt(dispersion)) signs = conv.fit() nonzero = signs != 0 @@ -82,11 +81,11 @@ def main(nsim=300): for i in range(nsim): _pivot.extend(test_approx_pivot(n=100, p=400, - signal_fac=0.5, + signal_fac=1., s=0, - sigma=1., + sigma=2., rho=0.30, - randomizer_scale=1., + randomizer_scale=0.7, equicorrelated=True, useIP=False)) From 05d08e959c7c170167b07c6c0f6720b6d7aa7861 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Sun, 27 Jun 2021 23:17:52 -0400 Subject: [PATCH 100/187] modified mle and reference code for group lasso --- .../randomized/approx_reference_grouplasso.py | 264 +++++++++++++----- .../tests/test_approx_reference_grouplasso.py | 79 +++--- 2 files changed, 232 insertions(+), 111 deletions(-) diff --git a/selectinf/randomized/approx_reference_grouplasso.py b/selectinf/randomized/approx_reference_grouplasso.py index f028fcbe3..d4c0decdd 100644 --- a/selectinf/randomized/approx_reference_grouplasso.py +++ b/selectinf/randomized/approx_reference_grouplasso.py @@ -12,7 +12,7 @@ import regreg.api as rr from .randomization import randomization from ..base import restricted_estimator -from .query import _solve_barrier_affine_py +from ..algorithms.barrier_affine import solve_barrier_affine_py as solver from ..distributions.discrete_family import discrete_family class group_lasso(object): @@ -75,6 +75,8 @@ def fit(self, tol = 1.e-20 + _, self.randomizer_prec = self.randomizer.cov_prec + # now we are collecting the directions and norms of the active groups for g in sorted(np.unique(self.groups)): # g is group label @@ -175,8 +177,6 @@ def compute_Lg(g): self.linear_part = -np.eye(self.observed_opt_state.shape[0]) self.offset = np.zeros(self.observed_opt_state.shape[0]) - # print("K.K.T. 
map", np.allclose(self._initial_omega, self.observed_score_state + self.opt_linear.dot(self.observed_opt_state) - # + self.opt_offset, rtol=1e-03)) return active_signs, soln def _solve_randomized_problem(self, @@ -302,15 +302,32 @@ def selective_MLE(self, observed_target = np.atleast_1d(observed_target) prec_target = inv(target_cov) + prec_opt = self.cond_precision + + score_offset = self.observed_score_state + self.opt_offset + # target_lin determines how the conditional mean of optimization variables # vary with target # logdens_linear determines how the argument of the optimization density # depends on the score, not how the mean depends on score, hence the minus sign - target_lin = - logdens_linear.dot(target_score_cov.T.dot(prec_target)) - target_offset = cond_mean - target_lin.dot(observed_target) + target_linear = target_score_cov.T.dot(prec_target) + target_offset = score_offset - target_linear.dot(observed_target) - prec_opt = self.cond_precision + target_lin = - logdens_linear.dot(target_linear) + target_off = cond_mean - target_lin.dot(observed_target) + + if np.asarray(self.randomizer_prec).shape in [(), (0,)]: + _P = target_linear.T.dot(target_offset) * self.randomizer_prec + _prec = prec_target + (target_linear.T.dot(target_linear) * self.randomizer_prec) - target_lin.T.dot( + prec_opt).dot( + target_lin) + else: + _P = target_linear.T.dot(self.randomizer_prec).dot(target_offset) + _prec = prec_target + (target_linear.T.dot(self.randomizer_prec).dot(target_linear)) - target_lin.T.dot( + prec_opt).dot(target_lin) + + C = target_cov.dot(_P - target_lin.T.dot(prec_opt).dot(target_off)) conjugate_arg = prec_opt.dot(cond_mean) @@ -324,23 +341,32 @@ def selective_MLE(self, useJacobian, **solve_args) - log_ref = val + conjugate_arg.T.dot(cond_cov).dot(conjugate_arg) / 2. + final_estimator = target_cov.dot(_prec).dot(observed_target) \ + + target_cov.dot(target_lin.T.dot(prec_opt.dot(cond_mean - soln))) + C + + unbiased_estimator = target_cov.dot(_prec).dot(observed_target) + target_cov.dot( + _P - target_lin.T.dot(prec_opt).dot(target_off)) - final_estimator = observed_target + target_cov.dot(target_lin.T.dot(prec_opt.dot(cond_mean - soln))) - ind_unbiased_estimator = observed_target + target_cov.dot(target_lin.T.dot(prec_opt.dot(cond_mean - - init_soln))) L = target_lin.T.dot(prec_opt) - observed_info_natural = prec_target + L.dot(target_lin) - L.dot(hess.dot(L.T)) + observed_info_natural = _prec + L.dot(target_lin) - L.dot(hess.dot(L.T)) + observed_info_mean = target_cov.dot(observed_info_natural.dot(target_cov)) Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) + pvalues = ndist.cdf(Z_scores) + pvalues = 2 * np.minimum(pvalues, 1 - pvalues) - alpha = 1. - level + alpha = 1 - level quantile = ndist.ppf(1 - alpha / 2.) - intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), - final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T + + intervals = np.vstack([final_estimator - + quantile * np.sqrt(np.diag(observed_info_mean)), + final_estimator + + quantile * np.sqrt(np.diag(observed_info_mean))]).T + + log_ref = val + conjugate_arg.T.dot(cond_cov).dot(conjugate_arg) / 2. 
result = pd.DataFrame({'MLE': final_estimator, 'SE': np.sqrt(np.diag(observed_info_mean)), @@ -348,7 +374,7 @@ def selective_MLE(self, 'pvalue': pvalues, 'lower_confidence': intervals[:, 0], 'upper_confidence': intervals[:, 1], - 'unbiased': ind_unbiased_estimator}) + 'unbiased': unbiased_estimator}) return result, observed_info_mean, log_ref @@ -383,7 +409,8 @@ class approximate_grid_inference(object): def __init__(self, query, dispersion, - solve_args={'tol': 1.e-12}): + solve_args={'tol': 1.e-12}, + useIP=True): """ Produce p-values and confidence intervals for targets @@ -407,12 +434,6 @@ def __init__(self, result, inverse_info = query.selective_MLE(dispersion=dispersion)[:2] - (observed_target, target_cov, target_score_cov, alternatives) = query.selected_targets(dispersion) - - self.observed_target = observed_target - self.target_score_cov = target_score_cov - self.target_cov = target_cov - self.linear_part = query.linear_part self.offset = query.offset @@ -423,17 +444,37 @@ def __init__(self, self.C = query.C self.active_dirs = query.active_dirs + (observed_target, target_cov, target_score_cov, alternatives) = query.selected_targets(dispersion) + self.observed_target = observed_target + self.target_score_cov = target_score_cov + self.target_cov = target_cov + self.init_soln = query.observed_opt_state + self.randomizer_prec = query.randomizer_prec + self.score_offset = query.observed_score_state + query.opt_offset + self.ntarget = ntarget = target_cov.shape[0] _scale = 4 * np.sqrt(np.diag(inverse_info)) - ngrid = 40 - self.stat_grid = np.zeros((ntarget, ngrid)) - for j in range(ntarget): - self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], - observed_target[j] + 1.5 * _scale[j], - num=ngrid) + if useIP == False: + ngrid = 1000 + self.stat_grid = np.zeros((ntarget, ngrid)) + for j in range(ntarget): + self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], + observed_target[j] + 1.5 * _scale[j], + num=ngrid) + else: + ngrid = 100 + self.stat_grid = np.zeros((ntarget, ngrid)) + for j in range(ntarget): + self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], + observed_target[j] + 1.5 * _scale[j], + num=ngrid) + + self.opt_linear = query.opt_linear + self.useIP = useIP + def summary(self, alternatives=None, parameter=None, @@ -453,7 +494,7 @@ def summary(self, """ if parameter is not None: - pivots = self.approx_pivots(parameter, + pivots = self._approx_pivots(parameter, alternatives=alternatives) else: pivots = None @@ -473,15 +514,16 @@ def summary(self, return result - def _approx_log_reference(self, - observed_target, - target_cov, - target_score_cov, - grid): + def log_reference(self, + observed_target, + target_cov, + target_score_cov, + grid): """ Approximate the log of the reference density on a grid. 
""" + if np.asarray(observed_target).shape in [(), (0,)]: raise ValueError('no target specified') @@ -489,58 +531,88 @@ def _approx_log_reference(self, target_lin = - self.logdens_linear.dot(target_score_cov.T.dot(prec_target)) ref_hat = [] - solver = _solve_barrier_affine_py for k in range(grid.shape[0]): + # in the usual D = N + Gamma theta.hat, + # target_lin is "something" times Gamma, + # where "something" comes from implied Gaussian + # cond_mean is "something" times D + # Gamma is target_score_cov.T.dot(prec_target) + + num_opt = self.prec_opt.shape[0] + num_con = self.linear_part.shape[0] cond_mean_grid = (target_lin.dot(np.atleast_1d(grid[k] - observed_target)) + self.cond_mean) - conjugate_arg = self.prec_opt.dot(cond_mean_grid) - val, soln, _ = solver(conjugate_arg, - self.prec_opt, - self.init_soln, - self.linear_part, - self.offset, - **self.solve_args) + #direction for decomposing o - log_jacob = jacobian_grad_hess(soln, self.C, self.active_dirs) + eta = -self.prec_opt.dot(self.logdens_linear.dot(target_score_cov.T)) - ref_hat.append(-val - (conjugate_arg.T.dot(self.cond_cov).dot(conjugate_arg) / 2.) + log_jacob[0]) + implied_mean = np.asscalar(eta.T.dot(cond_mean_grid)) + implied_cov = np.asscalar(eta.T.dot(self.cond_cov).dot(eta)) + implied_prec = 1./implied_cov + + _A = self.cond_cov.dot(eta) * implied_prec + R = np.identity(num_opt) - _A.dot(eta.T) + + A = self.linear_part.dot(_A).reshape((-1,)) + b = self.offset-self.linear_part.dot(R).dot(self.init_soln) + + conjugate_arg = implied_mean * implied_prec + + val, soln, _ = solver(np.asarray([conjugate_arg]), + np.reshape(implied_prec, (1,1)), + eta.T.dot(self.init_soln), + A.reshape((A.shape[0],1)), + b, + **self.solve_args) + + gamma_ = _A.dot(soln) + R.dot(self.init_soln) + log_jacob = jacobian_grad_hess(gamma_, self.C, self.active_dirs) + + ref_hat.append(-val - ((conjugate_arg ** 2) * implied_cov)/ 2. + log_jacob[0]) return np.asarray(ref_hat) def _construct_families(self): + self._construct_density() + self._families = [] + for m in range(self.ntarget): p = self.target_score_cov.shape[1] observed_target_uni = (self.observed_target[m]).reshape((1,)) + target_cov_uni = (np.diag(self.target_cov)[m]).reshape((1, 1)) - var_target = target_cov_uni[0, 0] target_score_cov_uni = self.target_score_cov[m, :].reshape((1, p)) - approx_log_ref = self._approx_log_reference(observed_target_uni, - target_cov_uni, - target_score_cov_uni, - self.stat_grid[m]) - - approx_fn = interp1d(self.stat_grid[m], - approx_log_ref, - kind='quadratic', - bounds_error=False, - fill_value='extrapolate') - - grid = np.linspace(self.stat_grid[m].min(), self.stat_grid[m].max(), 1000) - logW = (approx_fn(grid) - - 0.5 * (grid - self.observed_target[m]) ** 2 / var_target) - logW -= logW.max() - - # construction of families follows `selectinf.learning.core` + var_target = 1. 
/ ((self.precs[m])[0, 0]) + + log_ref = self.log_reference(observed_target_uni, + target_cov_uni, + target_score_cov_uni, + self.stat_grid[m]) + if self.useIP == False: + logW = (log_ref - 0.5 * (self.stat_grid[m] - self.observed_target[m]) ** 2 / var_target) + logW -= logW.max() + self._families.append(discrete_family(self.stat_grid[m], + np.exp(logW))) + else: + approx_fn = interp1d(self.stat_grid[m], + log_ref, + kind='quadratic', + bounds_error=False, + fill_value='extrapolate') - self._families.append(discrete_family(grid, - np.exp(logW))) + grid = np.linspace(self.stat_grid[m].min(), self.stat_grid[m].max(), 1000) + logW = (approx_fn(grid) - + 0.5 * (grid - self.observed_target[m]) ** 2 / var_target) + logW -= logW.max() + self._families.append(discrete_family(grid, + np.exp(logW))) def _approx_pivots(self, mean_parameter, @@ -555,15 +627,15 @@ def _approx_pivots(self, pivot = [] for m in range(self.ntarget): - print("variable computed ", m) + family = self._families[m] - observed_target = self.observed_target[m] - var_target = self.target_cov[m, m] + var_target = 1. / ((self.precs[m])[0, 0]) - # construction of pivot from families follows `selectinf.learning.core` + mean = self.S[m].dot(mean_parameter[m].reshape((1,))) + self.r[m] + + _cdf = family.cdf((mean[0] - self.observed_target[m]) / var_target, x=self.observed_target[m]) + print("variable completed ", m) - _cdf = family.cdf((mean_parameter[m] - observed_target) / var_target, - x=observed_target) if alternatives[m] == 'twosided': pivot.append(2 * min(_cdf, 1 - _cdf)) elif alternatives[m] == 'greater': @@ -575,7 +647,7 @@ def _approx_pivots(self, return pivot def _approx_intervals(self, - level=0.9): + level=0.9): if not hasattr(self, "_families"): self._construct_families() @@ -586,14 +658,54 @@ def _approx_intervals(self, # construction of intervals from families follows `selectinf.learning.core` family = self._families[m] observed_target = self.observed_target[m] + l, u = family.equal_tailed_interval(observed_target, alpha=1 - level) - var_target = self.target_cov[m, m] + + var_target = 1. / ((self.precs[m])[0, 0]) + lower.append(l * var_target + observed_target) upper.append(u * var_target + observed_target) return np.asarray(lower), np.asarray(upper) + ### Private method + def _construct_density(self): + + precs = {} + S = {} + r = {} + + p = self.target_score_cov.shape[1] + + for m in range(self.ntarget): + observed_target_uni = (self.observed_target[m]).reshape((1,)) + target_cov_uni = (np.diag(self.target_cov)[m]).reshape((1, 1)) + prec_target = 1. / target_cov_uni + target_score_cov_uni = self.target_score_cov[m, :].reshape((1, p)) + + target_linear = target_score_cov_uni.T.dot(prec_target) + target_offset = (self.score_offset - target_linear.dot(observed_target_uni)).reshape( + (target_linear.shape[0],)) + + target_lin = -self.logdens_linear.dot(target_linear) + target_off = (self.cond_mean - target_lin.dot(observed_target_uni)).reshape((target_lin.shape[0],)) + + _prec = prec_target + (target_linear.T.dot(target_linear) * self.randomizer_prec) - target_lin.T.dot( + self.prec_opt).dot(target_lin) + + _P = target_linear.T.dot(target_offset) * self.randomizer_prec + _r = (1. 
/ _prec).dot(target_lin.T.dot(self.prec_opt).dot(target_off) - _P) + _S = np.linalg.inv(_prec).dot(prec_target) + + S[m] = _S + r[m] = _r + precs[m] = _prec + + self.precs = precs + self.S = S + self.r = r + def solve_barrier_affine_jacobian_py(conjugate_arg, precision, @@ -719,13 +831,15 @@ def jacobian_grad_hess(gamma, C, active_dirs): GammaMinus = calc_GammaMinus(gamma, active_dirs) # eigendecomposition - evalues, evectors = eig(GammaMinus + C) + #evalues, evectors = eig(GammaMinus + C) # log Jacobian - J = log(evalues).sum() + #J = log(evalues).sum() + J = np.log(np.linalg.det(GammaMinus + C)) # inverse - GpC_inv = evectors.dot(np.diag(1 / evalues).dot(evectors.T)) + #GpC_inv = evectors.dot(np.diag(1 / evalues).dot(evectors.T)) + GpC_inv = np.linalg.inv(GammaMinus + C) # summing matrix (gamma.size by C.shape[0]) S = block_diag(*[np.ones((1, ug.size - 1)) for ug in active_dirs.values()]) diff --git a/selectinf/randomized/tests/test_approx_reference_grouplasso.py b/selectinf/randomized/tests/test_approx_reference_grouplasso.py index 0b4f53474..5228a410a 100644 --- a/selectinf/randomized/tests/test_approx_reference_grouplasso.py +++ b/selectinf/randomized/tests/test_approx_reference_grouplasso.py @@ -11,54 +11,61 @@ def test_approx_pivot(n=500, sigma=3., rho=0.3, randomizer_scale=1, - weight_frac=1.2): + weight_frac=1.5): - inst, const = gaussian_group_instance, group_lasso.gaussian - signal = np.sqrt(signal_fac * 2 * np.log(p)) + while True: - X, Y, beta = inst(n=n, - p=p, - signal=signal, - sgroup=sgroup, - groups=groups, - equicorrelated=False, - rho=rho, - sigma=sigma, - random_signs=True)[:3] + inst, const = gaussian_group_instance, group_lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) - n, p = X.shape + X, Y, beta = inst(n=n, + p=p, + signal=signal, + sgroup=sgroup, + groups=groups, + equicorrelated=False, + rho=rho, + sigma=sigma, + random_signs=True)[:3] - sigma_ = np.std(Y) - dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + n, p = X.shape - penalty_weights = dict([(i, weight_frac * sigma_ * np.sqrt(2 * np.log(p))) for i in np.unique(groups)]) + sigma_ = np.std(Y) - conv = const(X, - Y, - groups, - penalty_weights, - randomizer_scale=randomizer_scale * dispersion) + if n > (2 * p): + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + else: + dispersion = sigma_ ** 2 - signs, _ = conv.fit() - nonzero = signs != 0 - print("number of selected variables ", nonzero.sum()) + penalty_weights = dict([(i, weight_frac * sigma_ * np.sqrt(2 * np.log(p))) for i in np.unique(groups)]) - if nonzero.sum()>0: + conv = const(X, + Y, + groups, + penalty_weights, + randomizer_scale=randomizer_scale * np.sqrt(dispersion)) - conv._setup_implied_gaussian() + signs, _ = conv.fit() + nonzero = signs != 0 + print("number of selected variables ", nonzero.sum()) - beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + if nonzero.sum() > 0: + conv._setup_implied_gaussian() - approximate_grid_inf = approximate_grid_inference(conv, - dispersion) + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) - pivot = approximate_grid_inf._approx_pivots(beta_target) + approximate_grid_inf = approximate_grid_inference(conv, + dispersion) - return pivot + pivot = approximate_grid_inf._approx_pivots(beta_target) + + return pivot def main(nsim=300, CI = False): + import matplotlib as mpl + mpl.use('tkagg') import matplotlib.pyplot as plt from statsmodels.distributions.empirical_distribution import ECDF if CI is False: @@ -66,13 
+73,13 @@ def main(nsim=300, CI = False): for i in range(nsim): _pivot.extend(test_approx_pivot(n=500, p=100, - signal_fac=0.3, - sgroup=3, - groups=np.arange(20).repeat(5), - sigma=1., + signal_fac=1., + sgroup=0, + groups=np.arange(25).repeat(4), + sigma=2., rho=0.20, randomizer_scale=0.5, - weight_frac=1.)) + weight_frac=1.2)) print("iteration completed ", i) From ccfeb0fb5364e4a7984def456c6e2e576e15fbb2 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 6 Jul 2021 11:13:04 -0700 Subject: [PATCH 101/187] approx reference test --- selectinf/randomized/tests/test_approx_reference.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index b1bb3c8fc..bbfe4b719 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -181,16 +181,11 @@ def test_approx_ci(n=500, scale_ = np.max(_scale) ngrid = int(2 * scale_/0.1) - approximate_grid_inf = approximate_grid_inference(observed_target, + approximate_grid_inf = approximate_grid_inference(conv, + observed_target, cov_target, cov_target_score, - inverse_info, - conv.observed_opt_state, - conv.sampler.affine_con.mean, - conv.sampler.affine_con.covariance, - conv.sampler.logdens_transform[0], - conv.sampler.affine_con.linear_part, - conv.sampler.affine_con.offset) + useIP=False) lci, uci = approximate_grid_inf._approx_intervals(level) From d753a92b006bf62fa913dd7e20a365bbf013cd99 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 6 Jul 2021 11:26:06 -0700 Subject: [PATCH 102/187] removing unused code --- selectinf/randomized/query.py | 119 ---------------------------------- 1 file changed, 119 deletions(-) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index aefe70698..592065367 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -1433,123 +1433,4 @@ def selective_MLE(observed_target, return result, observed_info_mean, log_ref -def normalizing_constant(target_parameter, - observed_target, - target_cov, - target_score_cov, - feasible_point, - cond_mean, - cond_cov, - logdens_linear, - linear_part, - offset, - useC=False): - """ - Approximation of normalizing constant - in affine constrained Gaussian. - Parameters - ---------- - observed_target : ndarray - Observed estimate of target. - target_cov : ndarray - Estimated covaraince of target. - - target_score_cov : ndarray - Estimated covariance of target and score of randomized query. - - init_soln : ndarray - Feasible point for optimization problem. - cond_mean : ndarray - Conditional mean of optimization variables given target. - cond_cov : ndarray - Conditional covariance of optimization variables given target. - - logdens_linear : ndarray - Describes how conditional mean of optimization - variables varies with target. - - linear_part : ndarray - Linear part of affine constraints: $\{o:Ao \leq b\}$ - offset : ndarray - Offset part of affine constraints: $\{o:Ao \leq b\}$ - solve_args : dict, optional - Arguments passed to solver. - level : float, optional - Confidence level. - useC : bool, optional - Use python or C solver. 
- """ - - target_parameter = np.atleast_1d(target_parameter) - - cond_precision = np.linalg.inv(cond_cov) - prec_target = np.linalg.inv(target_cov) - target_linear = -logdens_linear.dot(target_score_cov.dot(prec_target)) - nuisance_correction = target_linear.dot(observed_target) - corrected_mean = cond_mean - nuisance_correction - - # rest of the objective is the target mahalanobis distance - # plus the mahalanobis distance for optimization variables - # this includes a term linear in the target, i.e. - # the source of `target_linear` - - ntarget = target_cov.shape[0] - nopt = cond_cov.shape[0] - full_Q = np.zeros((ntarget + nopt, - ntarget + nopt)) - full_Q[:ntarget][:, :ntarget] = (prec_target + target_linear.T.dot(cond_precision.dot(target_linear))) - full_Q[:ntarget][:, ntarget:] = -target_linear.dot(cond_precision) - full_Q[ntarget:][:, :ntarget] = (-target_linear.dot(cond_precision)).T - full_Q[ntarget:][:, ntarget:] = cond_precision - - linear_term = np.hstack([-prec_target.dot(target_parameter) + - corrected_mean.dot(cond_precision).dot(target_linear), - -cond_precision.dot(corrected_mean)]) - - constant_term = 0.5 * (np.sum(target_parameter * prec_target.dot(target_parameter)) + - np.sum(corrected_mean * cond_precision.dot(corrected_mean))) - - full_con_linear = np.zeros((linear_part.shape[0], - ntarget + nopt)) - full_con_linear[:, ntarget:] = linear_part - full_feasible = np.zeros(ntarget + nopt) - full_feasible[ntarget:] = feasible_point - - solve_args = {'tol': 1.e-12} - - if useC: - solver = solve_barrier_affine_C - else: - solver = _solve_barrier_affine_py - - value, soln, hess = solver(-linear_term, - full_Q, - full_feasible, - full_con_linear, - offset, - **solve_args) - return (-value + 0.5 * np.sum(target_parameter * prec_target.dot(target_parameter)), - soln[:ntarget], - hess[:ntarget][:, :ntarget]) - - -def _bisect(f, lb, ub, min_iter=20, max_iter=100, tol=1.e-3): - while True: - sign_l = np.sign(f(lb)) - sign_u = np.sign(f(ub)) - mid = 0.5 * (lb + ub) - f_mid = f(mid) - if sign_l == 1: - if f_mid > 0: # we should move closer to upper - lb = mid - else: - ub = mid - else: - if f_mid > 0: # we should move closer to lower - ub = mid - else: - lb = mid - if np.fabs(f_mid) < tol: - break - return mid From 13994bfabb3b05b9df988a1d2e217865c75e446d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 12 Jul 2021 13:04:11 -0700 Subject: [PATCH 103/187] renaming logdens_linear --- selectinf/randomized/approx_reference.py | 8 +-- .../randomized/approx_reference_grouplasso.py | 26 ++++---- selectinf/randomized/exact_reference.py | 8 +-- selectinf/randomized/group_lasso.py | 8 +-- selectinf/randomized/lasso.py | 10 +-- selectinf/randomized/posterior_inference.py | 6 +- selectinf/randomized/query.py | 64 ++++++++++--------- 7 files changed, 66 insertions(+), 64 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index 4c14dfad8..ee8d81391 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -45,7 +45,7 @@ def __init__(self, self.linear_part = query.sampler.affine_con.linear_part self.offset = query.sampler.affine_con.offset - self.logdens_linear = query.sampler.logdens_transform[0] + self.regress_opt = query.sampler.logdens_transform[0] self.cond_mean = query.cond_mean self.prec_opt = np.linalg.inv(query.cond_cov) self.cond_cov = query.cond_cov @@ -132,7 +132,7 @@ def _approx_log_reference(self, raise ValueError('no target specified') prec_target = 
np.linalg.inv(target_cov) - target_lin = - self.logdens_linear.dot(target_score_cov.T.dot(prec_target)) + target_lin = self.regress_opt.dot(target_score_cov.T.dot(prec_target)) ref_hat = [] solver = solve_barrier_affine_py @@ -290,7 +290,7 @@ def _construct_density(self): target_offset = (self.score_offset - target_linear.dot(observed_target_uni)).reshape( (target_linear.shape[0],)) - target_lin = -self.logdens_linear.dot(target_linear) + target_lin = self.regress_opt.dot(target_linear) target_off = (self.cond_mean - target_lin.dot(observed_target_uni)).reshape((target_lin.shape[0],)) _prec = prec_target + (target_linear.T.dot(target_linear) * self.randomizer_prec) - target_lin.T.dot( @@ -306,4 +306,4 @@ def _construct_density(self): self.precs = precs self.S = S - self.r = r \ No newline at end of file + self.r = r diff --git a/selectinf/randomized/approx_reference_grouplasso.py b/selectinf/randomized/approx_reference_grouplasso.py index d4c0decdd..c478d8f45 100644 --- a/selectinf/randomized/approx_reference_grouplasso.py +++ b/selectinf/randomized/approx_reference_grouplasso.py @@ -244,19 +244,19 @@ def _setup_implied_gaussian(self): if np.asarray(prec).shape in [(), (0,)]: cond_precision = self.opt_linear.T.dot(self.opt_linear) * prec cond_cov = inv(cond_precision) - logdens_linear = cond_cov.dot(self.opt_linear.T) * prec + regress_opt = -cond_cov.dot(self.opt_linear.T) * prec else: cond_precision = self.opt_linear.T.dot(prec.dot(self.opt_linear)) cond_cov = inv(cond_precision) - logdens_linear = cond_cov.dot(self.opt_linear.T).dot(prec) + regress_opt = -cond_cov.dot(self.opt_linear.T).dot(prec) - cond_mean = -logdens_linear.dot(self.observed_score_state + self.opt_offset) + cond_mean = regress_opt.dot(self.observed_score_state + self.opt_offset) self.cond_mean = cond_mean self.cond_cov = cond_cov self.cond_precision = cond_precision - self.logdens_linear = logdens_linear + self.regress_opt = regress_opt - return cond_mean, cond_cov, cond_precision, logdens_linear + return cond_mean, cond_cov, cond_precision, regress_opt def selective_MLE(self, solve_args={'tol': 1.e-12}, @@ -277,7 +277,7 @@ def selective_MLE(self, init_soln: (opt_state) initial (observed) value of optimization variables cond_mean: conditional mean of optimization variables (model on _setup_implied_gaussian) cond_cov: conditional variance of optimization variables (model on _setup_implied_gaussian) - logdens_linear: (model on _setup_implied_gaussian) + regress_opt: (model on _setup_implied_gaussian) linear_part: like A_scaling (from lasso) offset: like b_scaling (from lasso) solve_args: passed on to solver @@ -292,7 +292,7 @@ def selective_MLE(self, init_soln = self.observed_opt_state # just the gammas cond_mean = self.cond_mean cond_cov = self.cond_cov - logdens_linear = self.logdens_linear + regress_opt = self.regress_opt linear_part = self.linear_part offset = self.offset @@ -308,13 +308,13 @@ def selective_MLE(self, # target_lin determines how the conditional mean of optimization variables # vary with target - # logdens_linear determines how the argument of the optimization density + # regress_opt determines how the argument of the optimization density # depends on the score, not how the mean depends on score, hence the minus sign target_linear = target_score_cov.T.dot(prec_target) target_offset = score_offset - target_linear.dot(observed_target) - target_lin = - logdens_linear.dot(target_linear) + target_lin = regress_opt.dot(target_linear) target_off = cond_mean - target_lin.dot(observed_target) if 
np.asarray(self.randomizer_prec).shape in [(), (0,)]: @@ -437,7 +437,7 @@ def __init__(self, self.linear_part = query.linear_part self.offset = query.offset - self.logdens_linear = query.logdens_linear + self.regress_opt = query.regress_opt self.cond_mean = query.cond_mean self.prec_opt = np.linalg.inv(query.cond_cov) self.cond_cov = query.cond_cov @@ -528,7 +528,7 @@ def log_reference(self, raise ValueError('no target specified') prec_target = np.linalg.inv(target_cov) - target_lin = - self.logdens_linear.dot(target_score_cov.T.dot(prec_target)) + target_lin = self.regress_opt.dot(target_score_cov.T.dot(prec_target)) ref_hat = [] @@ -547,7 +547,7 @@ def log_reference(self, #direction for decomposing o - eta = -self.prec_opt.dot(self.logdens_linear.dot(target_score_cov.T)) + eta = self.prec_opt.dot(self.regress_opt.dot(target_score_cov.T)) implied_mean = np.asscalar(eta.T.dot(cond_mean_grid)) implied_cov = np.asscalar(eta.T.dot(self.cond_cov).dot(eta)) @@ -688,7 +688,7 @@ def _construct_density(self): target_offset = (self.score_offset - target_linear.dot(observed_target_uni)).reshape( (target_linear.shape[0],)) - target_lin = -self.logdens_linear.dot(target_linear) + target_lin = self.regress_opt.dot(target_linear) target_off = (self.cond_mean - target_lin.dot(observed_target_uni)).reshape((target_lin.shape[0],)) _prec = prec_target + (target_linear.T.dot(target_linear) * self.randomizer_prec) - target_lin.T.dot( diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index 80169a9a0..9ca4ebe05 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -42,7 +42,7 @@ def __init__(self, self.linear_part = query.sampler.affine_con.linear_part self.offset = query.sampler.affine_con.offset - self.logdens_linear = query.sampler.logdens_transform[0] + self.regress_opt = query.sampler.logdens_transform[0] self.cond_mean = query.cond_mean self.prec_opt = np.linalg.inv(query.cond_cov) self.cond_cov = query.cond_cov @@ -126,7 +126,7 @@ def log_reference(self, raise ValueError('no target specified') prec_target = np.linalg.inv(target_cov) - target_lin = - self.logdens_linear.dot(target_score_cov.T.dot(prec_target)) + target_lin = self.regress_opt.dot(target_score_cov.T.dot(prec_target)) ref_hat = [] @@ -145,7 +145,7 @@ def log_reference(self, #direction for decomposing o - eta = -self.prec_opt.dot(self.logdens_linear.dot(target_score_cov.T)) + eta = self.prec_opt.dot(self.regress_opt.dot(target_score_cov.T)) implied_mean = np.asscalar(eta.T.dot(cond_mean_grid)) implied_cov = np.asscalar(eta.T.dot(self.cond_cov).dot(eta)) @@ -302,7 +302,7 @@ def _construct_density(self): target_offset = (self.score_offset - target_linear.dot(observed_target_uni)).reshape( (target_linear.shape[0],)) - target_lin = -self.logdens_linear.dot(target_linear) + target_lin = self.regress_opt.dot(target_linear) target_off = (self.cond_mean - target_lin.dot(observed_target_uni)).reshape((target_lin.shape[0],)) _prec = prec_target + (target_linear.T.dot(target_linear) * self.randomizer_prec) - target_lin.T.dot( diff --git a/selectinf/randomized/group_lasso.py b/selectinf/randomized/group_lasso.py index 595651bee..09c239df0 100644 --- a/selectinf/randomized/group_lasso.py +++ b/selectinf/randomized/group_lasso.py @@ -201,7 +201,7 @@ def fit(self, dispersion = 1. 
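# --- editor's sketch (not part of the patch): the logdens_linear -> regress_opt
# renaming in these commits flips a sign, regress_opt == -logdens_linear, so the
# conditional mean of the optimization variables reads as a regression onto the
# offset score. Shapes and values below are assumed purely for illustration.
import numpy as np

rng = np.random.default_rng(0)
opt_linear = rng.standard_normal((3, 2))      # p=3 score coords, 2 opt variables
prec = 1.3                                    # scalar randomizer precision (assumed)
score, subgrad = rng.standard_normal(3), rng.standard_normal(3)

cond_precision = opt_linear.T @ opt_linear * prec
cond_cov = np.linalg.inv(cond_precision)

logdens_linear = cond_cov @ opt_linear.T * prec     # old name, old sign
regress_opt = -cond_cov @ opt_linear.T * prec       # new name used in these commits

old_cond_mean = -logdens_linear @ (score + subgrad)
new_cond_mean = regress_opt @ (score + subgrad)
assert np.allclose(old_cond_mean, new_cond_mean)    # same implied Gaussian mean
# --- end sketch ---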
(prec_opt_linear, - logdens_linear) = self._get_precision_opt_linear(opt_linear, + regress_opt) = self._get_precision_opt_linear(opt_linear, ordered_vars, dispersion) @@ -231,7 +231,7 @@ def fit(self, self.observed_score_state, log_cond_density, log_det, - (np.atleast_2d(logdens_linear.T[:,idx_g].dot(dir_g).T), + (np.atleast_2d(regress_opt.T[:,idx_g].dot(dir_g).T), opt_offset)) self._samplers[group] = sampler @@ -375,9 +375,9 @@ def _get_precision_opt_linear(self, opt_linear, variables, dispersion=1): cond_precision = opt_linear.T.dot(value) cond_cov = np.linalg.inv(cond_precision) - logdens_linear = cond_cov.dot(value.T) * dispersion # is this last dispersion correct? + regress_opt = -cond_cov.dot(value.T) * dispersion # is this last dispersion correct? - return value, logdens_linear + return value, regress_opt def _solve_randomized_problem(self, perturb=None, diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index 8133365ce..ff41f46de 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -898,7 +898,7 @@ def _setup_implied_gaussian(self, # because opt_linear has shape p x E with the columns # being those non-zero columns of the solution. Above S_E = np.diag(signs) # the conditional precision is S_E Q[E][:,E] * pi / ((1 - pi) * dispersion) S_E - # and logdens_linear is Q[E][:,E]^{-1} S_E + # and regress_opt is -Q[E][:,E]^{-1} S_E # padded with zeros # to be E x p @@ -916,12 +916,12 @@ def _setup_implied_gaussian(self, assert(np.linalg.norm(cond_precision - cond_precision.T) / np.linalg.norm(cond_precision) < 1.e-6) cond_cov = np.linalg.inv(cond_precision) - logdens_linear = np.zeros((len(ordered_vars), + regress_opt = np.zeros((len(ordered_vars), self.nfeature)) - logdens_linear[:, ordered_vars] = cond_cov * signs[None, :] / (dispersion * ratio) - cond_mean = -logdens_linear.dot(self.observed_score_state + opt_offset) + regress_opt[:, ordered_vars] = -cond_cov * signs[None, :] / (dispersion * ratio) + cond_mean = regress_opt.dot(self.observed_score_state + opt_offset) - return cond_mean, cond_cov, cond_precision, logdens_linear + return cond_mean, cond_cov, cond_precision, regress_opt def _solve_randomized_problem(self, # optional binary vector diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 85dc64b5e..33f132d17 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -40,7 +40,7 @@ def __init__(self, linear_part = query.sampler.affine_con.linear_part offset = query.sampler.affine_con.offset - logdens_linear = query.sampler.logdens_transform[0] + regress_opt = query.sampler.logdens_transform[0] _, randomizer_prec = query.randomizer.cov_prec score_offset = query.observed_score_state + query.sampler.logdens_transform[1] @@ -59,7 +59,7 @@ def __init__(self, self.observed_target = observed_target self.cov_target_score = cov_target_score - self.logdens_linear = logdens_linear + self.regress_opt = regress_opt self.randomizer_prec = randomizer_prec self.score_offset = score_offset @@ -133,7 +133,7 @@ def _set_marginal_parameters(self): target_linear = self.cov_target_score.T.dot(self.prec_target) target_offset = self.score_offset - target_linear.dot(self.observed_target) - target_lin = -self.logdens_linear.dot(target_linear) + target_lin = self.regress_opt.dot(target_linear) target_off = self.cond_mean - target_lin.dot(self.observed_target) self.linear_coef = target_lin diff --git a/selectinf/randomized/query.py 
b/selectinf/randomized/query.py index 592065367..38b34f2c6 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -118,20 +118,20 @@ def _setup_sampler(self, (cond_mean, cond_cov, cond_precision, - logdens_linear) = self._setup_implied_gaussian(opt_linear, + regress_opt) = self._setup_implied_gaussian(opt_linear, opt_offset, dispersion) - def log_density(logdens_linear, offset, cond_prec, opt, score): + def log_density(regress_opt, offset, cond_prec, opt, score): if score.ndim == 1: - mean_term = logdens_linear.dot(score.T + offset).T + mean_term = regress_opt.dot(score.T + offset).T else: - mean_term = logdens_linear.dot(score.T + offset[:, None]).T - arg = opt + mean_term + mean_term = regress_opt.dot(score.T + offset[:, None]).T + arg = opt - mean_term return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1) log_density = functools.partial(log_density, - logdens_linear, + regress_opt, opt_offset, cond_precision) @@ -148,7 +148,7 @@ def log_density(logdens_linear, offset, cond_prec, opt, score): self.observed_opt_state, self.observed_score_state, log_density, - (logdens_linear, opt_offset), + (regress_opt, opt_offset), self.randomizer_prec, selection_info=self.selection_variable, useC=self.useC) @@ -166,15 +166,17 @@ def _setup_implied_gaussian(self, if np.asarray(prec).shape in [(), (0,)]: cond_precision = opt_linear.T.dot(opt_linear) * prec cond_cov = np.linalg.inv(cond_precision) - logdens_linear = cond_cov.dot(opt_linear.T) * prec + regress_opt = -cond_cov.dot(opt_linear.T) * prec else: cond_precision = opt_linear.T.dot(prec.dot(opt_linear)) cond_cov = np.linalg.inv(cond_precision) - logdens_linear = cond_cov.dot(opt_linear.T).dot(prec) + regress_opt = -cond_cov.dot(opt_linear.T).dot(prec) - cond_mean = -logdens_linear.dot(self.observed_score_state + opt_offset) + # regress_opt is regression coefficient of opt onto score + u... - return cond_mean, cond_cov, cond_precision, logdens_linear + cond_mean = regress_opt.dot(self.observed_score_state + opt_offset) + + return cond_mean, cond_cov, cond_precision, regress_opt def summary(self, observed_target, @@ -833,7 +835,7 @@ def __init__(self, observed_score_state, log_cond_density, logdens_transform, # described how score enters log_density. - randomizer_prec, + cov_product, # product score_cov.dot(randomizer_prec), selection_info=None, useC=False): @@ -875,7 +877,7 @@ def __init__(self, self._log_cond_density = log_cond_density self.logdens_transform = logdens_transform self.useC = useC - self.randomizer_prec = randomizer_prec + self.cov_product = cov_product def log_cond_density(self, opt_sample, @@ -977,16 +979,16 @@ def _log_density_ray(self, if (not hasattr(self, "_direction") or not np.all(self._direction == direction)): - logdens_lin, logdens_offset = self.logdens_transform + regress_opt, logdens_offset = self.logdens_transform if opt_sample.shape[1] == 1: prec = 1. 
/ self.covariance[0, 0] - quadratic_term = logdens_lin.dot(direction) ** 2 * prec - arg = (logdens_lin.dot(nuisance + logdens_offset) + - logdens_lin.dot(direction) * gaussian_sample + - opt_sample[:, 0]) - linear_term = logdens_lin.dot(direction) * prec * arg + quadratic_term = regress_opt.dot(direction) ** 2 * prec + arg = (opt_sample[:, 0] - + regress_opt.dot(nuisance + logdens_offset) - + regress_opt.dot(direction) * gaussian_sample) + linear_term = -regress_opt.dot(direction) * prec * arg constant_term = arg ** 2 * prec self._cache = {'linear_term': linear_term, @@ -996,22 +998,22 @@ def _log_density_ray(self, self._direction = direction.copy() # density is a Gaussian evaluated at - # O_i + A(N + (Z_i + theta) * gamma + b) + # O_i - A(N + (Z_i + theta) * gamma + b) # b is logdens_offset - # A is logdens_linear + # A is regress_opt # Z_i is gaussian_sample[i] (real-valued) # gamma is direction # O_i is opt_sample[i] # let arg1 = O_i # let arg2 = A(N+b + Z_i \cdot gamma) - # then it is of the form (arg1 + arg2 + theta * A gamma) + # then it is of the form (arg1 - arg2 - theta * A gamma) - logdens_lin, logdens_offset = self.logdens_transform + regress_opt, logdens_offset = self.logdens_transform cov = self.covariance prec = np.linalg.inv(cov) - linear_part = logdens_lin.dot(direction) # A gamma + linear_part = -regress_opt.dot(direction) # -A gamma if 1 in opt_sample.shape: pass # stop3 what's this for? @@ -1020,10 +1022,10 @@ def _log_density_ray(self, quadratic_term = linear_part.T.dot(prec).dot(linear_part) arg1 = opt_sample.T - arg2 = logdens_lin.dot(np.multiply.outer(direction, gaussian_sample) + - (nuisance + logdens_offset)[:, None]) + arg2 = -regress_opt.dot(np.multiply.outer(direction, gaussian_sample) + + (nuisance + logdens_offset)[:, None]) arg = arg1 + arg2 - linear_term = linear_part.T.dot(prec).dot(arg) + linear_term = -regress_opt.T.dot(prec).dot(arg) constant_term = np.sum(prec.dot(arg) * arg, 0) self._cache = {'linear_term': linear_term, @@ -1312,7 +1314,7 @@ def selective_MLE(observed_target, # only for independent estimator cond_mean, cond_cov, - logdens_linear, + regress_opt, linear_part, offset, randomizer_prec, @@ -1337,7 +1339,7 @@ def selective_MLE(observed_target, Conditional mean of optimization variables given target. cond_cov : ndarray Conditional covariance of optimization variables given target. - logdens_linear : ndarray + regress_opt : ndarray Describes how conditional mean of optimization variables varies with target. 
linear_part : ndarray @@ -1362,13 +1364,13 @@ def selective_MLE(observed_target, # target_lin determines how the conditional mean of optimization variables # vary with target - # logdens_linear determines how the argument of the optimization density + # regress_opt determines how the argument of the optimization density # depends on the score, not how the mean depends on score, hence the minus sign target_linear = target_score_cov.T.dot(prec_target) target_offset = score_offset - target_linear.dot(observed_target) - target_lin = - logdens_linear.dot(target_linear) + target_lin = regress_opt.dot(target_linear) target_off = cond_mean - target_lin.dot(observed_target) if np.asarray(randomizer_prec).shape in [(), (0,)]: From 6a9a9012b07233a17e7149215731a15d0777163d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 12 Jul 2021 14:06:54 -0700 Subject: [PATCH 104/187] rename target_linear->score_decomp, target_offset->score_resid --- selectinf/randomized/approx_reference.py | 12 ++++----- .../randomized/approx_reference_grouplasso.py | 26 +++++++++---------- selectinf/randomized/exact_reference.py | 12 ++++----- selectinf/randomized/posterior_inference.py | 14 +++++----- selectinf/randomized/query.py | 21 ++++++++------- 5 files changed, 43 insertions(+), 42 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index ee8d81391..62fea3aad 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -286,17 +286,17 @@ def _construct_density(self): prec_target = 1. / target_cov_uni target_score_cov_uni = self.target_score_cov[m, :].reshape((1, p)) - target_linear = target_score_cov_uni.T.dot(prec_target) - target_offset = (self.score_offset - target_linear.dot(observed_target_uni)).reshape( - (target_linear.shape[0],)) + score_decomp = target_score_cov_uni.T.dot(prec_target) + score_resid = (self.score_offset - score_decomp.dot(observed_target_uni)).reshape( + (score_decomp.shape[0],)) - target_lin = self.regress_opt.dot(target_linear) + target_lin = self.regress_opt.dot(score_decomp) target_off = (self.cond_mean - target_lin.dot(observed_target_uni)).reshape((target_lin.shape[0],)) - _prec = prec_target + (target_linear.T.dot(target_linear) * self.randomizer_prec) - target_lin.T.dot( + _prec = prec_target + (score_decomp.T.dot(score_decomp) * self.randomizer_prec) - target_lin.T.dot( self.prec_opt).dot(target_lin) - _P = target_linear.T.dot(target_offset) * self.randomizer_prec + _P = score_decomp.T.dot(score_resid) * self.randomizer_prec _r = (1. 
/ _prec).dot(target_lin.T.dot(self.prec_opt).dot(target_off) - _P) _S = np.linalg.inv(_prec).dot(prec_target) diff --git a/selectinf/randomized/approx_reference_grouplasso.py b/selectinf/randomized/approx_reference_grouplasso.py index c478d8f45..b7ff1f96c 100644 --- a/selectinf/randomized/approx_reference_grouplasso.py +++ b/selectinf/randomized/approx_reference_grouplasso.py @@ -311,20 +311,20 @@ def selective_MLE(self, # regress_opt determines how the argument of the optimization density # depends on the score, not how the mean depends on score, hence the minus sign - target_linear = target_score_cov.T.dot(prec_target) - target_offset = score_offset - target_linear.dot(observed_target) + score_decomp = target_score_cov.T.dot(prec_target) + score_resid = score_offset - score_decomp.dot(observed_target) - target_lin = regress_opt.dot(target_linear) + target_lin = regress_opt.dot(score_decomp) target_off = cond_mean - target_lin.dot(observed_target) if np.asarray(self.randomizer_prec).shape in [(), (0,)]: - _P = target_linear.T.dot(target_offset) * self.randomizer_prec - _prec = prec_target + (target_linear.T.dot(target_linear) * self.randomizer_prec) - target_lin.T.dot( + _P = score_decomp.T.dot(score_resid) * self.randomizer_prec + _prec = prec_target + (score_decomp.T.dot(score_decomp) * self.randomizer_prec) - target_lin.T.dot( prec_opt).dot( target_lin) else: - _P = target_linear.T.dot(self.randomizer_prec).dot(target_offset) - _prec = prec_target + (target_linear.T.dot(self.randomizer_prec).dot(target_linear)) - target_lin.T.dot( + _P = score_decomp.T.dot(self.randomizer_prec).dot(score_resid) + _prec = prec_target + (score_decomp.T.dot(self.randomizer_prec).dot(score_decomp)) - target_lin.T.dot( prec_opt).dot(target_lin) C = target_cov.dot(_P - target_lin.T.dot(prec_opt).dot(target_off)) @@ -684,17 +684,17 @@ def _construct_density(self): prec_target = 1. / target_cov_uni target_score_cov_uni = self.target_score_cov[m, :].reshape((1, p)) - target_linear = target_score_cov_uni.T.dot(prec_target) - target_offset = (self.score_offset - target_linear.dot(observed_target_uni)).reshape( - (target_linear.shape[0],)) + score_decomp = target_score_cov_uni.T.dot(prec_target) + score_resid = (self.score_offset - score_decomp.dot(observed_target_uni)).reshape( + (score_decomp.shape[0],)) - target_lin = self.regress_opt.dot(target_linear) + target_lin = self.regress_opt.dot(score_decomp) target_off = (self.cond_mean - target_lin.dot(observed_target_uni)).reshape((target_lin.shape[0],)) - _prec = prec_target + (target_linear.T.dot(target_linear) * self.randomizer_prec) - target_lin.T.dot( + _prec = prec_target + (score_decomp.T.dot(score_decomp) * self.randomizer_prec) - target_lin.T.dot( self.prec_opt).dot(target_lin) - _P = target_linear.T.dot(target_offset) * self.randomizer_prec + _P = score_decomp.T.dot(score_resid) * self.randomizer_prec _r = (1. / _prec).dot(target_lin.T.dot(self.prec_opt).dot(target_off) - _P) _S = np.linalg.inv(_prec).dot(prec_target) diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index 9ca4ebe05..28f70aa16 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -298,17 +298,17 @@ def _construct_density(self): prec_target = 1. 
/ target_cov_uni target_score_cov_uni = self.target_score_cov[m, :].reshape((1, p)) - target_linear = target_score_cov_uni.T.dot(prec_target) - target_offset = (self.score_offset - target_linear.dot(observed_target_uni)).reshape( - (target_linear.shape[0],)) + score_decomp = target_score_cov_uni.T.dot(prec_target) + score_resid = (self.score_offset - score_decomp.dot(observed_target_uni)).reshape( + (score_decomp.shape[0],)) - target_lin = self.regress_opt.dot(target_linear) + target_lin = self.regress_opt.dot(score_decomp) target_off = (self.cond_mean - target_lin.dot(observed_target_uni)).reshape((target_lin.shape[0],)) - _prec = prec_target + (target_linear.T.dot(target_linear) * self.randomizer_prec) - target_lin.T.dot( + _prec = prec_target + (score_decomp.T.dot(score_decomp) * self.randomizer_prec) - target_lin.T.dot( self.prec_opt).dot(target_lin) - _P = target_linear.T.dot(target_offset) * self.randomizer_prec + _P = score_decomp.T.dot(score_resid) * self.randomizer_prec _r = (1. / _prec).dot(target_lin.T.dot(self.prec_opt).dot(target_off) - _P) _S = np.linalg.inv(_prec).dot(prec_target) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 33f132d17..a63718aea 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -130,23 +130,23 @@ def _set_marginal_parameters(self): implied mean as a function of the true parameters. """ - target_linear = self.cov_target_score.T.dot(self.prec_target) - target_offset = self.score_offset - target_linear.dot(self.observed_target) + score_decomp = self.cov_target_score.T.dot(self.prec_target) + score_resid = self.score_offset - score_decomp.dot(self.observed_target) - target_lin = self.regress_opt.dot(target_linear) + target_lin = self.regress_opt.dot(score_decomp) target_off = self.cond_mean - target_lin.dot(self.observed_target) self.linear_coef = target_lin self.offset_coef = target_off if np.asarray(self.randomizer_prec).shape in [(), (0,)]: - _prec = self.prec_target + (target_linear.T.dot(target_linear) * self.randomizer_prec) \ + _prec = self.prec_target + (score_decomp.T.dot(score_decomp) * self.randomizer_prec) \ - target_lin.T.dot(self.cond_precision).dot(target_lin) - _P = target_linear.T.dot(target_offset) * self.randomizer_prec + _P = score_decomp.T.dot(score_resid) * self.randomizer_prec else: - _prec = self.prec_target + (target_linear.T.dot(self.randomizer_prec).dot(target_linear)) \ + _prec = self.prec_target + (score_decomp.T.dot(self.randomizer_prec).dot(score_decomp)) \ - target_lin.T.dot(self.cond_precision).dot(target_lin) - _P = target_linear.T.dot(self.randomizer_prec).dot(target_offset) + _P = score_decomp.T.dot(self.randomizer_prec).dot(score_resid) _Q = np.linalg.inv(_prec + target_lin.T.dot(self.cond_precision).dot(target_lin)) self.prec_marginal = self.cond_precision - self.cond_precision.dot(target_lin).dot(_Q).dot(target_lin.T).dot(self.cond_precision) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 38b34f2c6..12fcbc8aa 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -119,8 +119,8 @@ def _setup_sampler(self, cond_cov, cond_precision, regress_opt) = self._setup_implied_gaussian(opt_linear, - opt_offset, - dispersion) + opt_offset, + dispersion) def log_density(regress_opt, offset, cond_prec, opt, score): if score.ndim == 1: @@ -950,7 +950,8 @@ def selective_MLE(self, solve_args : dict, optional Arguments passed to solver. 
""" - score_offset = self.observed_score_state + self.logdens_transform[1] + + score_offset = self.observed_score_state + self.logdens_transform[1] # logdens_transform[1] is observed_subgrad return selective_MLE(observed_target, target_cov, @@ -1367,19 +1368,19 @@ def selective_MLE(observed_target, # regress_opt determines how the argument of the optimization density # depends on the score, not how the mean depends on score, hence the minus sign - target_linear = target_score_cov.T.dot(prec_target) - target_offset = score_offset - target_linear.dot(observed_target) + score_decomp = target_score_cov.T.dot(prec_target) + score_resid = score_offset - score_decomp.dot(observed_target) - target_lin = regress_opt.dot(target_linear) + target_lin = regress_opt.dot(score_decomp) target_off = cond_mean - target_lin.dot(observed_target) if np.asarray(randomizer_prec).shape in [(), (0,)]: - _P = target_linear.T.dot(target_offset) * randomizer_prec - _prec = prec_target + (target_linear.T.dot(target_linear) * randomizer_prec) - target_lin.T.dot(prec_opt).dot( + _P = score_decomp.T.dot(score_resid) * randomizer_prec + _prec = prec_target + (score_decomp.T.dot(score_decomp) * randomizer_prec) - target_lin.T.dot(prec_opt).dot( target_lin) else: - _P = target_linear.T.dot(randomizer_prec).dot(target_offset) - _prec = prec_target + (target_linear.T.dot(randomizer_prec).dot(target_linear)) - target_lin.T.dot( + _P = score_decomp.T.dot(randomizer_prec).dot(score_resid) + _prec = prec_target + (score_decomp.T.dot(randomizer_prec).dot(score_decomp)) - target_lin.T.dot( prec_opt).dot(target_lin) C = target_cov.dot(_P - target_lin.T.dot(prec_opt).dot(target_off)) From 7fc46cf0e2cb5623424915332f682070c838cdeb Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 12 Jul 2021 15:44:04 -0700 Subject: [PATCH 105/187] some more renaming --- selectinf/randomized/approx_reference.py | 74 +++--- .../randomized/approx_reference_grouplasso.py | 149 ++++++----- selectinf/randomized/drop_losers.py | 16 +- selectinf/randomized/exact_reference.py | 76 +++--- selectinf/randomized/group_lasso.py | 52 ++-- selectinf/randomized/lasso.py | 33 ++- selectinf/randomized/modelQ.py | 30 +-- selectinf/randomized/posterior_inference.py | 37 +-- selectinf/randomized/query.py | 236 +++++++++--------- selectinf/randomized/screening.py | 22 +- selectinf/randomized/slope.py | 27 +- 11 files changed, 374 insertions(+), 378 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index 62fea3aad..06eb5cd54 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -12,8 +12,8 @@ class approximate_grid_inference(object): def __init__(self, query, observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, solve_args={'tol': 1.e-12}, useIP=False): @@ -27,9 +27,9 @@ def __init__(self, to describe implied Gaussian. observed_target : ndarray Observed estimate of target. - target_cov : ndarray + cov_target : ndarray Estimated covaraince of target. - target_score_cov : ndarray + cov_target_score : ndarray Estimated covariance of target and score of randomized query. solve_args : dict, optional Arguments passed to solver. 
@@ -38,8 +38,8 @@ def __init__(self, self.solve_args = solve_args result, inverse_info = query.selective_MLE(observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, solve_args=solve_args)[:2] self.linear_part = query.sampler.affine_con.linear_part @@ -51,15 +51,15 @@ def __init__(self, self.cond_cov = query.cond_cov self.observed_target = observed_target - self.target_score_cov = target_score_cov - self.target_cov = target_cov + self.cov_target_score = cov_target_score + self.cov_target = cov_target - self.init_soln = query.observed_opt_state + self.observed_soln = query.observed_opt_state self.randomizer_prec = query.sampler.randomizer_prec self.score_offset = query.observed_score_state + query.sampler.logdens_transform[1] - self.ntarget = ntarget = target_cov.shape[0] + self.ntarget = ntarget = cov_target.shape[0] _scale = 4 * np.sqrt(np.diag(inverse_info)) if useIP == False: @@ -121,8 +121,8 @@ def summary(self, def _approx_log_reference(self, observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, grid): """ @@ -131,25 +131,25 @@ def _approx_log_reference(self, if np.asarray(observed_target).shape in [(), (0,)]: raise ValueError('no target specified') - prec_target = np.linalg.inv(target_cov) - target_lin = self.regress_opt.dot(target_score_cov.T.dot(prec_target)) + prec_target = np.linalg.inv(cov_target) + regress_opt_target = self.regress_opt.dot(cov_target_score.T.dot(prec_target)) ref_hat = [] solver = solve_barrier_affine_py for k in range(grid.shape[0]): # in the usual D = N + Gamma theta.hat, - # target_lin is "something" times Gamma, + # regress_opt_target is "something" times Gamma, # where "something" comes from implied Gaussian # cond_mean is "something" times D - # Gamma is target_score_cov.T.dot(prec_target) + # Gamma is cov_target_score.T.dot(prec_target) - cond_mean_grid = (target_lin.dot(np.atleast_1d(grid[k] - observed_target)) + + cond_mean_grid = (regress_opt_target.dot(np.atleast_1d(grid[k] - observed_target)) + self.cond_mean) conjugate_arg = self.prec_opt.dot(cond_mean_grid) val, _, _ = solver(conjugate_arg, self.prec_opt, - self.init_soln, + self.observed_soln, self.linear_part, self.offset, **self.solve_args) @@ -165,17 +165,17 @@ def _construct_families(self): self._families = [] for m in range(self.ntarget): - p = self.target_score_cov.shape[1] + p = self.cov_target_score.shape[1] observed_target_uni = (self.observed_target[m]).reshape((1,)) - target_cov_uni = (np.diag(self.target_cov)[m]).reshape((1, 1)) - target_score_cov_uni = self.target_score_cov[m, :].reshape((1, p)) + cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) + cov_target_score_uni = self.cov_target_score[m, :].reshape((1, p)) var_target = 1. / ((self.precs[m])[0, 0]) approx_log_ref = self._approx_log_reference(observed_target_uni, - target_cov_uni, - target_score_cov_uni, + cov_target_uni, + cov_target_score_uni, self.stat_grid[m]) @@ -278,31 +278,31 @@ def _construct_density(self): S = {} r = {} - p = self.target_score_cov.shape[1] + p = self.cov_target_score.shape[1] for m in range(self.ntarget): observed_target_uni = (self.observed_target[m]).reshape((1,)) - target_cov_uni = (np.diag(self.target_cov)[m]).reshape((1, 1)) - prec_target = 1. / target_cov_uni - target_score_cov_uni = self.target_score_cov[m, :].reshape((1, p)) + cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) + prec_target = 1. 
/ cov_target_uni + cov_target_score_uni = self.cov_target_score[m, :].reshape((1, p)) - score_decomp = target_score_cov_uni.T.dot(prec_target) - score_resid = (self.score_offset - score_decomp.dot(observed_target_uni)).reshape( - (score_decomp.shape[0],)) + regress_score_target = cov_target_score_uni.T.dot(prec_target) + resid_score_target = (self.score_offset - regress_score_target.dot(observed_target_uni)).reshape( + (regress_score_target.shape[0],)) - target_lin = self.regress_opt.dot(score_decomp) - target_off = (self.cond_mean - target_lin.dot(observed_target_uni)).reshape((target_lin.shape[0],)) + regress_opt_target = self.regress_opt.dot(regress_score_target) + resid_mean_opt_target = (self.cond_mean - regress_opt_target.dot(observed_target_uni)).reshape((regress_opt_target.shape[0],)) - _prec = prec_target + (score_decomp.T.dot(score_decomp) * self.randomizer_prec) - target_lin.T.dot( - self.prec_opt).dot(target_lin) + prec_target_nosel = prec_target + (regress_score_target.T.dot(regress_score_target) * self.randomizer_prec) - regress_opt_target.T.dot( + self.prec_opt).dot(regress_opt_target) - _P = score_decomp.T.dot(score_resid) * self.randomizer_prec - _r = (1. / _prec).dot(target_lin.T.dot(self.prec_opt).dot(target_off) - _P) + _P = regress_score_target.T.dot(resid_score_target) * self.randomizer_prec + _r = (1. / _prec).dot(regress_opt_target.T.dot(self.prec_opt).dot(resid_mean_opt_target) - _P) _S = np.linalg.inv(_prec).dot(prec_target) S[m] = _S r[m] = _r - precs[m] = _prec + precs[m] = prec_target_nosel self.precs = precs self.S = S diff --git a/selectinf/randomized/approx_reference_grouplasso.py b/selectinf/randomized/approx_reference_grouplasso.py index b7ff1f96c..3909a2a56 100644 --- a/selectinf/randomized/approx_reference_grouplasso.py +++ b/selectinf/randomized/approx_reference_grouplasso.py @@ -60,8 +60,8 @@ def fit(self, perturb=None): # solve the randomized version of group lasso - (self.initial_soln, - self.initial_subgrad) = self._solve_randomized_problem(perturb=perturb, + (self.observed_soln, + self.observed_subgrad) = self._solve_randomized_problem(perturb=perturb, solve_args=solve_args) # initialize variables @@ -81,7 +81,7 @@ def fit(self, for g in sorted(np.unique(self.groups)): # g is group label group_mask = self.groups == g - soln = self.initial_soln # do not need to keep setting this + soln = self.observed_soln # do not need to keep setting this if norm(soln[group_mask]) > tol * norm(soln): # is group g appreciably nonzero ordered_groups.append(g) @@ -127,12 +127,10 @@ def fit(self, for i, var in enumerate(ordered_vars): opt_linearNoU[var, i] += self.ridge_term - opt_offset = self.initial_subgrad - self.observed_score_state = -opt_linearNoU.dot(_beta_unpenalized) self.observed_score_state[~overall] += self.loglike.smooth_objective(beta_bar, 'grad')[~overall] - active_signs = np.sign(self.initial_soln) + active_signs = np.sign(self.observed_soln) active = np.flatnonzero(active_signs) self.active = active @@ -171,7 +169,6 @@ def compute_Lg(g): self.opt_linear = opt_linearNoU.dot(U) self.active_dirs = active_dirs - self.opt_offset = opt_offset self.ordered_vars = ordered_vars self.linear_part = -np.eye(self.observed_opt_state.shape[0]) @@ -198,12 +195,12 @@ def _solve_randomized_problem(self, # if all groups are size 1, set up lasso penalty and run usual lasso solver... (see existing code)... 
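# --- editor's sketch (assumed helper, not from the patch): the group-lasso
# penalty solved above acts by block soft-thresholding, so whole groups are kept
# or zeroed, which is exactly the active/inactive split that `fit` records.
import numpy as np

def group_prox(z, groups, weights, step):
    """argmin_b 0.5 * ||b - z||**2 + step * sum_g weights[g] * ||b_g||_2"""
    out = np.zeros_like(z)
    for g in np.unique(groups):
        idx = groups == g
        norm_g = np.linalg.norm(z[idx])
        if norm_g > step * weights[g]:
            out[idx] = (1 - step * weights[g] / norm_g) * z[idx]
    return out

z = np.array([3.0, -1.0, 0.2, 0.1])
groups = np.array([0, 0, 1, 1])
print(group_prox(z, groups, {0: 1.0, 1: 1.0}, step=1.0))
# the weak second group collapses to zero; the strong first group is only shrunk
# --- end sketch ---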
- initial_soln = problem.solve(quad, **solve_args) - initial_subgrad = -(self.loglike.smooth_objective(initial_soln, + observed_soln = problem.solve(quad, **solve_args) + observed_subgrad = -(self.loglike.smooth_objective(observed_soln, 'grad') + - quad.objective(initial_soln, 'grad')) + quad.objective(observed_soln, 'grad')) - return initial_soln, initial_subgrad + return observed_soln, observed_subgrad @staticmethod def gaussian(X, @@ -250,7 +247,7 @@ def _setup_implied_gaussian(self): cond_cov = inv(cond_precision) regress_opt = -cond_cov.dot(self.opt_linear.T).dot(prec) - cond_mean = regress_opt.dot(self.observed_score_state + self.opt_offset) + cond_mean = regress_opt.dot(self.observed_score_state + self.observed_subgrad) self.cond_mean = cond_mean self.cond_cov = cond_cov self.cond_precision = cond_precision @@ -272,9 +269,9 @@ def selective_MLE(self, Parameters ---------- observed_target: from selected_targets - target_cov: from selected_targets - target_cov_score: from selected_targets - init_soln: (opt_state) initial (observed) value of optimization variables + cov_target: from selected_targets + cov_target_score: from selected_targets + observed_soln: (opt_state) initial (observed) value of optimization variables cond_mean: conditional mean of optimization variables (model on _setup_implied_gaussian) cond_cov: conditional variance of optimization variables (model on _setup_implied_gaussian) regress_opt: (model on _setup_implied_gaussian) @@ -287,9 +284,9 @@ def selective_MLE(self, """ self._setup_implied_gaussian() # Calculate useful quantities - (observed_target, target_cov, target_score_cov, alternatives) = self.selected_targets(dispersion) + (observed_target, cov_target, cov_target_score, alternatives) = self.selected_targets(dispersion) - init_soln = self.observed_opt_state # just the gammas + observed_soln = self.observed_opt_state # just the gammas cond_mean = self.cond_mean cond_cov = self.cond_cov regress_opt = self.regress_opt @@ -300,40 +297,40 @@ def selective_MLE(self, raise ValueError('no target specified') observed_target = np.atleast_1d(observed_target) - prec_target = inv(target_cov) + prec_target = inv(cov_target) prec_opt = self.cond_precision - score_offset = self.observed_score_state + self.opt_offset + score_offset = self.observed_score_state + self.observed_subgrad - # target_lin determines how the conditional mean of optimization variables + # regress_opt_target determines how the conditional mean of optimization variables # vary with target # regress_opt determines how the argument of the optimization density # depends on the score, not how the mean depends on score, hence the minus sign - score_decomp = target_score_cov.T.dot(prec_target) - score_resid = score_offset - score_decomp.dot(observed_target) + regress_score_target = cov_target_score.T.dot(prec_target) + resid_score_target = score_offset - regress_score_target.dot(observed_target) - target_lin = regress_opt.dot(score_decomp) - target_off = cond_mean - target_lin.dot(observed_target) + regress_opt_target = regress_opt.dot(regress_score_target) + resid_mean_opt_target = cond_mean - regress_opt_target.dot(observed_target) if np.asarray(self.randomizer_prec).shape in [(), (0,)]: - _P = score_decomp.T.dot(score_resid) * self.randomizer_prec - _prec = prec_target + (score_decomp.T.dot(score_decomp) * self.randomizer_prec) - target_lin.T.dot( + _P = regress_score_target.T.dot(resid_score_target) * self.randomizer_prec + _prec = prec_target + (regress_score_target.T.dot(regress_score_target) * 
self.randomizer_prec) - regress_opt_target.T.dot( prec_opt).dot( - target_lin) + regress_opt_target) else: - _P = score_decomp.T.dot(self.randomizer_prec).dot(score_resid) - _prec = prec_target + (score_decomp.T.dot(self.randomizer_prec).dot(score_decomp)) - target_lin.T.dot( - prec_opt).dot(target_lin) + _P = regress_score_target.T.dot(self.randomizer_prec).dot(resid_score_target) + _prec = prec_target + (regress_score_target.T.dot(self.randomizer_prec).dot(regress_score_target)) - regress_opt_target.T.dot( + prec_opt).dot(regress_opt_target) - C = target_cov.dot(_P - target_lin.T.dot(prec_opt).dot(target_off)) + C = cov_target.dot(_P - regress_opt_target.T.dot(prec_opt).dot(resid_mean_opt_target)) conjugate_arg = prec_opt.dot(cond_mean) val, soln, hess = solve_barrier_affine_jacobian_py(conjugate_arg, prec_opt, - init_soln, + observed_soln, linear_part, offset, self.C, @@ -341,16 +338,16 @@ def selective_MLE(self, useJacobian, **solve_args) - final_estimator = target_cov.dot(_prec).dot(observed_target) \ - + target_cov.dot(target_lin.T.dot(prec_opt.dot(cond_mean - soln))) + C + final_estimator = cov_target.dot(_prec).dot(observed_target) \ + + cov_target.dot(regress_opt_target.T.dot(prec_opt.dot(cond_mean - soln))) + C - unbiased_estimator = target_cov.dot(_prec).dot(observed_target) + target_cov.dot( - _P - target_lin.T.dot(prec_opt).dot(target_off)) + unbiased_estimator = cov_target.dot(_prec).dot(observed_target) + cov_target.dot( + _P - regress_opt_target.T.dot(prec_opt).dot(resid_mean_opt_target)) - L = target_lin.T.dot(prec_opt) - observed_info_natural = _prec + L.dot(target_lin) - L.dot(hess.dot(L.T)) + L = regress_opt_target.T.dot(prec_opt) + observed_info_natural = _prec + L.dot(regress_opt_target) - L.dot(hess.dot(L.T)) - observed_info_mean = target_cov.dot(observed_info_natural.dot(target_cov)) + observed_info_mean = cov_target.dot(observed_info_natural.dot(cov_target)) Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) @@ -422,9 +419,9 @@ def __init__(self, to describe implied Gaussian. observed_target : ndarray Observed estimate of target. - target_cov : ndarray + cov_target : ndarray Estimated covaraince of target. - target_score_cov : ndarray + cov_target_score : ndarray Estimated covariance of target and score of randomized query. solve_args : dict, optional Arguments passed to solver. 
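# --- editor's sketch (illustration only, not from the patch): given the point
# estimate and observed_info_mean produced by selective_MLE above, the reported
# Z-scores, p-values and intervals are ordinary Wald quantities; the numbers
# below are made-up stand-ins for selected-model output.
import numpy as np
from scipy.stats import norm as ndist

final_estimator = np.array([1.8, -0.4])
observed_info_mean = np.diag([0.25, 0.16])     # assumed covariance of the estimator
se = np.sqrt(np.diag(observed_info_mean))

Z_scores = final_estimator / se
pvalues = 2 * ndist.sf(np.abs(Z_scores))       # two-sided
quantile = ndist.ppf(0.95)                     # 90% two-sided intervals
intervals = np.vstack([final_estimator - quantile * se,
                       final_estimator + quantile * se]).T

print(np.round(Z_scores, 2), np.round(pvalues, 3))
print(np.round(intervals, 2))
# --- end sketch ---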
@@ -444,17 +441,17 @@ def __init__(self, self.C = query.C self.active_dirs = query.active_dirs - (observed_target, target_cov, target_score_cov, alternatives) = query.selected_targets(dispersion) + (observed_target, cov_target, cov_target_score, alternatives) = query.selected_targets(dispersion) self.observed_target = observed_target - self.target_score_cov = target_score_cov - self.target_cov = target_cov + self.cov_target_score = cov_target_score + self.cov_target = cov_target - self.init_soln = query.observed_opt_state + self.observed_soln = query.observed_opt_state self.randomizer_prec = query.randomizer_prec - self.score_offset = query.observed_score_state + query.opt_offset + self.score_offset = query.observed_score_state + query.observed_subgrad - self.ntarget = ntarget = target_cov.shape[0] + self.ntarget = ntarget = cov_target.shape[0] _scale = 4 * np.sqrt(np.diag(inverse_info)) if useIP == False: @@ -516,8 +513,8 @@ def summary(self, def log_reference(self, observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, grid): """ @@ -527,27 +524,27 @@ def log_reference(self, if np.asarray(observed_target).shape in [(), (0,)]: raise ValueError('no target specified') - prec_target = np.linalg.inv(target_cov) - target_lin = self.regress_opt.dot(target_score_cov.T.dot(prec_target)) + prec_target = np.linalg.inv(cov_target) + regress_opt_target = self.regress_opt.dot(cov_target_score.T.dot(prec_target)) ref_hat = [] for k in range(grid.shape[0]): # in the usual D = N + Gamma theta.hat, - # target_lin is "something" times Gamma, + # regress_opt_target is "something" times Gamma, # where "something" comes from implied Gaussian # cond_mean is "something" times D - # Gamma is target_score_cov.T.dot(prec_target) + # Gamma is cov_target_score.T.dot(prec_target) num_opt = self.prec_opt.shape[0] num_con = self.linear_part.shape[0] - cond_mean_grid = (target_lin.dot(np.atleast_1d(grid[k] - observed_target)) + + cond_mean_grid = (regress_opt_target.dot(np.atleast_1d(grid[k] - observed_target)) + self.cond_mean) #direction for decomposing o - eta = self.prec_opt.dot(self.regress_opt.dot(target_score_cov.T)) + eta = self.prec_opt.dot(self.regress_opt.dot(cov_target_score.T)) implied_mean = np.asscalar(eta.T.dot(cond_mean_grid)) implied_cov = np.asscalar(eta.T.dot(self.cond_cov).dot(eta)) @@ -557,18 +554,18 @@ def log_reference(self, R = np.identity(num_opt) - _A.dot(eta.T) A = self.linear_part.dot(_A).reshape((-1,)) - b = self.offset-self.linear_part.dot(R).dot(self.init_soln) + b = self.offset-self.linear_part.dot(R).dot(self.observed_soln) conjugate_arg = implied_mean * implied_prec val, soln, _ = solver(np.asarray([conjugate_arg]), np.reshape(implied_prec, (1,1)), - eta.T.dot(self.init_soln), + eta.T.dot(self.observed_soln), A.reshape((A.shape[0],1)), b, **self.solve_args) - gamma_ = _A.dot(soln) + R.dot(self.init_soln) + gamma_ = _A.dot(soln) + R.dot(self.observed_soln) log_jacob = jacobian_grad_hess(gamma_, self.C, self.active_dirs) ref_hat.append(-val - ((conjugate_arg ** 2) * implied_cov)/ 2. 
+ log_jacob[0]) @@ -582,17 +579,17 @@ def _construct_families(self): self._families = [] for m in range(self.ntarget): - p = self.target_score_cov.shape[1] + p = self.cov_target_score.shape[1] observed_target_uni = (self.observed_target[m]).reshape((1,)) - target_cov_uni = (np.diag(self.target_cov)[m]).reshape((1, 1)) - target_score_cov_uni = self.target_score_cov[m, :].reshape((1, p)) + cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) + cov_target_score_uni = self.cov_target_score[m, :].reshape((1, p)) var_target = 1. / ((self.precs[m])[0, 0]) log_ref = self.log_reference(observed_target_uni, - target_cov_uni, - target_score_cov_uni, + cov_target_uni, + cov_target_score_uni, self.stat_grid[m]) if self.useIP == False: logW = (log_ref - 0.5 * (self.stat_grid[m] - self.observed_target[m]) ** 2 / var_target) @@ -676,26 +673,26 @@ def _construct_density(self): S = {} r = {} - p = self.target_score_cov.shape[1] + p = self.cov_target_score.shape[1] for m in range(self.ntarget): observed_target_uni = (self.observed_target[m]).reshape((1,)) - target_cov_uni = (np.diag(self.target_cov)[m]).reshape((1, 1)) - prec_target = 1. / target_cov_uni - target_score_cov_uni = self.target_score_cov[m, :].reshape((1, p)) + cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) + prec_target = 1. / cov_target_uni + cov_target_score_uni = self.cov_target_score[m, :].reshape((1, p)) - score_decomp = target_score_cov_uni.T.dot(prec_target) - score_resid = (self.score_offset - score_decomp.dot(observed_target_uni)).reshape( - (score_decomp.shape[0],)) + regress_score_target = cov_target_score_uni.T.dot(prec_target) + resid_score_target = (self.score_offset - regress_score_target.dot(observed_target_uni)).reshape( + (regress_score_target.shape[0],)) - target_lin = self.regress_opt.dot(score_decomp) - target_off = (self.cond_mean - target_lin.dot(observed_target_uni)).reshape((target_lin.shape[0],)) + regress_opt_target = self.regress_opt.dot(regress_score_target) + resid_mean_opt_target = (self.cond_mean - regress_opt_target.dot(observed_target_uni)).reshape((regress_opt_target.shape[0],)) - _prec = prec_target + (score_decomp.T.dot(score_decomp) * self.randomizer_prec) - target_lin.T.dot( - self.prec_opt).dot(target_lin) + _prec = prec_target + (regress_score_target.T.dot(regress_score_target) * self.randomizer_prec) - regress_opt_target.T.dot( + self.prec_opt).dot(regress_opt_target) - _P = score_decomp.T.dot(score_resid) * self.randomizer_prec - _r = (1. / _prec).dot(target_lin.T.dot(self.prec_opt).dot(target_off) - _P) + _P = regress_score_target.T.dot(resid_score_target) * self.randomizer_prec + _r = (1. 
/ _prec).dot(regress_opt_target.T.dot(self.prec_opt).dot(resid_mean_opt_target) - _P) _S = np.linalg.inv(_prec).dot(prec_target) S[m] = _S diff --git a/selectinf/randomized/drop_losers.py b/selectinf/randomized/drop_losers.py index 7c2a7bce6..ac3134144 100644 --- a/selectinf/randomized/drop_losers.py +++ b/selectinf/randomized/drop_losers.py @@ -84,13 +84,13 @@ def MLE_inference(self, observed_target = self.means[self._winners] std_win = self.std.loc[self._winners] - target_cov = np.diag(std_win**2 / (self._n1_win + self._n2_win)) - target_score_cov = -target_cov + cov_target = np.diag(std_win**2 / (self._n1_win + self._n2_win)) + cov_target_score = -cov_target result = gaussian_query.selective_MLE(self, observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, level=level, solve_args=solve_args) result[0].insert(0, 'arm', self._winners) @@ -120,13 +120,13 @@ def summary(self, """ observed_target = self.means[self._winners] std_win = self.std.loc[self._winners] - target_cov = np.diag(std_win**2 / (self._n1_win + self._n2_win)) - target_score_cov = -target_cov + cov_target = np.diag(std_win**2 / (self._n1_win + self._n2_win)) + cov_target_score = -cov_target result = gaussian_query.summary(self, observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, alternatives=['twosided']*self.K, ndraw=ndraw, level=level, diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index 28f70aa16..fe7cc0885 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -11,8 +11,8 @@ class exact_grid_inference(object): def __init__(self, query, observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, solve_args={'tol': 1.e-12}, useIP=False): @@ -26,17 +26,17 @@ def __init__(self, to describe implied Gaussian. observed_target : ndarray Observed estimate of target. - target_cov : ndarray + cov_target : ndarray Estimated covaraince of target. - target_score_cov : ndarray + cov_target_score : ndarray Estimated covariance of target and score of randomized query. solve_args : dict, optional Arguments passed to solver. 
""" result, inverse_info = query.selective_MLE(observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, solve_args=solve_args)[:2] self.linear_part = query.sampler.affine_con.linear_part @@ -48,15 +48,15 @@ def __init__(self, self.cond_cov = query.cond_cov self.observed_target = observed_target - self.target_score_cov = target_score_cov - self.target_cov = target_cov + self.cov_target_score = cov_target_score + self.cov_target = cov_target - self.init_soln = query.observed_opt_state + self.observed_soln = query.observed_opt_state self.randomizer_prec = query.sampler.randomizer_prec self.score_offset = query.observed_score_state + query.sampler.logdens_transform[1] - self.ntarget = ntarget = target_cov.shape[0] + self.ntarget = ntarget = cov_target.shape[0] _scale = 4 * np.sqrt(np.diag(inverse_info)) if useIP == False: @@ -118,34 +118,34 @@ def summary(self, def log_reference(self, observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, grid): if np.asarray(observed_target).shape in [(), (0,)]: raise ValueError('no target specified') - prec_target = np.linalg.inv(target_cov) - target_lin = self.regress_opt.dot(target_score_cov.T.dot(prec_target)) + prec_target = np.linalg.inv(cov_target) + regress_opt_target = self.regress_opt.dot(cov_target_score.T.dot(prec_target)) ref_hat = [] for k in range(grid.shape[0]): # in the usual D = N + Gamma theta.hat, - # target_lin is "something" times Gamma, + # regress_opt_target is "something" times Gamma, # where "something" comes from implied Gaussian # cond_mean is "something" times D - # Gamma is target_score_cov.T.dot(prec_target) + # Gamma is cov_target_score.T.dot(prec_target) num_opt = self.prec_opt.shape[0] num_con = self.linear_part.shape[0] - cond_mean_grid = (target_lin.dot(np.atleast_1d(grid[k] - observed_target)) + + cond_mean_grid = (regress_opt_target.dot(np.atleast_1d(grid[k] - observed_target)) + self.cond_mean) #direction for decomposing o - eta = self.prec_opt.dot(self.regress_opt.dot(target_score_cov.T)) + eta = self.prec_opt.dot(self.regress_opt.dot(cov_target_score.T)) implied_mean = np.asscalar(eta.T.dot(cond_mean_grid)) implied_cov = np.asscalar(eta.T.dot(self.cond_cov).dot(eta)) @@ -155,7 +155,7 @@ def log_reference(self, R = np.identity(num_opt) - _A.dot(eta.T) A = self.linear_part.dot(_A).reshape((-1,)) - b = -self.linear_part.dot(R).dot(self.init_soln) + b = -self.linear_part.dot(R).dot(self.observed_soln) trunc_ = np.true_divide((self.offset + b), A) @@ -197,17 +197,17 @@ def _construct_families(self): self._families = [] for m in range(self.ntarget): - p = self.target_score_cov.shape[1] + p = self.cov_target_score.shape[1] observed_target_uni = (self.observed_target[m]).reshape((1,)) - target_cov_uni = (np.diag(self.target_cov)[m]).reshape((1, 1)) - target_score_cov_uni = self.target_score_cov[m, :].reshape((1, p)) + cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) + cov_target_score_uni = self.cov_target_score[m, :].reshape((1, p)) var_target = 1. 
/ ((self.precs[m])[0, 0]) log_ref = self.log_reference(observed_target_uni, - target_cov_uni, - target_score_cov_uni, + cov_target_uni, + cov_target_score_uni, self.stat_grid[m]) if self.useIP == False: logW = (log_ref - 0.5 * (self.stat_grid[m] - self.observed_target[m]) ** 2 / var_target) @@ -290,31 +290,31 @@ def _construct_density(self): S = {} r = {} - p = self.target_score_cov.shape[1] + p = self.cov_target_score.shape[1] for m in range(self.ntarget): observed_target_uni = (self.observed_target[m]).reshape((1,)) - target_cov_uni = (np.diag(self.target_cov)[m]).reshape((1, 1)) - prec_target = 1. / target_cov_uni - target_score_cov_uni = self.target_score_cov[m, :].reshape((1, p)) + cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) + prec_target = 1. / cov_target_uni + cov_target_score_uni = self.cov_target_score[m, :].reshape((1, p)) - score_decomp = target_score_cov_uni.T.dot(prec_target) - score_resid = (self.score_offset - score_decomp.dot(observed_target_uni)).reshape( - (score_decomp.shape[0],)) + regress_score_target = cov_target_score_uni.T.dot(prec_target) + resid_score_target = (self.score_offset - regress_score_target.dot(observed_target_uni)).reshape( + (regress_score_target.shape[0],)) - target_lin = self.regress_opt.dot(score_decomp) - target_off = (self.cond_mean - target_lin.dot(observed_target_uni)).reshape((target_lin.shape[0],)) + regress_opt_target = self.regress_opt.dot(regress_score_target) + resid_mean_opt_target = (self.cond_mean - regress_opt_target.dot(observed_target_uni)).reshape((regress_opt_target.shape[0],)) - _prec = prec_target + (score_decomp.T.dot(score_decomp) * self.randomizer_prec) - target_lin.T.dot( - self.prec_opt).dot(target_lin) + prec_target_nosel = prec_target + (regress_score_target.T.dot(regress_score_target) * self.randomizer_prec) - regress_opt_target.T.dot( + self.prec_opt).dot(regress_opt_target) - _P = score_decomp.T.dot(score_resid) * self.randomizer_prec - _r = (1. / _prec).dot(target_lin.T.dot(self.prec_opt).dot(target_off) - _P) + _P = regress_score_target.T.dot(resid_score_target) * self.randomizer_prec + _r = (1. 
/ _prec).dot(regress_opt_target.T.dot(self.prec_opt).dot(resid_mean_opt_target) - _P) _S = np.linalg.inv(_prec).dot(prec_target) S[m] = _S r[m] = _r - precs[m] = _prec + precs[m] = prec_target_nosel self.precs = precs self.S = S diff --git a/selectinf/randomized/group_lasso.py b/selectinf/randomized/group_lasso.py index 09c239df0..4f1860599 100644 --- a/selectinf/randomized/group_lasso.py +++ b/selectinf/randomized/group_lasso.py @@ -103,8 +103,8 @@ def fit(self, p = self.nfeature - (self.initial_soln, - self.initial_subgrad) = self._solve_randomized_problem( + (self.observed_soln, + self.observed_subgrad) = self._solve_randomized_problem( perturb=perturb, solve_args=solve_args) @@ -124,7 +124,7 @@ def fit(self, for g in sorted(np.unique(self.penalty.groups)): group = self.penalty.groups == g - soln = self.initial_soln + soln = self.observed_soln if np.linalg.norm(soln[group]) * tol * np.linalg.norm(soln): ordered_groups.append(g) ordered_vars.extend(np.nonzero(group)[0]) @@ -184,8 +184,6 @@ def fit(self, for i, var in enumerate(ordered_vars): opt_linear[var, i] += self.ridge_term - opt_offset = self.initial_subgrad - # for group LASSO, we will have # a different sampler for each group # based on conditioning on all scalings @@ -209,9 +207,8 @@ def fit(self, ordered_groups, ordered_vars, opt_linear, - opt_offset, self.observed_score_state, - self.initial_subgrad, + self.observed_subgrad, self.penalty, prec_opt_linear).items(): @@ -232,7 +229,7 @@ def fit(self, log_cond_density, log_det, (np.atleast_2d(regress_opt.T[:,idx_g].dot(dir_g).T), - opt_offset)) + self.observed_subgrad)) self._samplers[group] = sampler self._setup = True @@ -242,8 +239,8 @@ def fit(self, def summary(self, observed_target, group_assignments, - target_cov, - target_score_cov, + cov_target, + cov_target_score, alternatives, parameter=None, level=0.9, @@ -268,8 +265,8 @@ def summary(self, intervals_) = self._inference_for_target( observed_target[group_idx], group, - target_cov[group_idx][:, group_idx], - target_score_cov[group_idx], + cov_target[group_idx][:, group_idx], + cov_target_score[group_idx], [alternatives[i] for i in np.nonzero(group_idx)[0]], parameter=parameter[group_idx], level=level, @@ -284,8 +281,8 @@ def summary(self, def _inference_for_target(self, observed_target, group, - target_cov, - target_score_cov, + cov_target, + cov_target_score, alternatives, opt_sample=None, target_sample=None, @@ -332,8 +329,8 @@ def _inference_for_target(self, ndraw = opt_sample.shape[0] pivots = sampler.coefficient_pvalues(observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, parameter=parameter, sample=(opt_sample, logW), normal_sample=target_sample, @@ -341,8 +338,8 @@ def _inference_for_target(self, if not np.all(parameter == 0): pvalues = sampler.coefficient_pvalues(observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, parameter=np.zeros_like(parameter), sample=(opt_sample, logW), normal_sample=target_sample, @@ -354,8 +351,8 @@ def _inference_for_target(self, if compute_intervals: intervals = sampler.confidence_intervals(observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, sample=(opt_sample, logW), normal_sample=target_sample, level=level) @@ -396,12 +393,12 @@ def _solve_randomized_problem(self, problem = rr.simple_problem(self.loglike, self.penalty) - initial_soln = problem.solve(quad, **solve_args) - initial_subgrad = -(self.loglike.smooth_objective(initial_soln, + observed_soln = problem.solve(quad, **solve_args) + 
observed_subgrad = -(self.loglike.smooth_objective(observed_soln, 'grad') + - quad.objective(initial_soln, 'grad')) + quad.objective(observed_soln, 'grad')) - return initial_soln, initial_subgrad + return observed_soln, observed_subgrad @staticmethod def gaussian(X, @@ -858,7 +855,6 @@ def _reference_density_info(soln, ordered_groups, # ordering is used in assumptions about columns opt_linear ordered_variables, opt_linear, - opt_offset, observed_score_state, observed_subgrad, group_lasso_penalty, @@ -1064,12 +1060,12 @@ def sample(self, ndraw): def selective_MLE(self, observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, # initial (observed) value of optimization variables -- # used as a feasible point. # precise value used only for independent estimator - init_soln, + observed_soln, solve_args={'tol':1.e-12}, level=0.9): diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index ff41f46de..4936896b1 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -105,12 +105,12 @@ def fit(self, p = self.nfeature - (self.initial_soln, - self.initial_subgrad) = self._solve_randomized_problem( + (self.observed_soln, + self.observed_subgrad) = self._solve_randomized_problem( perturb=perturb, solve_args=solve_args) - active_signs = np.sign(self.initial_soln) + active_signs = np.sign(self.observed_soln) active = self._active = active_signs != 0 self._lagrange = self.penalty.weights @@ -133,8 +133,8 @@ def fit(self, # initial state for opt variables - initial_scalings = np.fabs(self.initial_soln[active]) - initial_unpenalized = self.initial_soln[self._unpenalized] + initial_scalings = np.fabs(self.observed_soln[active]) + initial_unpenalized = self.observed_soln[self._unpenalized] self.observed_opt_state = np.concatenate([initial_scalings, initial_unpenalized]) @@ -227,7 +227,6 @@ def signed_basis_vector(p, j, s): + self.ridge_term * unpenalized_directions) - opt_offset = self.initial_subgrad self.opt_linear = opt_linear # now make the constraints and implied gaussian @@ -238,7 +237,7 @@ def signed_basis_vector(p, j, s): self._setup_sampler_data = (A_scaling[:active.sum()], b_scaling[:active.sum()], opt_linear, - opt_offset) + self.observed_subgrad) if num_opt_var > 0: self._setup_sampler(*self._setup_sampler_data) @@ -261,12 +260,12 @@ def _solve_randomized_problem(self, problem = rr.simple_problem(self.loglike, self.penalty) - initial_soln = problem.solve(quad, **solve_args) - initial_subgrad = -(self.loglike.smooth_objective(initial_soln, + observed_soln = problem.solve(quad, **solve_args) + observed_subgrad = -(self.loglike.smooth_objective(observed_soln, 'grad') + - quad.objective(initial_soln, 'grad')) + quad.objective(observed_soln, 'grad')) - return initial_soln, initial_subgrad + return observed_soln, observed_subgrad @staticmethod def gaussian(X, @@ -888,7 +887,7 @@ def fit(self, def _setup_implied_gaussian(self, opt_linear, - opt_offset, + observed_subgrad, dispersion): # key observation is that the covariance of the added noise is @@ -919,7 +918,7 @@ def _setup_implied_gaussian(self, regress_opt = np.zeros((len(ordered_vars), self.nfeature)) regress_opt[:, ordered_vars] = -cond_cov * signs[None, :] / (dispersion * ratio) - cond_mean = regress_opt.dot(self.observed_score_state + opt_offset) + cond_mean = regress_opt.dot(self.observed_score_state + observed_subgrad) return cond_mean, cond_cov, cond_precision, regress_opt @@ -950,12 +949,12 @@ def _solve_randomized_problem(self, randomized_loss.coef *= inv_frac 
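# A note on the renamed `observed_subgrad`: in each `_solve_randomized_problem`
# above the objective is loglike(beta) + penalty(beta) + quad(beta), with
# quad(beta) = ridge_term * ||beta||^2 / 2 - omega^T beta, so by stationarity
# observed_subgrad = -(grad loglike(observed_soln) + grad quad(observed_soln))
# is the subgradient of the penalty at the randomized solution.  A minimal
# numpy-only sketch of that identity for an orthogonal-design Gaussian LASSO
# (the closed-form soft-thresholding solution below is an illustration-only
# assumption, not part of selectinf):

import numpy as np

rng = np.random.default_rng(0)
p, lam, ridge_term = 5, 1.0, 0.1
y = rng.standard_normal(p)
omega = rng.standard_normal(p)                      # randomization

# loss(beta) = ||y - beta||^2 / 2;  quad(beta) = ridge_term * ||beta||^2 / 2 - omega.dot(beta)
z = y + omega
observed_soln = np.sign(z) * np.maximum(np.fabs(z) - lam, 0) / (1 + ridge_term)

grad_loss = observed_soln - y
grad_quad = ridge_term * observed_soln - omega
observed_subgrad = -(grad_loss + grad_quad)

# active coordinates carry lam * sign(observed_soln); inactive ones stay inside [-lam, lam]
active = observed_soln != 0
assert np.allclose(observed_subgrad[active], lam * np.sign(observed_soln[active]))
assert np.all(np.fabs(observed_subgrad[~active]) <= lam + 1e-12)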
problem = rr.simple_problem(randomized_loss, self.penalty) - initial_soln = problem.solve(quad, **solve_args) - initial_subgrad = -(randomized_loss.smooth_objective(initial_soln, + observed_soln = problem.solve(quad, **solve_args) + observed_subgrad = -(randomized_loss.smooth_objective(observed_soln, 'grad') + - quad.objective(initial_soln, 'grad')) + quad.objective(observed_soln, 'grad')) - return initial_soln, initial_subgrad + return observed_soln, observed_subgrad @staticmethod def gaussian(X, diff --git a/selectinf/randomized/modelQ.py b/selectinf/randomized/modelQ.py index d960af043..62aa37b47 100644 --- a/selectinf/randomized/modelQ.py +++ b/selectinf/randomized/modelQ.py @@ -114,9 +114,9 @@ def fit(self, quad = rr.identity_quadratic(self.ridge_term, 0, -self._initial_omega, 0) quad_data = rr.identity_quadratic(0, 0, -self.X.T.dot(self.y), 0) problem = rr.simple_problem(self.loss, self.penalty) - self.initial_soln = problem.solve(quad + quad_data, **solve_args) + self.observed_soln = problem.solve(quad + quad_data, **solve_args) - active_signs = np.sign(self.initial_soln) + active_signs = np.sign(self.observed_soln) active = self._active = active_signs != 0 self._lagrange = self.penalty.weights @@ -135,13 +135,13 @@ def fit(self, # initial state for opt variables - initial_subgrad = -(self.loss.smooth_objective(self.initial_soln, 'grad') + - quad_data.objective(self.initial_soln, 'grad') + - quad.objective(self.initial_soln, 'grad')) - self.initial_subgrad = initial_subgrad + observed_subgrad = -(self.loss.smooth_objective(self.observed_soln, 'grad') + + quad_data.objective(self.observed_soln, 'grad') + + quad.objective(self.observed_soln, 'grad')) + self.observed_subgrad = observed_subgrad - initial_scalings = np.fabs(self.initial_soln[active]) - initial_unpenalized = self.initial_soln[self._unpenalized] + initial_scalings = np.fabs(self.observed_soln[active]) + initial_unpenalized = self.observed_soln[self._unpenalized] self.observed_opt_state = np.concatenate([initial_scalings, initial_unpenalized]) @@ -210,7 +210,7 @@ def signed_basis_vector(p, j, s): # two transforms that encode score and optimization # variable roles - self.opt_transform = (_opt_linear_term, self.initial_subgrad) + self.opt_transform = (_opt_linear_term, self.observed_subgrad) self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0])) # now store everything needed for the projections @@ -224,7 +224,7 @@ def signed_basis_vector(p, j, s): # compute implied mean and covariance - opt_linear, opt_offset = self.opt_transform + opt_linear, observed_subgrad = self.opt_transform A_scaling = -np.identity(self.num_opt_var) b_scaling = np.zeros(self.num_opt_var) @@ -232,7 +232,7 @@ def signed_basis_vector(p, j, s): self._setup_sampler(A_scaling, b_scaling, opt_linear, - opt_offset) + observed_subgrad) return active_signs @@ -417,9 +417,9 @@ def selected_targets(self, features=None, dispersion=None): Xfeat = X[:,features] Qfeat = self.Q[features][:,features] - Gfeat = self.loss.smooth_objective(self.initial_soln, 'grad')[features] - Xfeat.T.dot(y) + Gfeat = self.loss.smooth_objective(self.observed_soln, 'grad')[features] - Xfeat.T.dot(y) Qfeat_inv = np.linalg.inv(Qfeat) - one_step = self.initial_soln[features] - Qfeat_inv.dot(Gfeat) + one_step = self.observed_soln[features] - Qfeat_inv.dot(Gfeat) cov_target = Qfeat_inv.dot(Xfeat.T.dot(Xfeat)).dot(Qfeat_inv) _score_linear = -self.Q[features] crosscov_target_score = _score_linear.dot(cov_target) @@ -447,9 +447,9 @@ def full_targets(self, 
features=None, dispersion=None): # target is one-step estimator Qfull = self.Q - G = self.loss.smooth_objective(self.initial_soln, 'grad') - X.T.dot(y) + G = self.loss.smooth_objective(self.observed_soln, 'grad') - X.T.dot(y) Qfull_inv = np.linalg.inv(Qfull) - one_step = self.initial_soln - Qfull_inv.dot(G) + one_step = self.observed_soln - Qfull_inv.dot(G) cov_target = Qfull_inv[features][:,features] observed_target = one_step[features] crosscov_target_score = np.zeros((p, cov_target.shape[0])) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index a63718aea..ea1d1fbf9 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -109,10 +109,11 @@ def log_posterior(self, log_normalizer = -val - mean_marginal.T.dot(prec_marginal).dot(mean_marginal) / 2. - log_lik = -((self.observed_target - target).T.dot(self._prec).dot(self.observed_target - target)) / 2. \ + _prec = self.prec_target_nosel # shorthand + log_lik = -((self.observed_target - target).T.dot(_prec).dot(self.observed_target - target)) / 2. \ - log_normalizer - grad_lik = self.S.T.dot(self._prec.dot(self.observed_target) - self._prec.dot(target) - self.linear_coef.T.dot( + grad_lik = self.S.T.dot(_prec.dot(self.observed_target) - _prec.dot(target) - self.linear_coef.T.dot( prec_marginal.dot(soln) - conjugate_marginal)) log_prior, grad_prior = self.prior(target_parameter) @@ -130,34 +131,34 @@ def _set_marginal_parameters(self): implied mean as a function of the true parameters. """ - score_decomp = self.cov_target_score.T.dot(self.prec_target) - score_resid = self.score_offset - score_decomp.dot(self.observed_target) + regress_score_target = self.cov_target_score.T.dot(self.prec_target) + resid_score_target = self.score_offset - regress_score_target.dot(self.observed_target) - target_lin = self.regress_opt.dot(score_decomp) - target_off = self.cond_mean - target_lin.dot(self.observed_target) + regress_opt_target = self.regress_opt.dot(regress_score_target) + resid_mean_opt_target = self.cond_mean - regress_opt_target.dot(self.observed_target) - self.linear_coef = target_lin - self.offset_coef = target_off + self.linear_coef = regress_opt_target + self.offset_coef = resid_mean_opt_target if np.asarray(self.randomizer_prec).shape in [(), (0,)]: - _prec = self.prec_target + (score_decomp.T.dot(score_decomp) * self.randomizer_prec) \ - - target_lin.T.dot(self.cond_precision).dot(target_lin) - _P = score_decomp.T.dot(score_resid) * self.randomizer_prec + prec_target_nosel = self.prec_target + (regress_score_target.T.dot(regress_score_target) * self.randomizer_prec) \ + - regress_opt_target.T.dot(self.cond_precision).dot(regress_opt_target) + _P = regress_score_target.T.dot(resid_score_target) * self.randomizer_prec else: - _prec = self.prec_target + (score_decomp.T.dot(self.randomizer_prec).dot(score_decomp)) \ - - target_lin.T.dot(self.cond_precision).dot(target_lin) - _P = score_decomp.T.dot(self.randomizer_prec).dot(score_resid) + prec_target_nosel = self.prec_target + (regress_score_target.T.dot(self.randomizer_prec).dot(regress_score_target)) \ + - regress_opt_target.T.dot(self.cond_precision).dot(regress_opt_target) + _P = regress_score_target.T.dot(self.randomizer_prec).dot(resid_score_target) - _Q = np.linalg.inv(_prec + target_lin.T.dot(self.cond_precision).dot(target_lin)) - self.prec_marginal = self.cond_precision - self.cond_precision.dot(target_lin).dot(_Q).dot(target_lin.T).dot(self.cond_precision) + _Q = 
np.linalg.inv(_prec + regress_opt_target.T.dot(self.cond_precision).dot(regress_opt_target)) + self.prec_marginal = self.cond_precision - self.cond_precision.dot(regress_opt_target).dot(_Q).dot(regress_opt_target.T).dot(self.cond_precision) - r = np.linalg.inv(_prec).dot(target_lin.T.dot(self.cond_precision).dot(target_off) - _P) + r = np.linalg.inv(_prec).dot(regress_opt_target.T.dot(self.cond_precision).dot(resid_mean_opt_target) - _P) S = np.linalg.inv(_prec).dot(self.prec_target) self.r = r self.S = S #print("check parameters for selected+lasso ", np.allclose(np.diag(S), np.ones(S.shape[0])), np.allclose(r, np.zeros(r.shape[0]))) - self._prec = _prec + self.prec_target_nosel = prec_target_nosel ### sampling methods diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 12fcbc8aa..9859f693f 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -106,7 +106,7 @@ def _setup_sampler(self, linear_part, offset, opt_linear, - opt_offset, + observed_subgrad, # optional dispersion parameter # for covariance of randomization dispersion=1): @@ -119,20 +119,20 @@ def _setup_sampler(self, cond_cov, cond_precision, regress_opt) = self._setup_implied_gaussian(opt_linear, - opt_offset, + observed_subgrad, dispersion) - def log_density(regress_opt, offset, cond_prec, opt, score): + def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad if score.ndim == 1: - mean_term = regress_opt.dot(score.T + offset).T + mean_term = regress_opt.dot(score.T + u).T else: - mean_term = regress_opt.dot(score.T + offset[:, None]).T + mean_term = regress_opt.dot(score.T + u[:, None]).T arg = opt - mean_term return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1) log_density = functools.partial(log_density, regress_opt, - opt_offset, + observed_subgrad, cond_precision) @@ -148,14 +148,15 @@ def log_density(regress_opt, offset, cond_prec, opt, score): self.observed_opt_state, self.observed_score_state, log_density, - (regress_opt, opt_offset), + regress_opt, + observed_subgrad, self.randomizer_prec, selection_info=self.selection_variable, useC=self.useC) def _setup_implied_gaussian(self, opt_linear, - opt_offset, + observed_subgrad, # optional dispersion parameter # for covariance of randomization dispersion=1): @@ -174,14 +175,14 @@ def _setup_implied_gaussian(self, # regress_opt is regression coefficient of opt onto score + u... - cond_mean = regress_opt.dot(self.observed_score_state + opt_offset) + cond_mean = regress_opt.dot(self.observed_score_state + observed_subgrad) return cond_mean, cond_cov, cond_precision, regress_opt def summary(self, observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, alternatives, opt_sample=None, target_sample=None, @@ -197,9 +198,9 @@ def summary(self, ---------- observed_target : ndarray Observed estimate of target. - target_cov : ndarray + cov_target : ndarray Estimated covaraince of target. - target_score_cov : ndarray + cov_target_score : ndarray Estimated covariance of target and score of randomized query. 
alternatives : [str], optional Sequence of strings describing the alternatives, @@ -232,8 +233,8 @@ def summary(self, ndraw = opt_sample.shape[0] pivots = self.sampler.coefficient_pvalues(observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, parameter=parameter, sample=(opt_sample, logW), normal_sample=target_sample, @@ -241,8 +242,8 @@ def summary(self, if not np.all(parameter == 0): pvalues = self.sampler.coefficient_pvalues(observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, parameter=np.zeros_like(parameter), sample=(opt_sample, logW), normal_sample=target_sample, @@ -255,14 +256,14 @@ def summary(self, if compute_intervals: MLE = self.selective_MLE(observed_target, - target_cov, - target_score_cov)[0] + cov_target, + cov_target_score)[0] MLE_intervals = np.asarray(MLE[['lower_confidence', 'upper_confidence']]) intervals = self.sampler.confidence_intervals( observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, sample=(opt_sample, logW), normal_sample=target_sample, initial_guess=MLE_intervals, @@ -279,8 +280,8 @@ def summary(self, def selective_MLE(self, observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, level=0.9, solve_args={'tol': 1.e-12}): """ @@ -288,9 +289,9 @@ def selective_MLE(self, ---------- observed_target : ndarray Observed estimate of target. - target_cov : ndarray + cov_target : ndarray Estimated covaraince of target. - target_score_cov : ndarray + cov_target_score : ndarray Estimated covariance of target and score of randomized query. level : float, optional Confidence level. @@ -299,16 +300,16 @@ def selective_MLE(self, """ return self.sampler.selective_MLE(observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, self.observed_opt_state, level=level, solve_args=solve_args) def posterior(self, observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, prior=None, dispersion=None, solve_args={'tol': 1.e-12}): @@ -317,9 +318,9 @@ def posterior(self, ---------- observed_target : ndarray Observed estimate of target. - target_cov : ndarray + cov_target : ndarray Estimated covaraince of target. - target_score_cov : ndarray + cov_target_score : ndarray Estimated covariance of target and score of randomized query. prior : callable A callable object that takes a single argument @@ -336,7 +337,7 @@ def posterior(self, print('Using dispersion parameter 1...') if prior is None: - Di = 1. / (200 * np.diag(target_cov)) + Di = 1. / (200 * np.diag(cov_target)) def prior(target_parameter): grad_prior = -target_parameter * Di @@ -345,16 +346,16 @@ def prior(target_parameter): return posterior(self, observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, prior, dispersion, solve_args=solve_args) def approximate_grid_inference(self, observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, alternatives=None, solve_args={'tol': 1.e-12}): @@ -363,9 +364,9 @@ def approximate_grid_inference(self, ---------- observed_target : ndarray Observed estimate of target. - target_cov : ndarray + cov_target : ndarray Estimated covaraince of target. - target_score_cov : ndarray + cov_target_score : ndarray Estimated covariance of target and score of randomized query. 
alternatives : [str], optional Sequence of strings describing the alternatives, @@ -376,8 +377,8 @@ def approximate_grid_inference(self, G = approximate_grid_inference(self, observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, solve_args=solve_args) return G.summary(alternatives=alternatives) @@ -420,8 +421,8 @@ def fit(self): def summary(self, observed_target, - opt_sampling_info, # a sequence of (target_cov, score_cov) - # objects in theory all target_cov + opt_sampling_info, # a sequence of (cov_target, score_cov) + # objects in theory all cov_target # should be about the same... alternatives=None, parameter=None, @@ -613,7 +614,7 @@ def log_cond_density(self, def hypothesis_test(self, test_stat, observed_value, - target_cov, + cov_target, score_cov, sample_args=(), sample=None, @@ -661,7 +662,7 @@ def hypothesis_test(self, sample_test_stat = np.squeeze(np.array([test_stat(x) for x in sample])) - target_inv_cov = np.linalg.inv(target_cov) + target_inv_cov = np.linalg.inv(cov_target) delta = target_inv_cov.dot(parameter - self.reference) W = np.exp(sample.dot(delta) + logW) @@ -677,7 +678,7 @@ def hypothesis_test(self, def confidence_intervals(self, observed_target, - target_cov, + cov_target, score_cov, sample_args=(), sample=None, @@ -725,7 +726,7 @@ def confidence_intervals(self, _intervals = optimization_intervals([(self, sample, logW, - target_cov, + cov_target, score_cov)], observed_target, ndraw, @@ -747,7 +748,7 @@ def confidence_intervals(self, def coefficient_pvalues(self, observed_target, - target_cov, + cov_target, score_cov, parameter=None, sample_args=(), @@ -795,7 +796,7 @@ def coefficient_pvalues(self, _intervals = optimization_intervals([(self, sample, logW, - target_cov, + cov_target, score_cov)], observed_target, ndraw, @@ -834,8 +835,9 @@ def __init__(self, initial_point, observed_score_state, log_cond_density, - logdens_transform, # described how score enters log_density. - cov_product, # product score_cov.dot(randomizer_prec), + regress_opt, + observed_subgrad, + randomizer_prec, selection_info=None, useC=False): @@ -855,9 +857,9 @@ def __init__(self, $-X^Ty$. log_cond_density : callable Density of optimization variables given score - logdens_transform : tuple - Description of how conditional mean - of optimization variables depends on score. + regress_opt: ndarray + Regression coefficient of opt on to score + observed_subgrad : ndarray selection_info : optional Function of optimization variables that will be conditioned on. @@ -875,9 +877,10 @@ def __init__(self, self.observed_score_state = observed_score_state self.selection_info = selection_info self._log_cond_density = log_cond_density - self.logdens_transform = logdens_transform + self.regress_opt = regress_opt + self.observed_subgrad = observed_subgrad self.useC = useC - self.cov_product = cov_product + self.randomizer_prec = randomizer_prec def log_cond_density(self, opt_sample, @@ -924,12 +927,12 @@ def sample(self, ndraw, burnin): def selective_MLE(self, observed_target, - target_cov, - target_score_cov, + cov_target, + cov_target_score, # initial (observed) value of optimization variables -- # used as a feasible point. # precise value used only for independent estimator - init_soln, + observed_soln, solve_args={'tol': 1.e-12}, level=0.9): """ @@ -939,11 +942,11 @@ def selective_MLE(self, ---------- observed_target : ndarray Observed estimate of target. - target_cov : ndarray + cov_target : ndarray Estimated covaraince of target. 
- target_score_cov : ndarray + cov_target_score : ndarray Estimated covariance of target and score of randomized query. - init_soln : ndarray + observed_soln : ndarray Feasible point for optimization problem. level : float, optional Confidence level. @@ -951,15 +954,15 @@ def selective_MLE(self, Arguments passed to solver. """ - score_offset = self.observed_score_state + self.logdens_transform[1] # logdens_transform[1] is observed_subgrad + score_offset = self.observed_score_state + self.observed_subgrad return selective_MLE(observed_target, - target_cov, - target_score_cov, - init_soln, + cov_target, + cov_target_score, + observed_soln, self.mean, self.covariance, - self.logdens_transform[0], + self.regress_opt, self.affine_con.linear_part, self.affine_con.offset, self.randomizer_prec, @@ -980,14 +983,14 @@ def _log_density_ray(self, if (not hasattr(self, "_direction") or not np.all(self._direction == direction)): - regress_opt, logdens_offset = self.logdens_transform + regress_opt, subgrad = self.regress_opt, self.observed_subgrad if opt_sample.shape[1] == 1: prec = 1. / self.covariance[0, 0] quadratic_term = regress_opt.dot(direction) ** 2 * prec arg = (opt_sample[:, 0] - - regress_opt.dot(nuisance + logdens_offset) - + regress_opt.dot(nuisance + subgrad) - regress_opt.dot(direction) * gaussian_sample) linear_term = -regress_opt.dot(direction) * prec * arg constant_term = arg ** 2 * prec @@ -999,19 +1002,19 @@ def _log_density_ray(self, self._direction = direction.copy() # density is a Gaussian evaluated at - # O_i - A(N + (Z_i + theta) * gamma + b) + # O_i - A(N + (Z_i + theta) * gamma + u) - # b is logdens_offset + # u is observed_subgrad # A is regress_opt # Z_i is gaussian_sample[i] (real-valued) # gamma is direction # O_i is opt_sample[i] # let arg1 = O_i - # let arg2 = A(N+b + Z_i \cdot gamma) + # let arg2 = A(N+u + Z_i \cdot gamma) # then it is of the form (arg1 - arg2 - theta * A gamma) - regress_opt, logdens_offset = self.logdens_transform + regress_opt, subgrad = self.regress_opt, self.observed_subgrad cov = self.covariance prec = np.linalg.inv(cov) linear_part = -regress_opt.dot(direction) # -A gamma @@ -1024,7 +1027,7 @@ def _log_density_ray(self, arg1 = opt_sample.T arg2 = -regress_opt.dot(np.multiply.outer(direction, gaussian_sample) + - (nuisance + logdens_offset)[:, None]) + (nuisance + subgrad)[:, None]) arg = arg1 + arg2 linear_term = -regress_opt.T.dot(prec).dot(arg) constant_term = np.sum(prec.dot(arg) * arg, 0) @@ -1048,13 +1051,13 @@ def __init__(self, # (opt_sampler, # opt_sample, # opt_logweights, - # target_cov, + # cov_target, # score_cov) objects - # in theory all target_cov + # in theory all cov_target # should be about the same... 
observed, nsample, # how large a normal sample - target_cov=None, + cov_target=None, normal_sample=None): # not all opt_samples will be of the same size as nsample @@ -1112,16 +1115,16 @@ def __init__(self, # average covariances in case they might be different - if target_cov is None: - self.target_cov = 0 - for _, _, _, target_cov, _ in opt_sampling_info: - self.target_cov += target_cov - self.target_cov /= len(opt_sampling_info) + if cov_target is None: + self.cov_target = 0 + for _, _, _, cov_target, _ in opt_sampling_info: + self.cov_target += cov_target + self.cov_target /= len(opt_sampling_info) if normal_sample is None: self._normal_sample = np.random.multivariate_normal( - mean=np.zeros(self.target_cov.shape[0]), - cov=self.target_cov, + mean=np.zeros(self.cov_target.shape[0]), + cov=self.cov_target, size=(nsample,)) else: self._normal_sample = normal_sample @@ -1144,7 +1147,7 @@ def pivot(self, observed_stat = self.observed.dot(linear_func) sample_stat = self._normal_sample.dot(linear_func) - target_cov = linear_func.dot(self.target_cov.dot(linear_func)) + cov_target = linear_func.dot(self.cov_target.dot(linear_func)) nuisance = [] translate_dirs = [] @@ -1153,18 +1156,18 @@ def pivot(self, opt_sample, _, _, - target_score_cov) in self.opt_sampling_info: - cur_score_cov = linear_func.dot(target_score_cov) + cov_target_score) in self.opt_sampling_info: + cur_score_cov = linear_func.dot(cov_target_score) # cur_nuisance is in the view's score coordinates - cur_nuisance = opt_sampler.observed_score_state - cur_score_cov * observed_stat / target_cov + cur_nuisance = opt_sampler.observed_score_state - cur_score_cov * observed_stat / cov_target nuisance.append(cur_nuisance) - translate_dirs.append(cur_score_cov / target_cov) + translate_dirs.append(cur_score_cov / cov_target) weights = self._weights(sample_stat, # normal sample candidate, # candidate value nuisance, # nuisance sufficient stats for each view - translate_dirs) # points will be moved like sample * target_score_cov + translate_dirs) # points will be moved like sample * cov_target_score pivot = np.mean((sample_stat + candidate <= observed_stat) * weights) / np.mean(weights) @@ -1307,9 +1310,9 @@ def naive_pvalues(diag_cov, observed, parameter): return pvalues def selective_MLE(observed_target, - target_cov, - target_score_cov, - init_soln, # initial (observed) value of + cov_target, + cov_target_score, + observed_soln, # initial (observed) value of # optimization variables -- used as a # feasible point. precise value used # only for independent estimator @@ -1330,11 +1333,11 @@ def selective_MLE(observed_target, ---------- observed_target : ndarray Observed estimate of target. - target_cov : ndarray + cov_target : ndarray Estimated covaraince of target. - target_score_cov : ndarray + cov_target_score : ndarray Estimated covariance of target and score of randomized query. - init_soln : ndarray + observed_soln : ndarray Feasible point for optimization problem. cond_mean : ndarray Conditional mean of optimization variables given target. 
@@ -1359,31 +1362,32 @@ def selective_MLE(observed_target, raise ValueError('no target specified') observed_target = np.atleast_1d(observed_target) - prec_target = np.linalg.inv(target_cov) + prec_target = np.linalg.inv(cov_target) prec_opt = np.linalg.inv(cond_cov) - # target_lin determines how the conditional mean of optimization variables + # regress_opt_target determines how the conditional mean of optimization variables # vary with target # regress_opt determines how the argument of the optimization density # depends on the score, not how the mean depends on score, hence the minus sign - score_decomp = target_score_cov.T.dot(prec_target) - score_resid = score_offset - score_decomp.dot(observed_target) + regress_score_target = cov_target_score.T.dot(prec_target) + resid_score_target = score_offset - regress_score_target.dot(observed_target) + + regress_opt_target = regress_opt.dot(regress_score_target) + resid_mean_opt_target = cond_mean - regress_opt_target.dot(observed_target) - target_lin = regress_opt.dot(score_decomp) - target_off = cond_mean - target_lin.dot(observed_target) if np.asarray(randomizer_prec).shape in [(), (0,)]: - _P = score_decomp.T.dot(score_resid) * randomizer_prec - _prec = prec_target + (score_decomp.T.dot(score_decomp) * randomizer_prec) - target_lin.T.dot(prec_opt).dot( - target_lin) + _P = regress_score_target.T.dot(resid_score_target) * randomizer_prec + prec_target_nosel = prec_target + (regress_score_target.T.dot(regress_score_target) * randomizer_prec) - regress_opt_target.T.dot(prec_opt).dot( + regress_opt_target) else: - _P = score_decomp.T.dot(randomizer_prec).dot(score_resid) - _prec = prec_target + (score_decomp.T.dot(randomizer_prec).dot(score_decomp)) - target_lin.T.dot( - prec_opt).dot(target_lin) + _P = regress_score_target.T.dot(randomizer_prec).dot(resid_score_target) + prec_target_nosel = prec_target + (regress_score_target.T.dot(randomizer_prec).dot(regress_score_target)) - regress_opt_target.T.dot( + prec_opt).dot(regress_opt_target) - C = target_cov.dot(_P - target_lin.T.dot(prec_opt).dot(target_off)) + C = cov_target.dot(_P - regress_opt_target.T.dot(prec_opt).dot(resid_mean_opt_target)) conjugate_arg = prec_opt.dot(cond_mean) @@ -1394,21 +1398,21 @@ def selective_MLE(observed_target, val, soln, hess = solver(conjugate_arg, prec_opt, - init_soln, + observed_soln, linear_part, offset, **solve_args) - final_estimator = target_cov.dot(_prec).dot(observed_target) \ - + target_cov.dot(target_lin.T.dot(prec_opt.dot(cond_mean - soln))) + C + final_estimator = cov_target.dot(prec_target_nosel).dot(observed_target) \ + + cov_target.dot(regress_opt_target.T.dot(prec_opt.dot(cond_mean - soln))) + C - unbiased_estimator = target_cov.dot(_prec).dot(observed_target) + target_cov.dot( - _P - target_lin.T.dot(prec_opt).dot(target_off)) + unbiased_estimator = cov_target.dot(prec_target_nosel).dot(observed_target) + cov_target.dot( + _P - regress_opt_target.T.dot(prec_opt).dot(resid_mean_opt_target)) - L = target_lin.T.dot(prec_opt) - observed_info_natural = _prec + L.dot(target_lin) - L.dot(hess.dot(L.T)) + L = regress_opt_target.T.dot(prec_opt) + observed_info_natural = prec_target_nosel + L.dot(regress_opt_target) - L.dot(hess.dot(L.T)) - observed_info_mean = target_cov.dot(observed_info_natural.dot(target_cov)) + observed_info_mean = cov_target.dot(observed_info_natural.dot(cov_target)) Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) diff --git a/selectinf/randomized/screening.py b/selectinf/randomized/screening.py index 
b87ae0027..db6602cc4 100644 --- a/selectinf/randomized/screening.py +++ b/selectinf/randomized/screening.py @@ -108,9 +108,9 @@ def fit(self, perturb=None): opt_linear = np.zeros((p, self.num_opt_var)) opt_linear[self._selected] = np.diag(active_signs) - opt_offset = np.zeros(p) - opt_offset[self._selected] = active_signs * self.threshold[self._selected] - opt_offset[self._not_selected] = _randomized_score[self._not_selected] + observed_subgrad = np.zeros(p) + observed_subgrad[self._selected] = active_signs * self.threshold[self._selected] + observed_subgrad[self._not_selected] = _randomized_score[self._not_selected] self._setup = True @@ -120,7 +120,7 @@ def fit(self, perturb=None): self._setup_sampler(A_scaling, b_scaling, opt_linear, - opt_offset) + observed_subgrad) return self._selected @@ -211,9 +211,9 @@ def fit(self, perturb=None): for j in range(self.num_opt_var): opt_linear[selected_idx[j], j] = active_signs[j] - opt_offset = np.zeros(p) - opt_offset[self._selected] = active_signs * last_cutoff - opt_offset[self._not_selected] = _randomized_score[self._not_selected] + observed_subgrad = np.zeros(p) + observed_subgrad[self._selected] = active_signs * last_cutoff + observed_subgrad[self._not_selected] = _randomized_score[self._not_selected] self._setup = True @@ -223,7 +223,7 @@ def fit(self, perturb=None): self._setup_sampler(A_scaling, b_scaling, opt_linear, - opt_offset) + observed_subgrad) else: self._selected = np.zeros(p, np.bool) return self._selected @@ -328,7 +328,7 @@ def fit(self, perturb=None): opt_linear = np.zeros((p, self.num_opt_var)) opt_linear[self._selected] = np.diag(topK_signs) - opt_offset = np.zeros(p) + observed_subgrad = np.zeros(p) else: @@ -346,7 +346,7 @@ def fit(self, perturb=None): opt_linear = np.zeros((p, self.num_opt_var)) opt_linear[self._selected] = np.identity(self.num_opt_var) - opt_offset = np.zeros(p) + observed_subgrad = np.zeros(p) # in both cases, this conditioning means we just need to compute # the observed lower bound @@ -360,7 +360,7 @@ def fit(self, perturb=None): self._setup_sampler(A_scaling, b_scaling, opt_linear, - opt_offset) + observed_subgrad) return self._selected diff --git a/selectinf/randomized/slope.py b/selectinf/randomized/slope.py index 854148b54..5f88676e8 100644 --- a/selectinf/randomized/slope.py +++ b/selectinf/randomized/slope.py @@ -81,22 +81,22 @@ def _solve_randomized_problem(self, quad = rr.identity_quadratic(self.ridge_term, 0, -self._initial_omega, 0) problem = rr.simple_problem(self.loglike, self.penalty) - initial_soln = problem.solve(quad, **solve_args) - initial_subgrad = -(self.loglike.smooth_objective(initial_soln, 'grad') + - quad.objective(initial_soln, 'grad')) + observed_soln = problem.solve(quad, **solve_args) + observed_subgrad = -(self.loglike.smooth_objective(observed_soln, 'grad') + + quad.objective(observed_soln, 'grad')) - return initial_soln, initial_subgrad + return observed_soln, observed_subgrad def fit(self, solve_args={'tol': 1.e-12, 'min_its': 50}, perturb=None): - self.initial_soln, self.initial_subgrad = self._solve_randomized_problem(perturb=perturb, solve_args=solve_args) - p = self.initial_soln.shape[0] + self.observed_soln, self.observed_subgrad = self._solve_randomized_problem(perturb=perturb, solve_args=solve_args) + p = self.observed_soln.shape[0] # now we have to work out SLOPE details, clusters, etc. 
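# For intuition about the "clusters" mentioned above: the conditioning for SLOPE
# is on the distinct nonzero magnitudes of `observed_soln`, with one optimization
# variable per cluster of coordinates sharing an absolute value.  A toy,
# numpy-only illustration (the solution vector is made up purely to show the
# bookkeeping; it does not come from an actual SLOPE fit):

import numpy as np

observed_soln = np.array([0.0, -1.5, 0.7, 1.5, 0.0, -0.7])

indices = np.argsort(-np.fabs(observed_soln))       # coordinates ordered by decreasing magnitude
sorted_soln = observed_soln[indices]
active = observed_soln != 0
initial_scalings = np.sort(np.unique(np.fabs(observed_soln[active])))[::-1]

print(initial_scalings)                             # [1.5 0.7] -- one variable per cluster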
- active_signs = np.sign(self.initial_soln) + active_signs = np.sign(self.observed_soln) active = self._active = active_signs != 0 self._overall = overall = active> 0 @@ -107,9 +107,9 @@ def fit(self, 'variables': self._overall} - indices = np.argsort(-np.fabs(self.initial_soln)) - sorted_soln = self.initial_soln[indices] - initial_scalings = np.sort(np.unique(np.fabs(self.initial_soln[active])))[::-1] + indices = np.argsort(-np.fabs(self.observed_soln)) + sorted_soln = self.observed_soln[indices] + initial_scalings = np.sort(np.unique(np.fabs(self.observed_soln[active])))[::-1] self.observed_opt_state = initial_scalings self._unpenalized = np.zeros(p, np.bool) @@ -141,7 +141,7 @@ def fit(self, cur_indx = j + 1 sign_vec = np.zeros(p) sign_vec[np.arange(j + 1 - cur_indx_array[pointer]) + cur_indx_array[pointer]] = \ - np.sign(self.initial_soln[indices[np.arange(j + 1 - cur_indx_array[pointer]) + cur_indx_array[pointer]]]) + np.sign(self.observed_soln[indices[np.arange(j + 1 - cur_indx_array[pointer]) + cur_indx_array[pointer]]]) signs_cluster.append(sign_vec) pointer = pointer + 1 if sorted_soln[j + 1] == 0: @@ -156,7 +156,6 @@ def fit(self, _opt_linear_term = X.T.dot(X_clustered) _, prec = self.randomizer.cov_prec - opt_linear, opt_offset = (_opt_linear_term, self.initial_subgrad) # now make the constraints @@ -170,8 +169,8 @@ def fit(self, self._setup_sampler(A_scaling, b_scaling, - opt_linear, - opt_offset) + _opt_linear_term, + self.observed_subgrad) return active_signs From 175cae75b182bd470fcd7f7d99180abe1202c67a Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 12 Jul 2021 18:31:12 -0700 Subject: [PATCH 106/187] finished rename, and rewrite in terms of regression parameters for LASSO --- selectinf/randomized/approx_reference.py | 6 +- .../randomized/approx_reference_grouplasso.py | 30 ++-- selectinf/randomized/exact_reference.py | 6 +- selectinf/randomized/lasso.py | 34 +++- selectinf/randomized/posterior_inference.py | 14 +- selectinf/randomized/query.py | 152 ++++++++++-------- .../tests/test_selective_MLE_high.py | 4 +- 7 files changed, 149 insertions(+), 97 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index 06eb5cd54..40e7363c4 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -56,7 +56,7 @@ def __init__(self, self.observed_soln = query.observed_opt_state - self.randomizer_prec = query.sampler.randomizer_prec + self.prec_randomizer = query.sampler.prec_randomizer self.score_offset = query.observed_score_state + query.sampler.logdens_transform[1] self.ntarget = ntarget = cov_target.shape[0] @@ -293,10 +293,10 @@ def _construct_density(self): regress_opt_target = self.regress_opt.dot(regress_score_target) resid_mean_opt_target = (self.cond_mean - regress_opt_target.dot(observed_target_uni)).reshape((regress_opt_target.shape[0],)) - prec_target_nosel = prec_target + (regress_score_target.T.dot(regress_score_target) * self.randomizer_prec) - regress_opt_target.T.dot( + prec_target_nosel = prec_target + (regress_score_target.T.dot(regress_score_target) * self.prec_randomizer) - regress_opt_target.T.dot( self.prec_opt).dot(regress_opt_target) - _P = regress_score_target.T.dot(resid_score_target) * self.randomizer_prec + _P = regress_score_target.T.dot(resid_score_target) * self.prec_randomizer _r = (1. 
/ _prec).dot(regress_opt_target.T.dot(self.prec_opt).dot(resid_mean_opt_target) - _P) _S = np.linalg.inv(_prec).dot(prec_target) diff --git a/selectinf/randomized/approx_reference_grouplasso.py b/selectinf/randomized/approx_reference_grouplasso.py index 3909a2a56..5d90e981b 100644 --- a/selectinf/randomized/approx_reference_grouplasso.py +++ b/selectinf/randomized/approx_reference_grouplasso.py @@ -75,7 +75,7 @@ def fit(self, tol = 1.e-20 - _, self.randomizer_prec = self.randomizer.cov_prec + _, self.prec_randomizer = self.randomizer.cov_prec # now we are collecting the directions and norms of the active groups for g in sorted(np.unique(self.groups)): # g is group label @@ -314,14 +314,14 @@ def selective_MLE(self, regress_opt_target = regress_opt.dot(regress_score_target) resid_mean_opt_target = cond_mean - regress_opt_target.dot(observed_target) - if np.asarray(self.randomizer_prec).shape in [(), (0,)]: - _P = regress_score_target.T.dot(resid_score_target) * self.randomizer_prec - _prec = prec_target + (regress_score_target.T.dot(regress_score_target) * self.randomizer_prec) - regress_opt_target.T.dot( + if np.asarray(self.prec_randomizer).shape in [(), (0,)]: + _P = regress_score_target.T.dot(resid_score_target) * self.prec_randomizer + prec_target_nosel = prec_target + (regress_score_target.T.dot(regress_score_target) * self.prec_randomizer) - regress_opt_target.T.dot( prec_opt).dot( regress_opt_target) else: - _P = regress_score_target.T.dot(self.randomizer_prec).dot(resid_score_target) - _prec = prec_target + (regress_score_target.T.dot(self.randomizer_prec).dot(regress_score_target)) - regress_opt_target.T.dot( + _P = regress_score_target.T.dot(self.prec_randomizer).dot(resid_score_target) + prec_target_nosel = prec_target + (regress_score_target.T.dot(self.prec_randomizer).dot(regress_score_target)) - regress_opt_target.T.dot( prec_opt).dot(regress_opt_target) C = cov_target.dot(_P - regress_opt_target.T.dot(prec_opt).dot(resid_mean_opt_target)) @@ -338,14 +338,14 @@ def selective_MLE(self, useJacobian, **solve_args) - final_estimator = cov_target.dot(_prec).dot(observed_target) \ + final_estimator = cov_target.dot(prec_target_nosel).dot(observed_target) \ + cov_target.dot(regress_opt_target.T.dot(prec_opt.dot(cond_mean - soln))) + C - unbiased_estimator = cov_target.dot(_prec).dot(observed_target) + cov_target.dot( + unbiased_estimator = cov_target.dot(prec_target_nosel).dot(observed_target) + cov_target.dot( _P - regress_opt_target.T.dot(prec_opt).dot(resid_mean_opt_target)) L = regress_opt_target.T.dot(prec_opt) - observed_info_natural = _prec + L.dot(regress_opt_target) - L.dot(hess.dot(L.T)) + observed_info_natural = prec_target_nosel + L.dot(regress_opt_target) - L.dot(hess.dot(L.T)) observed_info_mean = cov_target.dot(observed_info_natural.dot(cov_target)) @@ -448,7 +448,7 @@ def __init__(self, self.observed_soln = query.observed_opt_state - self.randomizer_prec = query.randomizer_prec + self.prec_randomizer = query.prec_randomizer self.score_offset = query.observed_score_state + query.observed_subgrad self.ntarget = ntarget = cov_target.shape[0] @@ -688,16 +688,16 @@ def _construct_density(self): regress_opt_target = self.regress_opt.dot(regress_score_target) resid_mean_opt_target = (self.cond_mean - regress_opt_target.dot(observed_target_uni)).reshape((regress_opt_target.shape[0],)) - _prec = prec_target + (regress_score_target.T.dot(regress_score_target) * self.randomizer_prec) - regress_opt_target.T.dot( + prec_target_nosel = prec_target + 
(regress_score_target.T.dot(regress_score_target) * self.prec_randomizer) - regress_opt_target.T.dot( self.prec_opt).dot(regress_opt_target) - _P = regress_score_target.T.dot(resid_score_target) * self.randomizer_prec - _r = (1. / _prec).dot(regress_opt_target.T.dot(self.prec_opt).dot(resid_mean_opt_target) - _P) - _S = np.linalg.inv(_prec).dot(prec_target) + _P = regress_score_target.T.dot(resid_score_target) * self.prec_randomizer + _r = (1. / prec_target_nosel).dot(regress_opt_target.T.dot(self.prec_opt).dot(resid_mean_opt_target) - _P) + _S = np.linalg.inv(prec_target_nosel).dot(prec_target) S[m] = _S r[m] = _r - precs[m] = _prec + precs[m] = prec_target_nosel self.precs = precs self.S = S diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index fe7cc0885..018d19074 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -53,7 +53,7 @@ def __init__(self, self.observed_soln = query.observed_opt_state - self.randomizer_prec = query.sampler.randomizer_prec + self.prec_randomizer = query.sampler.prec_randomizer self.score_offset = query.observed_score_state + query.sampler.logdens_transform[1] self.ntarget = ntarget = cov_target.shape[0] @@ -305,10 +305,10 @@ def _construct_density(self): regress_opt_target = self.regress_opt.dot(regress_score_target) resid_mean_opt_target = (self.cond_mean - regress_opt_target.dot(observed_target_uni)).reshape((regress_opt_target.shape[0],)) - prec_target_nosel = prec_target + (regress_score_target.T.dot(regress_score_target) * self.randomizer_prec) - regress_opt_target.T.dot( + prec_target_nosel = prec_target + (regress_score_target.T.dot(regress_score_target) * self.prec_randomizer) - regress_opt_target.T.dot( self.prec_opt).dot(regress_opt_target) - _P = regress_score_target.T.dot(resid_score_target) * self.randomizer_prec + _P = regress_score_target.T.dot(resid_score_target) * self.prec_randomizer _r = (1. 
/ _prec).dot(regress_opt_target.T.dot(self.prec_opt).dot(resid_mean_opt_target) - _P) _S = np.linalg.inv(_prec).dot(prec_target) diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index 4936896b1..6b473cd56 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -164,10 +164,12 @@ def fit(self, X, y = self.loglike.data linpred = X.dot(beta_bar) n = linpred.shape[0] + if hasattr(self.loglike.saturated_loss, "hessian"): # a GLM -- all we need is W W = self._W = self.loglike.saturated_loss.hessian(linpred) _hessian_active = np.dot(X.T, X[:, active] * W[:, None]) _hessian_unpen = np.dot(X.T, X[:, unpenalized] * W[:, None]) + _hessian = np.dot(X.T, X * W[:, None]) # CAREFUL -- this will be big elif hasattr(self.loglike.saturated_loss, "hessian_mult"): active_right = np.zeros((n, active.sum())) for i, j in enumerate(np.nonzero(active)[0]): @@ -181,6 +183,12 @@ def fit(self, case_weights=self.loglike.saturated_loss.case_weights) _hessian_active = X.T.dot(active_right) _hessian_unpen = X.T.dot(unpen_right) + _hessian = [] + for i in range(p): + _hessian.append(self.loglike.saturated_loss.hessian_mult(linpred, + X[:,i], + case_weights=self.loglike.saturated_loss.case_weights)) + _hessian = X.T.dot(np.array(_hessian).T) else: raise ValueError('saturated_loss has no hessian or hessian_mult method') @@ -238,6 +246,19 @@ def signed_basis_vector(p, j, s): b_scaling[:active.sum()], opt_linear, self.observed_subgrad) + + #### to be fixed -- set the cov_score here without dispersion + + self._cov_randomizer, prec = self.randomizer.cov_prec + self._prod_score_prec_unnorm = _hessian + + if np.asarray(prec).shape in [(), (0,)]: + self._prod_score_prec_unnorm *= prec + else: + self._prod_score_prec_unnorm = self._prod_score_prec_unnorm.dot(prec) + + ##### + if num_opt_var > 0: self._setup_sampler(*self._setup_sampler_data) @@ -721,7 +742,9 @@ def selected_targets(loglike, dispersion = ((y - loglike.saturated_loss.mean_function( Xfeat.dot(observed_target))) ** 2 / W).sum() / (n - Xfeat.shape[1]) - return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives + regress_target_score = np.zeros((cov_target.shape[0], p)) + regress_target_score[:,features] = cov_target + return observed_target, cov_target * dispersion, regress_target_score, alternatives def full_targets(loglike, W, @@ -756,7 +779,8 @@ def full_targets(loglike, (n - p)) alternatives = ['twosided'] * features.sum() - return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives + regress_target_score = Qfull_inv[features] # weights missing? 
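# The target helpers now hand back a regression coefficient rather than a
# covariance: the third return value, `regress_target_score`, is the coefficient
# of the target regressed onto the internally used score, matching the rewrite
# "in terms of regression parameters".  A shape-only sketch of the
# selected-target construction above (`Qfeat_inv` is a fabricated stand-in for
# (X_E^T W X_E)^{-1}, used here only to illustrate the zero padding):

import numpy as np

p = 6
features = np.array([True, False, True, False, False, True])
nfeat = features.sum()

Qfeat_inv = np.linalg.inv(np.eye(nfeat) + 0.1)      # placeholder for (X_E^T W X_E)^{-1}
cov_target = Qfeat_inv

regress_target_score = np.zeros((cov_target.shape[0], p))
regress_target_score[:, features] = cov_target      # zero columns off the selected set

print(regress_target_score.shape)                   # (3, 6)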
+ return observed_target, cov_target * dispersion, regress_target_score, alternatives def debiased_targets(loglike, W, @@ -811,7 +835,7 @@ def debiased_targets(loglike, (n - features.sum())) alternatives = ['twosided'] * features.sum() - return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives + return observed_target, cov_target * dispersion, Qinv_hat, alternatives def form_targets(target, loglike, @@ -920,7 +944,9 @@ def _setup_implied_gaussian(self, regress_opt[:, ordered_vars] = -cond_cov * signs[None, :] / (dispersion * ratio) cond_mean = regress_opt.dot(self.observed_score_state + observed_subgrad) - return cond_mean, cond_cov, cond_precision, regress_opt + prod_score_prec = np.identity(self.nfeature) / ratio + + return cond_mean, cond_cov, cond_precision, regress_opt, prod_score_prec def _solve_randomized_problem(self, # optional binary vector diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index ea1d1fbf9..44f981561 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -41,7 +41,7 @@ def __init__(self, linear_part = query.sampler.affine_con.linear_part offset = query.sampler.affine_con.offset regress_opt = query.sampler.logdens_transform[0] - _, randomizer_prec = query.randomizer.cov_prec + _, prec_randomizer = query.randomizer.cov_prec score_offset = query.observed_score_state + query.sampler.logdens_transform[1] result, self.inverse_info, log_ref = query.selective_MLE(observed_target, @@ -60,7 +60,7 @@ def __init__(self, self.observed_target = observed_target self.cov_target_score = cov_target_score self.regress_opt = regress_opt - self.randomizer_prec = randomizer_prec + self.prec_randomizer = prec_randomizer self.score_offset = score_offset self.feasible_point = query.observed_opt_state @@ -140,14 +140,14 @@ def _set_marginal_parameters(self): self.linear_coef = regress_opt_target self.offset_coef = resid_mean_opt_target - if np.asarray(self.randomizer_prec).shape in [(), (0,)]: - prec_target_nosel = self.prec_target + (regress_score_target.T.dot(regress_score_target) * self.randomizer_prec) \ + if np.asarray(self.prec_randomizer).shape in [(), (0,)]: + prec_target_nosel = self.prec_target + (regress_score_target.T.dot(regress_score_target) * self.prec_randomizer) \ - regress_opt_target.T.dot(self.cond_precision).dot(regress_opt_target) - _P = regress_score_target.T.dot(resid_score_target) * self.randomizer_prec + _P = regress_score_target.T.dot(resid_score_target) * self.prec_randomizer else: - prec_target_nosel = self.prec_target + (regress_score_target.T.dot(self.randomizer_prec).dot(regress_score_target)) \ + prec_target_nosel = self.prec_target + (regress_score_target.T.dot(self.prec_randomizer).dot(regress_score_target)) \ - regress_opt_target.T.dot(self.cond_precision).dot(regress_opt_target) - _P = regress_score_target.T.dot(self.randomizer_prec).dot(resid_score_target) + _P = regress_score_target.T.dot(self.prec_randomizer).dot(resid_score_target) _Q = np.linalg.inv(_prec + regress_opt_target.T.dot(self.cond_precision).dot(regress_opt_target)) self.prec_marginal = self.cond_precision - self.cond_precision.dot(regress_opt_target).dot(_Q).dot(regress_opt_target.T).dot(self.cond_precision) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 9859f693f..c284b59ec 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -118,9 +118,10 @@ def _setup_sampler(self, 
(cond_mean, cond_cov, cond_precision, - regress_opt) = self._setup_implied_gaussian(opt_linear, - observed_subgrad, - dispersion) + regress_opt, + prod_score_prec) = self._setup_implied_gaussian(opt_linear, + observed_subgrad, + dispersion) def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad if score.ndim == 1: @@ -135,9 +136,8 @@ def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad observed_subgrad, cond_precision) - - _, randomizer_prec = self.randomizer.cov_prec - self.cond_mean, self.cond_cov, self.randomizer_prec = cond_mean, cond_cov, randomizer_prec + cov_randomizer = self._cov_randomizer + self.cond_mean, self.cond_cov, self.cov_randomizer = cond_mean, cond_cov, cov_randomizer affine_con = constraints(A, b, @@ -150,7 +150,9 @@ def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad log_density, regress_opt, observed_subgrad, - self.randomizer_prec, + cov_randomizer, # \Sigma_{\omega} + opt_linear, # L + prod_score_prec, # \Sigma_S \Theta_{\omega} selection_info=self.selection_variable, useC=self.useC) @@ -164,6 +166,8 @@ def _setup_implied_gaussian(self, _, prec = self.randomizer.cov_prec prec = prec / dispersion + prod_score_prec = self._prod_score_prec_unnorm * dispersion # this is usually unnormalized by dispersion + if np.asarray(prec).shape in [(), (0,)]: cond_precision = opt_linear.T.dot(opt_linear) * prec cond_cov = np.linalg.inv(cond_precision) @@ -172,17 +176,17 @@ def _setup_implied_gaussian(self, cond_precision = opt_linear.T.dot(prec.dot(opt_linear)) cond_cov = np.linalg.inv(cond_precision) regress_opt = -cond_cov.dot(opt_linear.T).dot(prec) - + # regress_opt is regression coefficient of opt onto score + u... cond_mean = regress_opt.dot(self.observed_score_state + observed_subgrad) - return cond_mean, cond_cov, cond_precision, regress_opt + return cond_mean, cond_cov, cond_precision, regress_opt, prod_score_prec def summary(self, observed_target, cov_target, - cov_target_score, + regress_target_score, alternatives, opt_sample=None, target_sample=None, @@ -200,8 +204,8 @@ def summary(self, Observed estimate of target. cov_target : ndarray Estimated covaraince of target. - cov_target_score : ndarray - Estimated covariance of target and score of randomized query. + regress_target_score : ndarray + Estimated regression coefficient of target on score. 
alternatives : [str], optional Sequence of strings describing the alternatives, should be values of ['twosided', 'less', 'greater'] @@ -234,7 +238,7 @@ def summary(self, pivots = self.sampler.coefficient_pvalues(observed_target, cov_target, - cov_target_score, + regress_target_score, parameter=parameter, sample=(opt_sample, logW), normal_sample=target_sample, @@ -243,7 +247,7 @@ def summary(self, if not np.all(parameter == 0): pvalues = self.sampler.coefficient_pvalues(observed_target, cov_target, - cov_target_score, + regress_target_score, parameter=np.zeros_like(parameter), sample=(opt_sample, logW), normal_sample=target_sample, @@ -257,13 +261,13 @@ def summary(self, if compute_intervals: MLE = self.selective_MLE(observed_target, cov_target, - cov_target_score)[0] + regress_target_score)[0] MLE_intervals = np.asarray(MLE[['lower_confidence', 'upper_confidence']]) intervals = self.sampler.confidence_intervals( observed_target, cov_target, - cov_target_score, + regress_target_score, sample=(opt_sample, logW), normal_sample=target_sample, initial_guess=MLE_intervals, @@ -281,7 +285,7 @@ def summary(self, def selective_MLE(self, observed_target, cov_target, - cov_target_score, + regress_target_score, level=0.9, solve_args={'tol': 1.e-12}): """ @@ -291,7 +295,7 @@ def selective_MLE(self, Observed estimate of target. cov_target : ndarray Estimated covaraince of target. - cov_target_score : ndarray + regress_target_score : ndarray Estimated covariance of target and score of randomized query. level : float, optional Confidence level. @@ -301,7 +305,7 @@ def selective_MLE(self, return self.sampler.selective_MLE(observed_target, cov_target, - cov_target_score, + regress_target_score, self.observed_opt_state, level=level, solve_args=solve_args) @@ -309,7 +313,7 @@ def selective_MLE(self, def posterior(self, observed_target, cov_target, - cov_target_score, + regress_target_score, prior=None, dispersion=None, solve_args={'tol': 1.e-12}): @@ -320,7 +324,7 @@ def posterior(self, Observed estimate of target. cov_target : ndarray Estimated covaraince of target. - cov_target_score : ndarray + regress_target_score : ndarray Estimated covariance of target and score of randomized query. prior : callable A callable object that takes a single argument @@ -347,7 +351,7 @@ def prior(target_parameter): return posterior(self, observed_target, cov_target, - cov_target_score, + regress_target_score, prior, dispersion, solve_args=solve_args) @@ -355,7 +359,7 @@ def prior(target_parameter): def approximate_grid_inference(self, observed_target, cov_target, - cov_target_score, + regress_target_score, alternatives=None, solve_args={'tol': 1.e-12}): @@ -366,7 +370,7 @@ def approximate_grid_inference(self, Observed estimate of target. cov_target : ndarray Estimated covaraince of target. - cov_target_score : ndarray + regress_target_score : ndarray Estimated covariance of target and score of randomized query. 
alternatives : [str], optional Sequence of strings describing the alternatives, @@ -378,7 +382,7 @@ def approximate_grid_inference(self, G = approximate_grid_inference(self, observed_target, cov_target, - cov_target_score, + regress_target_score, solve_args=solve_args) return G.summary(alternatives=alternatives) @@ -837,7 +841,9 @@ def __init__(self, log_cond_density, regress_opt, observed_subgrad, - randomizer_prec, + cov_randomizer, # \Sigma_{\omega} + opt_linear, # L + prod_score_prec, # \Sigma_S \Theta_{\omega} selection_info=None, useC=False): @@ -880,7 +886,9 @@ def __init__(self, self.regress_opt = regress_opt self.observed_subgrad = observed_subgrad self.useC = useC - self.randomizer_prec = randomizer_prec + self.cov_randomizer = cov_randomizer + self.opt_linear = opt_linear + self.prod_score_prec = prod_score_prec def log_cond_density(self, opt_sample, @@ -928,7 +936,7 @@ def sample(self, ndraw, burnin): def selective_MLE(self, observed_target, cov_target, - cov_target_score, + regress_target_score, # initial (observed) value of optimization variables -- # used as a feasible point. # precise value used only for independent estimator @@ -944,7 +952,7 @@ def selective_MLE(self, Observed estimate of target. cov_target : ndarray Estimated covaraince of target. - cov_target_score : ndarray + regress_target_score : ndarray Estimated covariance of target and score of randomized query. observed_soln : ndarray Feasible point for optimization problem. @@ -954,19 +962,19 @@ def selective_MLE(self, Arguments passed to solver. """ - score_offset = self.observed_score_state + self.observed_subgrad - return selective_MLE(observed_target, cov_target, - cov_target_score, + regress_target_score, observed_soln, self.mean, self.covariance, self.regress_opt, self.affine_con.linear_part, self.affine_con.offset, - self.randomizer_prec, - score_offset, + self.cov_randomizer, + self.opt_linear, + self.prod_score_prec, + self.observed_score_state + self.observed_subgrad, solve_args=solve_args, level=level, useC=self.useC) @@ -1156,8 +1164,8 @@ def pivot(self, opt_sample, _, _, - cov_target_score) in self.opt_sampling_info: - cur_score_cov = linear_func.dot(cov_target_score) + regress_target_score) in self.opt_sampling_info: + cur_score_cov = linear_func.dot(regress_target_score) # cur_nuisance is in the view's score coordinates cur_nuisance = opt_sampler.observed_score_state - cur_score_cov * observed_stat / cov_target @@ -1167,7 +1175,7 @@ def pivot(self, weights = self._weights(sample_stat, # normal sample candidate, # candidate value nuisance, # nuisance sufficient stats for each view - translate_dirs) # points will be moved like sample * cov_target_score + translate_dirs) # points will be moved like sample * regress_target_score pivot = np.mean((sample_stat + candidate <= observed_stat) * weights) / np.mean(weights) @@ -1311,7 +1319,7 @@ def naive_pvalues(diag_cov, observed, parameter): def selective_MLE(observed_target, cov_target, - cov_target_score, + regress_target_score, observed_soln, # initial (observed) value of # optimization variables -- used as a # feasible point. precise value used @@ -1321,8 +1329,10 @@ def selective_MLE(observed_target, regress_opt, linear_part, offset, - randomizer_prec, - score_offset, + cov_randomizer, + opt_linear, + prod_score_prec, + observed_score, solve_args={'tol': 1.e-12}, level=0.9, useC=False): @@ -1335,8 +1345,8 @@ def selective_MLE(observed_target, Observed estimate of target. cov_target : ndarray Estimated covaraince of target. 
- cov_target_score : ndarray - Estimated covariance of target and score of randomized query. + regress_target_score : ndarray + Estimated regression coefficient of target on score. observed_soln : ndarray Feasible point for optimization problem. cond_mean : ndarray @@ -1371,23 +1381,37 @@ def selective_MLE(observed_target, # regress_opt determines how the argument of the optimization density # depends on the score, not how the mean depends on score, hence the minus sign - regress_score_target = cov_target_score.T.dot(prec_target) - resid_score_target = score_offset - regress_score_target.dot(observed_target) - - regress_opt_target = regress_opt.dot(regress_score_target) - resid_mean_opt_target = cond_mean - regress_opt_target.dot(observed_target) - - - if np.asarray(randomizer_prec).shape in [(), (0,)]: - _P = regress_score_target.T.dot(resid_score_target) * randomizer_prec - prec_target_nosel = prec_target + (regress_score_target.T.dot(regress_score_target) * randomizer_prec) - regress_opt_target.T.dot(prec_opt).dot( - regress_opt_target) - else: - _P = regress_score_target.T.dot(randomizer_prec).dot(resid_score_target) - prec_target_nosel = prec_target + (regress_score_target.T.dot(randomizer_prec).dot(regress_score_target)) - regress_opt_target.T.dot( - prec_opt).dot(regress_opt_target) - - C = cov_target.dot(_P - regress_opt_target.T.dot(prec_opt).dot(resid_mean_opt_target)) + ## regress_score_target = cov_target_score.T.dot(prec_target) + ## resid_score_target = score_offset - regress_score_target.dot(observed_target) + + ## regress_opt_target = regress_opt.dot(regress_score_target) + ## resid_mean_opt_target = cond_mean - regress_opt_target.dot(observed_target) + + # M1, M2, M3 can be computed quickly (assumption) -- we can make this + # faster later + # shorthand + + M1 = prod_score_prec.dot(cov_randomizer).dot(prod_score_prec.T) + M2 = prod_score_prec.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(prod_score_prec.T) + M3 = prod_score_prec + + # this is specific to target + + T1 = regress_target_score.T.dot(prec_target) + T2 = T1.T.dot(M1.dot(T1)) + T3 = T1.T.dot(M2.dot(T1)) + + prec_target_nosel = prec_target + T2 - T3 + _P = T1.T.dot(M3.dot(observed_score)) - T2.dot(observed_target) + + T4 = M3.T.dot(T1) + T5 = opt_linear.T.dot(T4) + T6 = cond_cov.dot(T5) + T7 = opt_linear.dot(T6) + T8 = M3.dot(T7) + T9 = T8.dot(observed_target) + M3.dot(opt_linear.dot(cond_mean)) + T10 = T1.T.dot(T9) + C = cov_target.dot(T10) conjugate_arg = prec_opt.dot(cond_mean) @@ -1403,14 +1427,16 @@ def selective_MLE(observed_target, offset, **solve_args) + T11 = regress_target_score.dot(M3.dot(opt_linear)) final_estimator = cov_target.dot(prec_target_nosel).dot(observed_target) \ - + cov_target.dot(regress_opt_target.T.dot(prec_opt.dot(cond_mean - soln))) + C + + T11.dot(cond_mean - soln) + C + T12 = prec_target.dot(T11) + T13 = T3 unbiased_estimator = cov_target.dot(prec_target_nosel).dot(observed_target) + cov_target.dot( - _P - regress_opt_target.T.dot(prec_opt).dot(resid_mean_opt_target)) + _P - T12.dot(cond_mean) + T13.dot(observed_target)) - L = regress_opt_target.T.dot(prec_opt) - observed_info_natural = prec_target_nosel + L.dot(regress_opt_target) - L.dot(hess.dot(L.T)) + observed_info_natural = prec_target_nosel + T3 - T12.dot(hess.dot(T12.T)) observed_info_mean = cov_target.dot(observed_info_natural.dot(cov_target)) diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index b133735f6..da592da87 100644 --- 
a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -428,7 +428,7 @@ def test_cox(n=2000, if nonzero.sum() > 0: cox_full = rr.glm.cox(X, T, S) - full_hess = cox_full.hessian(conv.initial_soln) + full_hess = cox_full.hessian(conv.observed_soln) (observed_target, cov_target, @@ -488,7 +488,7 @@ def test_cox_split(n=2000, if nonzero.sum() > 0: cox_full = rr.glm.cox(X, T, S) - full_hess = cox_full.hessian(conv.initial_soln) + full_hess = cox_full.hessian(conv.observed_soln) (observed_target, cov_target, From 6b93957974680857ec2cd710987daa717cab7e76 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 12 Jul 2021 18:31:44 -0700 Subject: [PATCH 107/187] doc describing rename --- doc/Gaussian queries.Rmd | 167 +++++++++++++++++++++++++++++ doc/Gaussian queries.ipynb | 209 +++++++++++++++++++++++++++++++++++++ 2 files changed, 376 insertions(+) create mode 100644 doc/Gaussian queries.Rmd create mode 100644 doc/Gaussian queries.ipynb diff --git a/doc/Gaussian queries.Rmd b/doc/Gaussian queries.Rmd new file mode 100644 index 000000000..e86125149 --- /dev/null +++ b/doc/Gaussian queries.Rmd @@ -0,0 +1,167 @@ +--- +jupyter: + jupytext: + formats: ipynb,Rmd + text_representation: + extension: .Rmd + format_name: rmarkdown + format_version: '1.2' + jupytext_version: 1.10.2 + kernelspec: + display_name: Python 3 + language: python + name: python3 +--- + +## KKT conditions + +$$ +\omega = \nabla \ell(o) + u + \epsilon o. +$$ + +## Current terms used in selective MLE + +- `observed_score_state`: for LASSO this is $S=-X^TY$ (and for any linear regression), in general it should be +$\nabla \ell(\beta^*) - Q(\beta^*)\beta^*$, call this $A$ + +- `opt_offset`: this is $\hat{u}$ or (changed everywhere to `observed_subgrad`) + +- `opt_linear`: this is $\nabla^2 \ell(\hat{\beta}) + \epsilon I$ restricted to "selected" subspace, call this $L$ + +## Rewrite of KKT + +$$ +\omega = Lo + S + u. +$$ + +## More terms in the code + +- Randomization precision `randomizer_prec` call this $\Theta_{\omega}=\Sigma_{\omega}^{-1}$ so $\omega \sim N(0, \Theta^{-1})$. + +- `cond_cov`= $\Sigma_{o|S,u}$, `cond_mean`, `cond_precision`=$\Sigma_{o|S,u}^{-1}=\Theta_{o|S,u}$: +describe implied law of $o|S,u$. These are computed in `_setup_implied_gaussian`. Specifically, we have + +$$ +\begin{aligned} +\Sigma_{o|S,u} = (L^T\Theta L)^{-1} +\end{aligned} +$$ + +- `regress_opt` (formerly `logdens_linear`) call this $A$: this is the regression of $o$ onto $S+u$, in the implied +Gaussian given $u,S$ i.e. + +$$ +E[o|S,u] = A(S+u) = -\Sigma_{o|S,u} L^T \Theta_{\omega}(S+u). +$$ + +- `cond_mean` is the conditional mean of $o|S,u$ evaluated at observed $S,u$: $A(S+u)_{obs}$. Or, `regress_opt_score(observed_score_state + observed_subgrad)` + + +## Target related + +- `observed_target, target_cov, target_prec`: not much explanation needed $\hat{\theta}, \Sigma_{\hat{\theta}}, \Theta_{\hat{\theta}} = \Sigma_{\hat{\theta}}^{-1}$ + +- `target_score_cov`: $\Sigma_{\hat{\theta},S}$ + +- `regress_target`: regression of target onto score, formally this would be $\Sigma_{\hat{\theta},S}\Theta_S $ (transpose of usual way of writing regression, not in code yet), let's call it $B$ for now + +- `cov_product`: $\Sigma_S \Theta_{\omega}$: product of score covariance and randomization precision. 
+ +- `cov_score`: $\Sigma_S$ + +- `score_offset = observed_score_state + observed_subgrad`=$S+u$ + +### In `selective_MLE` + +- `target_linear`: $\Sigma_{S,\hat{\theta}}\Theta_{\hat{\theta}}= \Sigma_S B^T\Theta_{\hat{\theta}}$ (changed name to `regress_score_target`) + +- `target_offset`: $S+u-\Sigma_S B^T \Theta_{\hat{\theta}} \hat{\theta} = S+u - \Sigma_{S,\hat{\theta}} \Theta_{\hat{\theta}} \hat{\theta}$ (changed name to `resid_score_target`) + +- `target_lin`: $A\Sigma_S B^T \Theta_{\hat{\theta}} = -(L^T\Theta_{\omega}L)^{-1} L^T\Theta_{\omega} \Sigma_S B^T \Theta_{\hat{\theta}}$ (changed name to `regress_opt_target` + +- `target_off`: $A(S+u - \Sigma_S B^T \Theta_{\hat{\theta}} \hat{\theta})$ `resid_opt_target` + +- `_P`: $\Theta_{\hat{\theta}} B\Sigma_S \Theta_{\omega} (S+u-\Sigma_S B^T \Theta_{\hat{\theta}} \hat{\theta}) = \Theta_{\hat{\theta}} B\Sigma_S \Theta_{\omega} (S+u) - \Theta_{\hat{\theta}} B\Sigma_S \Theta_{\omega} \Sigma_S B^T \Theta_{\hat{\theta}} \hat{\theta} = \Theta_{\hat{\theta}} B\Sigma_S \Theta_{\omega} (S+u) - \Theta_{\hat{\theta}} B\Sigma_S \Theta_{\omega} \Sigma_{\omega} \Theta_{\omega} \Sigma_S B^T \Theta_{\hat{\theta}} \hat{\theta} $. +Let's call `_P` $\xi$ + +- `_prec`: $\Theta_{\hat{\theta}} + \Theta_{\hat{\theta}} B\Sigma_S \Theta_{\omega} \Sigma_S B^T \Theta_{\hat{\theta}} +- \Theta_{\hat{\theta}} B \Sigma_S A^T \Theta_{o|S,u} A \Sigma_S B^T \Theta_{\hat{\theta}}$ + +- `C`: something that can be computed with all of the above... I guess (but am not sure) that `_prec` is +the precision of the (best case, no-selection) unbiased estimate of our target when we condition on $N,u$ + +- More precisely, + +$$ +\begin{aligned} +\Theta_{\hat{\theta}} C &= \xi + (A\Sigma_S B^T \Theta_{\hat{\theta}})^T L^T \Theta_{\omega} L (A\Sigma_S B^T \Theta_{\hat{\theta}})^T \hat{\theta} - (A\Sigma_S B^T \Theta_{\hat{\theta}})^T L^T \Theta_{\omega} L A(S+u) \\ +&= \xi + \Theta_{\hat{\theta}}B \left(\Sigma_S A^T L^T\Theta_{\omega} L A \Sigma_S B^T \Theta_{\hat{\theta}} \hat{\theta} - \Sigma_S A^T L^T\Theta_{\omega} L A(S+u) \right) \\ +&= \xi + \Theta_{\hat{\theta}}B \left(\Sigma_S \Theta_{\omega} L (L^T\Theta_{\omega} L)^{-1} L^T \Theta_{\omega} \Sigma_S B^T \Theta_{\hat{\theta}} \hat{\theta} + \Sigma_S \Theta_{\omega}L A(S+u) \right) \\ +\end{aligned} +$$ + +The expression $A(S+u)$ is `cond_mean` and the other term can be computed straightforwardly. We've used the fact +$$ +A\Sigma_S = -\Sigma_{o|S,u}L^T\Theta_{\omega} \Sigma_S =- (L^T\Theta_{\omega}L)^{-1}L^T\Theta_{\omega}\Sigma_S +$$ + + + + + +- Don't know what to sensibly call the last three things... but `_P` and `_prec` are the arguments to the +optimization problem so these are what needs computing. I did change `_prec` to `prec_target_nosel` + +- `cov_target.dot(regress_opt_target.T.dot(prec_opt))`. This is + +$$-\Sigma_{\hat{\theta}} \Theta_{\hat{\theta}}B \Sigma_S\Theta_{\omega} L (L^T\Theta_{\omega}L)^{-1} (L^T\Theta_{\omega} L) = B \Sigma_S\Theta_{\omega} L$$ + +- `regress_opt_target.T.dot(prec_opt)`. This is + +$$-\Theta_{\hat{\theta}}B \Sigma_S\Theta_{\omega} L (L^T\Theta_{\omega}L)^{-1} (L^T\Theta_{\omega} L) = \Theta_{\hat{\theta}} B \Sigma_S\Theta_{\omega} L$$ + +- `regress_opt_target.T.dot(prec_opt).dot(regress_opt_target)`: This is + +$$ +\Theta_{\hat{\theta}}B \Sigma_S\Theta_{\omega} L (L^T\Theta_{\omega}L)^{-1} L^T\Theta_{\omega} \Sigma_S B^T \Theta_{\hat{\theta}} +$$ + + +### Computational considerations? 
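+Before turning to efficiency, here is a small numerical sanity check of the displays above. This is an editorial sketch, not code from `selectinf`: every object below ($B$, $\Theta_{\hat{\theta}}$, $\Sigma_S$, $\Theta_{\omega}$, $L$, the observed $S+u$ and $\hat{\theta}$) is a random stand-in of the right shape. It verifies that the two ways of writing the last term of `_prec` -- once through $A$ and once through $L(L^T\Theta_{\omega}L)^{-1}L^T$ -- agree, and it forms $\xi$ as displayed.
+
+```{python}
+import numpy as np
+
+rng = np.random.default_rng(0)
+p, E, k = 6, 3, 2                      # dim of score, of opt variables, of target
+
+def rand_spd(d):
+    # random symmetric positive definite matrix, used only as a stand-in
+    M = rng.standard_normal((d, d))
+    return M @ M.T + d * np.eye(d)
+
+Sigma_S = rand_spd(p)                  # stand-in for the score covariance
+Sigma_omega = rand_spd(p)              # stand-in for the randomization covariance
+Theta_omega = np.linalg.inv(Sigma_omega)
+L = rng.standard_normal((p, E))        # stand-in for opt_linear
+Sigma_target = rand_spd(k)             # stand-in for the target covariance
+Theta_target = np.linalg.inv(Sigma_target)
+B = rng.standard_normal((k, p))        # stand-in for the regression of target on score
+
+cond_cov = np.linalg.inv(L.T @ Theta_omega @ L)   # Sigma_{o|S,u}
+A = -cond_cov @ L.T @ Theta_omega                 # regress_opt
+
+mid = Sigma_S @ B.T @ Theta_target                # \Sigma_S B^T \Theta_{\hat\theta}
+
+# `_prec` written with A, then rewritten using A Sigma_S = -(L^T Theta L)^{-1} L^T Theta Sigma_S
+prec1 = (Theta_target + mid.T @ Theta_omega @ mid
+         - mid.T @ A.T @ (L.T @ Theta_omega @ L) @ A @ mid)
+prec2 = (Theta_target
+         + mid.T @ (Theta_omega - Theta_omega @ L @ cond_cov @ L.T @ Theta_omega) @ mid)
+assert np.allclose(prec1, prec2)
+
+# `_P` (called xi above), at some observed S+u and observed target
+score_plus_subgrad = rng.standard_normal(p)       # stand-in for S + u
+theta_hat = rng.standard_normal(k)                # stand-in for the observed target
+xi = Theta_target @ B @ Sigma_S @ Theta_omega @ (score_plus_subgrad - mid @ theta_hat)
+```
+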
+ + +#### Case 1: $\Theta_{\omega}^{1/2}$ is known + + +Another potential downside to all this is that these matrices will generally be $p \times p$. I think in `price_of_selection` I had written some way of doing part of this without having to form all of these matrices +explicitly. However, the difference of the last two matrices in `_prec` can be computed (if we know $\Sigma_{\omega}^{\pm 1/2}$ as identity minus rank $E$ matrix I think and +$$ +A^T\Sigma_{o|S,u}A = \Theta_{\omega} L^T \Sigma_{o|S,u} L \Theta_{\omega} +$$ +so we want to compute +$$ +\Theta_{\omega} - \Theta_{\omega} L^T \Sigma_{o|S,u} L \Theta_{\omega} = \Theta_{\omega}^{1/2}(P - \Theta_{\omega}^{1/2}L^T (L^T\Theta_{\omega} L)^{-1} L\Theta_{\omega}^{1/2}) \Theta_{\omega}^{1/2} +$$ +with $P$ projection onto $\text{row}(\Sigma_{\omega})$. So we need to compute projection on to a $E$-dimensional +subspace of $\text{row}(\Sigma_{\omega})$. Morally, this makes sense even if $\Sigma_{\omega}$ is not full rank but seems a little sketchy. + +We might also try computing +$$ +\begin{aligned} +\Sigma_S\Theta_{\omega}\Sigma_S - \Sigma_S\Theta_{\omega} L^T \Sigma_{o|S,u} L \Theta_{\omega} \Sigma_S &= \Sigma_S \Theta_{\omega}^{1/2}(P - \Theta_{\omega}^{1/2}L^T (L^T\Theta_{\omega} L)^{-1} L\Theta_{\omega}^{1/2}) \Theta_{\omega}^{1/2} \Sigma_S \\ +&= \Sigma_S \Theta_{\omega} \Theta_{\omega}^{-1/2}(P - \Theta_{\omega}^{1/2}L^T (L^T\Theta_{\omega} L)^{-1} L\Theta_{\omega}^{1/2}) \Theta_{\omega}^{-1/2} \Theta_{\omega} \Sigma_S \\ +&= \Sigma_S \Theta_{\omega} \Sigma_{\omega}^{1/2}(P - \Theta_{\omega}^{1/2}L^T (L^T\Theta_{\omega} L)^{-1} L\Theta_{\omega}^{1/2}) \Sigma_{\omega}^{1/2} \Theta_{\omega} \Sigma_S \\ +&= \Sigma_S \Theta_{\omega} (\Sigma_{\omega} - PL^T (L^T\Theta_{\omega} L)^{-1} LP) \Theta_{\omega} \Sigma_S \\ +&= \Sigma_S \Theta_{\omega} (\Sigma_{\omega} - L^T (L^T\Theta_{\omega} L)^{-1} L) \Theta_{\omega} \Sigma_S \\ +\end{aligned} +$$ + +So, to compute `_prec` we need to compute this above matrix and apply it to $B\Theta_{\hat{\theta}}$. **If we suppose that $\Sigma_{\omega}$ and $\Sigma_S \Theta_{\omega}$ can be computed without $p^2$ memory then we only +have to store $L$ and $(L^T\Theta_{\omega}L)^{-1}$.** We are already storing $(L^T\Theta_{\omega}L)^{-1}$ as the conditional covariance in the affine constraint. + +This matrix might be easier to compute for both data splitting and general case (when we know $\Sigma_{\omega}$). 
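+The chain of equalities above can be checked numerically in the full-rank case, where the projection $P$ is the identity. Again this is just an editorial sketch with random stand-ins; the factors are written $L(L^T\Theta_{\omega}L)^{-1}L^T$ so the shapes are consistent with $L$ being $p \times E$.
+
+```{python}
+import numpy as np
+
+rng = np.random.default_rng(1)
+p, E = 6, 3
+
+def rand_spd(d):
+    # random symmetric positive definite stand-in
+    M = rng.standard_normal((d, d))
+    return M @ M.T + d * np.eye(d)
+
+Sigma_S, Sigma_omega = rand_spd(p), rand_spd(p)
+Theta_omega = np.linalg.inv(Sigma_omega)
+L = rng.standard_normal((p, E))
+cond_cov = np.linalg.inv(L.T @ Theta_omega @ L)          # Sigma_{o|S,u}
+
+lhs = (Sigma_S @ Theta_omega @ Sigma_S
+       - Sigma_S @ Theta_omega @ L @ cond_cov @ L.T @ Theta_omega @ Sigma_S)
+rhs = (Sigma_S @ Theta_omega
+       @ (Sigma_omega - L @ cond_cov @ L.T)
+       @ Theta_omega @ Sigma_S)
+assert np.allclose(lhs, rhs)
+```
+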
+ + + + +In order to compute `_P` suppose wee have stored $PL^T(L^T\Theta_{\omega}L)^{-1}LP$ as well as diff --git a/doc/Gaussian queries.ipynb b/doc/Gaussian queries.ipynb new file mode 100644 index 000000000..84788d447 --- /dev/null +++ b/doc/Gaussian queries.ipynb @@ -0,0 +1,209 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## KKT conditions\n", + "\n", + "$$\n", + "\\omega = \\nabla \\ell(o) + u + \\epsilon o.\n", + "$$\n", + "\n", + "## Current terms used in selective MLE\n", + "\n", + "- `observed_score_state`: for LASSO this is $S=-X^TY$ (and for any linear regression), in general it should be\n", + "$\\nabla \\ell(\\beta^*) - Q(\\beta^*)\\beta^*$, call this $A$\n", + "\n", + "- `opt_offset`: this is $\\hat{u}$ or (changed everywhere to `observed_subgrad`)\n", + "\n", + "- `opt_linear`: this is $\\nabla^2 \\ell(\\hat{\\beta}) + \\epsilon I$ restricted to \"selected\" subspace, call this $L$\n", + "\n", + "## Rewrite of KKT\n", + "\n", + "$$\n", + "\\omega = Lo + S + u.\n", + "$$\n", + "\n", + "## More terms in the code\n", + "\n", + "- Randomization precision `randomizer_prec` call this $\\Theta_{\\omega}=\\Sigma_{\\omega}^{-1}$ so $\\omega \\sim N(0, \\Theta^{-1})$.\n", + "\n", + "- `cond_cov`= $\\Sigma_{o|S,u}$, `cond_mean`, `cond_precision`=$\\Sigma_{o|S,u}^{-1}=\\Theta_{o|S,u}$:\n", + "describe implied law of $o|S,u$. These are computed in `_setup_implied_gaussian`. Specifically, we have\n", + "\n", + "$$\n", + "\\begin{aligned}\n", + "\\Sigma_{o|S,u} = (L^T\\Theta L)^{-1}\n", + "\\end{aligned}\n", + "$$\n", + "\n", + "- `regress_opt` (formerly `logdens_linear`) call this $A$: this is the regression of $o$ onto $S+u$, in the implied\n", + "Gaussian given $u,S$ i.e.\n", + "\n", + "$$\n", + "E[o|S,u] = A(S+u) = -\\Sigma_{o|S,u} L^T \\Theta_{\\omega}(S+u).\n", + "$$\n", + "\n", + "- `cond_mean` is the conditional mean of $o|S,u$ evaluated at observed $S,u$: $A(S+u)_{obs}$. 
Or, `regress_opt_score(observed_score_state + observed_subgrad)`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Target related\n", + "\n", + "- `observed_target, target_cov, target_prec`: not much explanation needed $\\hat{\\theta}, \\Sigma_{\\hat{\\theta}}, \\Theta_{\\hat{\\theta}} = \\Sigma_{\\hat{\\theta}}^{-1}$\n", + "\n", + "- `target_score_cov`: $\\Sigma_{\\hat{\\theta},S}$\n", + "\n", + "- `regress_target`: regression of target onto score, formally this would be $\\Sigma_{\\hat{\\theta},S}\\Theta_S $ (transpose of usual way of writing regression, not in code yet), let's call it $B$ for now\n", + "\n", + "- `cov_product`: $\\Sigma_S \\Theta_{\\omega}$: product of score covariance and randomization precision.\n", + "\n", + "- `cov_score`: $\\Sigma_S$\n", + "\n", + "- `score_offset = observed_score_state + observed_subgrad`=$S+u$\n", + "\n", + "### In `selective_MLE`\n", + "\n", + "- `target_linear`: $\\Sigma_{S,\\hat{\\theta}}\\Theta_{\\hat{\\theta}}= \\Sigma_S B^T\\Theta_{\\hat{\\theta}}$ (changed name to `regress_score_target`)\n", + "\n", + "- `target_offset`: $S+u-\\Sigma_S B^T \\Theta_{\\hat{\\theta}} \\hat{\\theta} = S+u - \\Sigma_{S,\\hat{\\theta}} \\Theta_{\\hat{\\theta}} \\hat{\\theta}$ (changed name to `resid_score_target`)\n", + "\n", + "- `target_lin`: $A\\Sigma_S B^T \\Theta_{\\hat{\\theta}} = -(L^T\\Theta_{\\omega}L)^{-1} L^T\\Theta_{\\omega} \\Sigma_S B^T \\Theta_{\\hat{\\theta}}$ (changed name to `regress_opt_target`\n", + "\n", + "- `target_off`: $A(S+u - \\Sigma_S B^T \\Theta_{\\hat{\\theta}} \\hat{\\theta})$ `resid_opt_target`\n", + "\n", + "- `_P`: $\\Theta_{\\hat{\\theta}} B\\Sigma_S \\Theta_{\\omega} (S+u-\\Sigma_S B^T \\Theta_{\\hat{\\theta}} \\hat{\\theta}) = \\Theta_{\\hat{\\theta}} B\\Sigma_S \\Theta_{\\omega} (S+u) - \\Theta_{\\hat{\\theta}} B\\Sigma_S \\Theta_{\\omega} \\Sigma_S B^T \\Theta_{\\hat{\\theta}} \\hat{\\theta} = \\Theta_{\\hat{\\theta}} B\\Sigma_S \\Theta_{\\omega} (S+u) - \\Theta_{\\hat{\\theta}} B\\Sigma_S \\Theta_{\\omega} \\Sigma_{\\omega} \\Theta_{\\omega} \\Sigma_S B^T \\Theta_{\\hat{\\theta}} \\hat{\\theta} $.\n", + "Let's call `_P` $\\xi$\n", + "\n", + "- `_prec`: $\\Theta_{\\hat{\\theta}} + \\Theta_{\\hat{\\theta}} B\\Sigma_S \\Theta_{\\omega} \\Sigma_S B^T \\Theta_{\\hat{\\theta}}\n", + "- \\Theta_{\\hat{\\theta}} B \\Sigma_S A^T \\Theta_{o|S,u} A \\Sigma_S B^T \\Theta_{\\hat{\\theta}}$\n", + "\n", + "- `C`: something that can be computed with all of the above... 
I guess (but am not sure) that `_prec` is \n", + "the precision of the (best case, no-selection) unbiased estimate of our target when we condition on $N,u$ \n", + "\n", + "- More precisely,\n", + "\n", + "$$\n", + "\\begin{aligned}\n", + "\\Theta_{\\hat{\\theta}} C &= \\xi + (A\\Sigma_S B^T \\Theta_{\\hat{\\theta}})^T L^T \\Theta_{\\omega} L (A\\Sigma_S B^T \\Theta_{\\hat{\\theta}})^T \\hat{\\theta} - (A\\Sigma_S B^T \\Theta_{\\hat{\\theta}})^T L^T \\Theta_{\\omega} L A(S+u) \\\\\n", + "&= \\xi + \\Theta_{\\hat{\\theta}}B \\left(\\Sigma_S A^T L^T\\Theta_{\\omega} L A \\Sigma_S B^T \\Theta_{\\hat{\\theta}} \\hat{\\theta} - \\Sigma_S A^T L^T\\Theta_{\\omega} L A(S+u) \\right) \\\\\n", + "&= \\xi + \\Theta_{\\hat{\\theta}}B \\left(\\Sigma_S \\Theta_{\\omega} L (L^T\\Theta_{\\omega} L)^{-1} L^T \\Theta_{\\omega} \\Sigma_S B^T \\Theta_{\\hat{\\theta}} \\hat{\\theta} + \\Sigma_S \\Theta_{\\omega}L A(S+u) \\right) \\\\\n", + "\\end{aligned}\n", + "$$\n", + "\n", + "The expression $A(S+u)$ is `cond_mean` and the other term can be computed straightforwardly. We've used the fact\n", + "$$\n", + "A\\Sigma_S = -\\Sigma_{o|S,u}L^T\\Theta_{\\omega} \\Sigma_S =- (L^T\\Theta_{\\omega}L)^{-1}L^T\\Theta_{\\omega}\\Sigma_S\n", + "$$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "\n", + "- Don't know what to sensibly call the last three things... but `_P` and `_prec` are the arguments to the\n", + "optimization problem so these are what needs computing. I did change `_prec` to `prec_target_nosel`\n", + "\n", + "- `cov_target.dot(regress_opt_target.T.dot(prec_opt))`. This is\n", + "\n", + "$$-\\Sigma_{\\hat{\\theta}} \\Theta_{\\hat{\\theta}}B \\Sigma_S\\Theta_{\\omega} L (L^T\\Theta_{\\omega}L)^{-1} (L^T\\Theta_{\\omega} L) = B \\Sigma_S\\Theta_{\\omega} L$$\n", + "\n", + "- `regress_opt_target.T.dot(prec_opt)`. This is\n", + "\n", + "$$-\\Theta_{\\hat{\\theta}}B \\Sigma_S\\Theta_{\\omega} L (L^T\\Theta_{\\omega}L)^{-1} (L^T\\Theta_{\\omega} L) = \\Theta_{\\hat{\\theta}} B \\Sigma_S\\Theta_{\\omega} L$$\n", + "\n", + "- `regress_opt_target.T.dot(prec_opt).dot(regress_opt_target)`: This is\n", + "\n", + "$$\n", + "\\Theta_{\\hat{\\theta}}B \\Sigma_S\\Theta_{\\omega} L (L^T\\Theta_{\\omega}L)^{-1} L^T\\Theta_{\\omega} \\Sigma_S B^T \\Theta_{\\hat{\\theta}}\n", + "$$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Computational considerations?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Case 1: $\\Theta_{\\omega}^{1/2}$ is known" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Another potential downside to all this is that these matrices will generally be $p \\times p$. I think in `price_of_selection` I had written some way of doing part of this without having to form all of these matrices\n", + "explicitly. However, the difference of the last two matrices in `_prec` can be computed (if we know $\\Sigma_{\\omega}^{\\pm 1/2}$ as identity minus rank $E$ matrix I think and\n", + "$$\n", + "A^T\\Sigma_{o|S,u}A = \\Theta_{\\omega} L^T \\Sigma_{o|S,u} L \\Theta_{\\omega}\n", + "$$\n", + "so we want to compute\n", + "$$\n", + "\\Theta_{\\omega} - \\Theta_{\\omega} L^T \\Sigma_{o|S,u} L \\Theta_{\\omega} = \\Theta_{\\omega}^{1/2}(P - \\Theta_{\\omega}^{1/2}L^T (L^T\\Theta_{\\omega} L)^{-1} L\\Theta_{\\omega}^{1/2}) \\Theta_{\\omega}^{1/2}\n", + "$$\n", + "with $P$ projection onto $\\text{row}(\\Sigma_{\\omega})$. 
So we need to compute projection on to a $E$-dimensional\n", + "subspace of $\\text{row}(\\Sigma_{\\omega})$. Morally, this makes sense even if $\\Sigma_{\\omega}$ is not full rank but seems a little sketchy.\n", + "\n", + "We might also try computing\n", + "$$\n", + "\\begin{aligned}\n", + "\\Sigma_S\\Theta_{\\omega}\\Sigma_S - \\Sigma_S\\Theta_{\\omega} L^T \\Sigma_{o|S,u} L \\Theta_{\\omega} \\Sigma_S &= \\Sigma_S \\Theta_{\\omega}^{1/2}(P - \\Theta_{\\omega}^{1/2}L^T (L^T\\Theta_{\\omega} L)^{-1} L\\Theta_{\\omega}^{1/2}) \\Theta_{\\omega}^{1/2} \\Sigma_S \\\\\n", + "&= \\Sigma_S \\Theta_{\\omega} \\Theta_{\\omega}^{-1/2}(P - \\Theta_{\\omega}^{1/2}L^T (L^T\\Theta_{\\omega} L)^{-1} L\\Theta_{\\omega}^{1/2}) \\Theta_{\\omega}^{-1/2} \\Theta_{\\omega} \\Sigma_S \\\\\n", + "&= \\Sigma_S \\Theta_{\\omega} \\Sigma_{\\omega}^{1/2}(P - \\Theta_{\\omega}^{1/2}L^T (L^T\\Theta_{\\omega} L)^{-1} L\\Theta_{\\omega}^{1/2}) \\Sigma_{\\omega}^{1/2} \\Theta_{\\omega} \\Sigma_S \\\\\n", + "&= \\Sigma_S \\Theta_{\\omega} (\\Sigma_{\\omega} - PL^T (L^T\\Theta_{\\omega} L)^{-1} LP) \\Theta_{\\omega} \\Sigma_S \\\\\n", + "&= \\Sigma_S \\Theta_{\\omega} (\\Sigma_{\\omega} - L^T (L^T\\Theta_{\\omega} L)^{-1} L) \\Theta_{\\omega} \\Sigma_S \\\\\n", + "\\end{aligned}\n", + "$$\n", + "\n", + "So, to compute `_prec` we need to compute this above matrix and apply it to $B\\Theta_{\\hat{\\theta}}$. **If we suppose that $\\Sigma_{\\omega}$ and $\\Sigma_S \\Theta_{\\omega}$ can be computed without $p^2$ memory then we only\n", + "have to store $L$ and $(L^T\\Theta_{\\omega}L)^{-1}$.** We are already storing $(L^T\\Theta_{\\omega}L)^{-1}$ as the conditional covariance in the affine constraint.\n", + "\n", + "This matrix might be easier to compute for both data splitting and general case (when we know $\\Sigma_{\\omega}$).\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In order to compute `_P` suppose wee have stored $PL^T(L^T\\Theta_{\\omega}L)^{-1}LP$ as well as " + ] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,Rmd" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From dd4597a563392c1bf4e9414fe41ce6f6ab42b314 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 12 Jul 2021 20:02:49 -0700 Subject: [PATCH 108/187] small comment --- selectinf/randomized/query.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index c284b59ec..8072cd75f 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -1391,6 +1391,8 @@ def selective_MLE(observed_target, # faster later # shorthand + # these could be done by the query at `fit` time + M1 = prod_score_prec.dot(cov_randomizer).dot(prod_score_prec.T) M2 = prod_score_prec.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(prod_score_prec.T) M3 = prod_score_prec From 82cb60d4f17046991b3c83ecd0fdbb1700e2a53a Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 12 Jul 2021 23:38:43 -0700 Subject: [PATCH 109/187] computing M1, M2, M3 within query, so data splitting runs now --- selectinf/randomized/lasso.py | 21 ++++++----- selectinf/randomized/query.py | 67 
+++++++++++++++++++++-------------- 2 files changed, 53 insertions(+), 35 deletions(-) diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index 6b473cd56..397fce7ef 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -249,13 +249,7 @@ def signed_basis_vector(p, j, s): #### to be fixed -- set the cov_score here without dispersion - self._cov_randomizer, prec = self.randomizer.cov_prec - self._prod_score_prec_unnorm = _hessian - - if np.asarray(prec).shape in [(), (0,)]: - self._prod_score_prec_unnorm *= prec - else: - self._prod_score_prec_unnorm = self._prod_score_prec_unnorm.dot(prec) + self._hessian = _hessian ##### @@ -946,7 +940,18 @@ def _setup_implied_gaussian(self, prod_score_prec = np.identity(self.nfeature) / ratio - return cond_mean, cond_cov, cond_precision, regress_opt, prod_score_prec + cov_rand = self._hessian * dispersion + M1 = prod_score_prec.dot(cov_rand).dot(prod_score_prec.T) + M2 = prod_score_prec.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(prod_score_prec.T) + M3 = prod_score_prec + + return (cond_mean, + cond_cov, + cond_precision, + regress_opt, + M1, + M2, + M3) def _solve_randomized_problem(self, # optional binary vector diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 8072cd75f..dc59f6d9e 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -119,9 +119,11 @@ def _setup_sampler(self, cond_cov, cond_precision, regress_opt, - prod_score_prec) = self._setup_implied_gaussian(opt_linear, - observed_subgrad, - dispersion) + M1, + M2, + M3) = self._setup_implied_gaussian(opt_linear, + observed_subgrad, + dispersion) def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad if score.ndim == 1: @@ -136,8 +138,7 @@ def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad observed_subgrad, cond_precision) - cov_randomizer = self._cov_randomizer - self.cond_mean, self.cond_cov, self.cov_randomizer = cond_mean, cond_cov, cov_randomizer + self.cond_mean, self.cond_cov = cond_mean, cond_cov affine_con = constraints(A, b, @@ -148,11 +149,12 @@ def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad self.observed_opt_state, self.observed_score_state, log_density, - regress_opt, + regress_opt, # not needed? 
observed_subgrad, - cov_randomizer, # \Sigma_{\omega} opt_linear, # L - prod_score_prec, # \Sigma_S \Theta_{\omega} + M1, + M2, + M3, selection_info=self.selection_variable, useC=self.useC) @@ -163,10 +165,15 @@ def _setup_implied_gaussian(self, # for covariance of randomization dispersion=1): - _, prec = self.randomizer.cov_prec - prec = prec / dispersion + cov_rand, prec = self.randomizer.cov_prec + prec = prec / dispersion # why do we do this here -- prec is just known - prod_score_prec = self._prod_score_prec_unnorm * dispersion # this is usually unnormalized by dispersion + if np.asarray(prec).shape in [(), (0,)]: + _prod_score_prec_unnorm = self._hessian * prec + else: + _prod_score_prec_unnorm = self._hessian.dot(prec) + + prod_score_prec = _prod_score_prec_unnorm * dispersion if np.asarray(prec).shape in [(), (0,)]: cond_precision = opt_linear.T.dot(opt_linear) * prec @@ -181,7 +188,17 @@ def _setup_implied_gaussian(self, cond_mean = regress_opt.dot(self.observed_score_state + observed_subgrad) - return cond_mean, cond_cov, cond_precision, regress_opt, prod_score_prec + M1 = prod_score_prec.dot(cov_rand).dot(prod_score_prec.T) + M2 = prod_score_prec.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(prod_score_prec.T) + M3 = prod_score_prec + + return (cond_mean, + cond_cov, + cond_precision, + regress_opt, + M1, + M2, + M3) def summary(self, observed_target, @@ -841,9 +858,10 @@ def __init__(self, log_cond_density, regress_opt, observed_subgrad, - cov_randomizer, # \Sigma_{\omega} - opt_linear, # L - prod_score_prec, # \Sigma_S \Theta_{\omega} + opt_linear, + M1, + M2, + M3, selection_info=None, useC=False): @@ -886,9 +904,8 @@ def __init__(self, self.regress_opt = regress_opt self.observed_subgrad = observed_subgrad self.useC = useC - self.cov_randomizer = cov_randomizer self.opt_linear = opt_linear - self.prod_score_prec = prod_score_prec + self.M1, self.M2, self.M3 = M1, M2, M3 def log_cond_density(self, opt_sample, @@ -968,12 +985,12 @@ def selective_MLE(self, observed_soln, self.mean, self.covariance, - self.regress_opt, self.affine_con.linear_part, self.affine_con.offset, - self.cov_randomizer, self.opt_linear, - self.prod_score_prec, + self.M1, + self.M2, + self.M3, self.observed_score_state + self.observed_subgrad, solve_args=solve_args, level=level, @@ -1326,12 +1343,12 @@ def selective_MLE(observed_target, # only for independent estimator cond_mean, cond_cov, - regress_opt, linear_part, offset, - cov_randomizer, opt_linear, - prod_score_prec, + M1, + M2, + M3, observed_score, solve_args={'tol': 1.e-12}, level=0.9, @@ -1393,10 +1410,6 @@ def selective_MLE(observed_target, # these could be done by the query at `fit` time - M1 = prod_score_prec.dot(cov_randomizer).dot(prod_score_prec.T) - M2 = prod_score_prec.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(prod_score_prec.T) - M3 = prod_score_prec - # this is specific to target T1 = regress_target_score.T.dot(prec_target) From d91f463c211df75246e9806383c82f7ac2fae32f Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 13 Jul 2021 00:15:25 -0700 Subject: [PATCH 110/187] update doc --- doc/Gaussian queries.Rmd | 16 +++++++------- doc/Gaussian queries.ipynb | 21 +++++++++---------- selectinf/randomized/lasso.py | 7 ++++--- selectinf/randomized/query.py | 39 ++++++++++------------------------- 4 files changed, 34 insertions(+), 49 deletions(-) diff --git a/doc/Gaussian queries.Rmd b/doc/Gaussian queries.Rmd index e86125149..3dfb026f8 100644 --- a/doc/Gaussian queries.Rmd +++ b/doc/Gaussian queries.Rmd @@ -156,12 
+156,14 @@ $$ \end{aligned} $$ -So, to compute `_prec` we need to compute this above matrix and apply it to $B\Theta_{\hat{\theta}}$. **If we suppose that $\Sigma_{\omega}$ and $\Sigma_S \Theta_{\omega}$ can be computed without $p^2$ memory then we only -have to store $L$ and $(L^T\Theta_{\omega}L)^{-1}$.** We are already storing $(L^T\Theta_{\omega}L)^{-1}$ as the conditional covariance in the affine constraint. +## Three matrices -This matrix might be easier to compute for both data splitting and general case (when we know $\Sigma_{\omega}$). +- All the computations above can be expressed of some target specific info like $B, \Theta_{\hat{\theta}}, \Sigma_{\hat{\theta}}, \hat{\theta}$ and - - - -In order to compute `_P` suppose wee have stored $PL^T(L^T\Theta_{\omega}L)^{-1}LP$ as well as +$$ +\begin{aligned} +M_1 &= \Sigma_S \Theta_{\omega} \\ +M_2 &= M_1 \Sigma_{\omega} M_1^T \\ +M_3 &= M_1 L (L^T\Sigma_{\omega}L)^{-1} L M_1^T +\end{aligned} +$$ \ No newline at end of file diff --git a/doc/Gaussian queries.ipynb b/doc/Gaussian queries.ipynb index 84788d447..89d0cbc46 100644 --- a/doc/Gaussian queries.ipynb +++ b/doc/Gaussian queries.ipynb @@ -167,18 +167,17 @@ "\\end{aligned}\n", "$$\n", "\n", - "So, to compute `_prec` we need to compute this above matrix and apply it to $B\\Theta_{\\hat{\\theta}}$. **If we suppose that $\\Sigma_{\\omega}$ and $\\Sigma_S \\Theta_{\\omega}$ can be computed without $p^2$ memory then we only\n", - "have to store $L$ and $(L^T\\Theta_{\\omega}L)^{-1}$.** We are already storing $(L^T\\Theta_{\\omega}L)^{-1}$ as the conditional covariance in the affine constraint.\n", + "## Three matrices\n", "\n", - "This matrix might be easier to compute for both data splitting and general case (when we know $\\Sigma_{\\omega}$).\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In order to compute `_P` suppose wee have stored $PL^T(L^T\\Theta_{\\omega}L)^{-1}LP$ as well as " + "- All the computations above can be expressed of some target specific info like $B, \\Theta_{\\hat{\\theta}}, \\Sigma_{\\hat{\\theta}}, \\hat{\\theta}$ and\n", + "\n", + "$$\n", + "\\begin{aligned}\n", + "M_1 &= \\Sigma_S \\Theta_{\\omega} \\\\\n", + "M_2 &= M_1 \\Sigma_{\\omega} M_1^T \\\\\n", + "M_3 &= M_1 L (L^T\\Sigma_{\\omega}L)^{-1} L M_1^T\n", + "\\end{aligned}\n", + "$$" ] } ], diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index 397fce7ef..d1fa9bf9e 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -941,9 +941,10 @@ def _setup_implied_gaussian(self, prod_score_prec = np.identity(self.nfeature) / ratio cov_rand = self._hessian * dispersion - M1 = prod_score_prec.dot(cov_rand).dot(prod_score_prec.T) - M2 = prod_score_prec.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(prod_score_prec.T) - M3 = prod_score_prec + + M1 = prod_score_prec + M2 = M1.dot(cov_rand).dot(M1.T) + M3 = M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T) return (cond_mean, cond_cov, diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index dc59f6d9e..31909ac00 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -188,9 +188,9 @@ def _setup_implied_gaussian(self, cond_mean = regress_opt.dot(self.observed_score_state + observed_subgrad) - M1 = prod_score_prec.dot(cov_rand).dot(prod_score_prec.T) - M2 = prod_score_prec.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(prod_score_prec.T) - M3 = prod_score_prec + M1 = prod_score_prec + M2 = 
M1.dot(cov_rand).dot(M1.T) + M3 = M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T) return (cond_mean, cond_cov, @@ -1346,7 +1346,7 @@ def selective_MLE(observed_target, linear_part, offset, opt_linear, - M1, + M1, M2, M3, observed_score, @@ -1393,38 +1393,21 @@ def selective_MLE(observed_target, prec_opt = np.linalg.inv(cond_cov) - # regress_opt_target determines how the conditional mean of optimization variables - # vary with target - # regress_opt determines how the argument of the optimization density - # depends on the score, not how the mean depends on score, hence the minus sign - - ## regress_score_target = cov_target_score.T.dot(prec_target) - ## resid_score_target = score_offset - regress_score_target.dot(observed_target) - - ## regress_opt_target = regress_opt.dot(regress_score_target) - ## resid_mean_opt_target = cond_mean - regress_opt_target.dot(observed_target) - - # M1, M2, M3 can be computed quickly (assumption) -- we can make this - # faster later - # shorthand - - # these could be done by the query at `fit` time - # this is specific to target T1 = regress_target_score.T.dot(prec_target) - T2 = T1.T.dot(M1.dot(T1)) - T3 = T1.T.dot(M2.dot(T1)) + T2 = T1.T.dot(M2.dot(T1)) + T3 = T1.T.dot(M3.dot(T1)) prec_target_nosel = prec_target + T2 - T3 - _P = T1.T.dot(M3.dot(observed_score)) - T2.dot(observed_target) + _P = T1.T.dot(M1.dot(observed_score)) - T2.dot(observed_target) - T4 = M3.T.dot(T1) + T4 = M1.T.dot(T1) T5 = opt_linear.T.dot(T4) T6 = cond_cov.dot(T5) T7 = opt_linear.dot(T6) - T8 = M3.dot(T7) - T9 = T8.dot(observed_target) + M3.dot(opt_linear.dot(cond_mean)) + T8 = M1.dot(T7) + T9 = T8.dot(observed_target) + M1.dot(opt_linear.dot(cond_mean)) T10 = T1.T.dot(T9) C = cov_target.dot(T10) @@ -1442,7 +1425,7 @@ def selective_MLE(observed_target, offset, **solve_args) - T11 = regress_target_score.dot(M3.dot(opt_linear)) + T11 = regress_target_score.dot(M1.dot(opt_linear)) final_estimator = cov_target.dot(prec_target_nosel).dot(observed_target) \ + T11.dot(cond_mean - soln) + C From a762a480bf93755d8e5ec42f02a4d4f6ca19a37a Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Sun, 18 Jul 2021 22:41:37 -0400 Subject: [PATCH 111/187] commit changes so far --- selectinf/randomized/exact_reference.py | 1 + .../randomized/tests/test_exact_reference.py | 115 ++++++++++++------ 2 files changed, 76 insertions(+), 40 deletions(-) diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index 018d19074..429278d1e 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -76,6 +76,7 @@ def __init__(self, self.opt_linear = query.opt_linear self.useIP = useIP + self.inverse_info = inverse_info def summary(self, alternatives=None, diff --git a/selectinf/randomized/tests/test_exact_reference.py b/selectinf/randomized/tests/test_exact_reference.py index 7cb49ff11..7c1a5efb4 100644 --- a/selectinf/randomized/tests/test_exact_reference.py +++ b/selectinf/randomized/tests/test_exact_reference.py @@ -4,15 +4,16 @@ from ..lasso import lasso, selected_targets from ..exact_reference import exact_grid_inference -def test_approx_pivot(n=500, - p=100, - signal_fac=1., - s=5, - sigma=2., - rho=0.4, - randomizer_scale=1., - equicorrelated=False, - useIP=False): +def test_inf(n=500, + p=100, + signal_fac=1., + s=5, + sigma=2., + rho=0.4, + randomizer_scale=1., + equicorrelated=False, + useIP=False, + CI=True): while True: @@ -66,37 +67,71 @@ def test_approx_pivot(n=500, cov_target_score, useIP=useIP) - pivot = 
exact_grid_inf._pivots(beta_target) + if CI is False: + pivot = exact_grid_inf._pivots(beta_target) + return pivot + + else: + lci, uci = exact_grid_inf._intervals(level=0.90) + coverage = (lci < beta_target) * (uci > beta_target) + length = uci - lci + mle_length = 1.65*2 * np.sqrt(np.diag(exact_grid_inf.inverse_info)) + return np.mean(coverage), np.mean(length), np.mean(mle_length) + +def main(nsim=300, CI = False): + + if CI is False: + + import matplotlib as mpl + mpl.use('tkagg') + import matplotlib.pyplot as plt + from statsmodels.distributions.empirical_distribution import ECDF + + _pivot = [] + for i in range(nsim): + _pivot.extend(test_inf(n=100, + p=400, + signal_fac=1., + s=0, + sigma=2., + rho=0.30, + randomizer_scale=0.7, + equicorrelated=True, + useIP=False, + CI=False)) + + print("iteration completed ", i) + + plt.clf() + ecdf_pivot = ECDF(np.asarray(_pivot)) + grid = np.linspace(0, 1, 101) + plt.plot(grid, ecdf_pivot(grid), c='blue') + plt.plot(grid, grid, 'k--') + plt.show() + + else: + coverage_ = 0. + length_ = 0. + mle_length_= 0. + for n in range(nsim): + cov, len, mle_len = test_inf(n=400, + p=100, + signal_fac=0.5, + s=5, + sigma=2., + rho=0.30, + randomizer_scale=0.7, + equicorrelated=True, + useIP=False, + CI=True) + + coverage_ += cov + length_ += len + mle_length_ += mle_len + print("coverage so far ", coverage_ / (n + 1.)) + print("lengths so far ", length_ / (n + 1.), mle_length_/ (n + 1.)) + print("iteration completed ", n + 1) - return pivot - -def main(nsim=300): - - import matplotlib as mpl - mpl.use('tkagg') - import matplotlib.pyplot as plt - from statsmodels.distributions.empirical_distribution import ECDF - - _pivot = [] - for i in range(nsim): - _pivot.extend(test_approx_pivot(n=100, - p=400, - signal_fac=1., - s=0, - sigma=2., - rho=0.30, - randomizer_scale=0.7, - equicorrelated=True, - useIP=False)) - - print("iteration completed ", i) - - plt.clf() - ecdf_pivot = ECDF(np.asarray(_pivot)) - grid = np.linspace(0, 1, 101) - plt.plot(grid, ecdf_pivot(grid), c='blue') - plt.plot(grid, grid, 'k--') - plt.show() if __name__ == "__main__": - main(nsim=100) \ No newline at end of file + main(nsim=100, CI=True) \ No newline at end of file From 7856e7ac19db87b6d24e8415fa149299472e7af4 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 19 Jul 2021 10:42:19 -0400 Subject: [PATCH 112/187] commit before switch --- selectinf/randomized/lasso.py | 2 +- selectinf/randomized/tests/test_split_lasso.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index d1fa9bf9e..53d5ff1fc 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -942,7 +942,7 @@ def _setup_implied_gaussian(self, cov_rand = self._hessian * dispersion - M1 = prod_score_prec + M1 = prod_score_prec M2 = M1.dot(cov_rand).dot(M1.T) M3 = M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T) diff --git a/selectinf/randomized/tests/test_split_lasso.py b/selectinf/randomized/tests/test_split_lasso.py index 78df932a9..f994c05cc 100644 --- a/selectinf/randomized/tests/test_split_lasso.py +++ b/selectinf/randomized/tests/test_split_lasso.py @@ -163,4 +163,3 @@ def main(nsim=500, n=100, p=200, target='selected', sigma=3, s=3): plt.savefig("plot.pdf") plt.show() - From 73137f81f68d1770d1ac2fc4954cc91dd40ab1bc Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 19 Jul 2021 16:25:31 -0400 Subject: [PATCH 113/187] some sign fixes --- selectinf/randomized/query.py | 8 +- 
.../tests/test_selective_MLE_high.py | 586 ++---------------- 2 files changed, 67 insertions(+), 527 deletions(-) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 31909ac00..fa4560ffb 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -1400,16 +1400,18 @@ def selective_MLE(observed_target, T3 = T1.T.dot(M3.dot(T1)) prec_target_nosel = prec_target + T2 - T3 - _P = T1.T.dot(M1.dot(observed_score)) - T2.dot(observed_target) + _P = -(T1.T.dot(M1.dot(observed_score)) + T2.dot(observed_target)) ##flipped sign of second term here T4 = M1.T.dot(T1) T5 = opt_linear.T.dot(T4) T6 = cond_cov.dot(T5) T7 = opt_linear.dot(T6) T8 = M1.dot(T7) - T9 = T8.dot(observed_target) + M1.dot(opt_linear.dot(cond_mean)) + T9 = (-T8.dot(observed_target) + M1.dot(opt_linear.dot(cond_mean))) + #T9 = M1.dot(opt_linear.dot(cond_mean)) T10 = T1.T.dot(T9) - C = cov_target.dot(T10) + C = cov_target.dot(_P - T10) + print("check within MLE ", np.allclose(T2 - T3, np.zeros((T2.shape[0], T2.shape[1]))), np.allclose((_P-T10), np.zeros(T10.shape[0]))) conjugate_arg = prec_opt.dot(cond_mean) diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index da592da87..1df9b2930 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -24,7 +24,7 @@ def test_full_targets(n=200, randomizer_scale=0.7, full_dispersion=False): """ - Run approx MLE with full targets on Gaussian data + Compare to R randomized lasso """ inst, const = gaussian_instance, lasso.gaussian @@ -86,30 +86,31 @@ def test_full_targets(n=200, cov_target_score)[0] pval = result['pvalue'] estimate = result['MLE'] - intervals = np.asarray(result[['lower_confidence', - 'upper_confidence']]) - + intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) print("estimate, intervals", estimate, intervals) coverage = (beta[nonzero] > intervals[:, 0]) * (beta[nonzero] < intervals[:, 1]) return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals -def test_selected_targets(n=2000, - p=200, - signal_fac=10., - s=5, - sigma=3, - rho=0.4, - randomizer_scale=1, + +def test_selected_targets(seedn, + n=2000, + p=200, + signal_fac=1.2, + s=5, + sigma=2, + rho=0.7, + randomizer_scale=1., full_dispersion=True): """ - Run approx MLE with selected targets on Gaussian data + Compare to R randomized lasso """ inst, const = gaussian_instance, lasso.gaussian signal = np.sqrt(signal_fac * 2 * np.log(p)) while True: + np.random.seed(seed=seedn) X, Y, beta = inst(n=n, p=p, signal=signal, @@ -156,522 +157,17 @@ def test_selected_targets(n=2000, cov_target_score)[0] estimate = result['MLE'] pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) - intervals = np.asarray(result[['lower_confidence', - 'upper_confidence']]) - beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) - return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals - -def test_logistic(n=2000, - p=200, - signal_fac=10., - s=5, - rho=0.4, - randomizer_scale=1): - """ - Run approx MLE with selected targets on binomial data - """ - - inst, const = logistic_instance, lasso.logistic - signal = np.sqrt(signal_fac * 2 * np.log(p)) - - while True: - X, Y, beta = inst(n=n, - p=p, - signal=signal, - s=s, - equicorrelated=False, - rho=rho, - random_signs=True)[:3] - 
- n, p = X.shape - - sigma_ = np.std(Y) - W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ - - conv = const(X, - Y, - W, - randomizer_scale=randomizer_scale * sigma_) - - signs = conv.fit() - nonzero = signs != 0 - print("dimensions", n, p, nonzero.sum()) - - if nonzero.sum() > 0: - - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=1) - - result = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] - estimate = result['MLE'] - pval = result['pvalue'] - intervals = np.asarray(result[['lower_confidence', - 'upper_confidence']]) - - return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals - -def test_logistic_split(n=2000, - p=200, - signal_fac=10., - s=5, - rho=0.4, - randomizer_scale=1): - """ - Run approx MLE with selected targets on binomial data with data splitting - """ - - inst, const = logistic_instance, split_lasso.logistic - signal = np.sqrt(signal_fac * 2 * np.log(p)) - - while True: - X, Y, beta = inst(n=n, - p=p, - signal=signal, - s=s, - equicorrelated=False, - rho=rho, - random_signs=True)[:3] - - n, p = X.shape - - sigma_ = np.std(Y) - W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ - - conv = const(X, - Y, - W, - proportion=0.7) - - signs = conv.fit() - nonzero = signs != 0 - print("dimensions", n, p, nonzero.sum()) - - if nonzero.sum() > 0: - - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=1) - - result = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] - estimate = result['MLE'] - pval = result['pvalue'] - intervals = np.asarray(result[['lower_confidence', - 'upper_confidence']]) - - return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals - -def test_poisson(n=2000, - p=200, - signal_fac=10., - s=5, - rho=0.4, - randomizer_scale=1): - """ - Run approx MLE with selected targets on Poisson data - """ - - inst, const = poisson_instance, lasso.poisson - signal = np.sqrt(signal_fac * 2 * np.log(p)) - - while True: - X, Y, beta = inst(n=n, - p=p, - signal=signal, - s=s, - equicorrelated=False, - rho=rho, - random_signs=True)[:3] - - n, p = X.shape - - sigma_ = np.std(Y) - W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ - - conv = const(X, - Y, - W, - randomizer_scale=randomizer_scale * sigma_) - - signs = conv.fit() - nonzero = signs != 0 - print("dimensions", n, p, nonzero.sum()) - - if nonzero.sum() > 0: - - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=1) - - result = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] - estimate = result['MLE'] - pval = result['pvalue'] - intervals = np.asarray(result[['lower_confidence', - 'upper_confidence']]) - - return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals - -def test_poisson_split(n=2000, - p=200, - signal_fac=10., - s=5, - rho=0.4, - randomizer_scale=1): - """ - Run approx MLE with selected targets on Poisson data with data splitting - """ - - inst, const = poisson_instance, split_lasso.poisson - signal = np.sqrt(signal_fac * 2 * np.log(p)) - - while True: - X, Y, beta = inst(n=n, - p=p, - signal=signal, - s=s, - equicorrelated=False, - rho=rho, - random_signs=True)[:3] - - n, p = X.shape - - sigma_ = np.std(Y) - W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ - - conv = const(X, - Y, - W, - proportion=0.7) - - signs = 
conv.fit() - nonzero = signs != 0 - print("dimensions", n, p, nonzero.sum()) - - if nonzero.sum() > 0: - - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=1) - - result = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] - estimate = result['MLE'] - pval = result['pvalue'] - intervals = np.asarray(result[['lower_confidence', - 'upper_confidence']]) - - return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals - -def test_cox(n=2000, - p=200, - signal_fac=10., - s=5, - rho=0.4, - randomizer_scale=1): - """ - Run approx MLE with selected targets on survival data - """ - - inst, const = cox_instance, lasso.coxph - signal = np.sqrt(signal_fac * 2 * np.log(p)) - - while True: - X, T, S, beta = inst(n=n, - p=p, - signal=signal, - s=s, - equicorrelated=False, - rho=rho, - random_signs=True)[:4] - - n, p = X.shape - - W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) - - conv = const(X, - T, - S, - W, - randomizer_scale=randomizer_scale) - - signs = conv.fit() - nonzero = signs != 0 - print("dimensions", n, p, nonzero.sum()) - - if nonzero.sum() > 0: - - cox_full = rr.glm.cox(X, T, S) - full_hess = cox_full.hessian(conv.observed_soln) - - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - None, - nonzero, - hessian=full_hess, - dispersion=1) - - result = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] - estimate = result['MLE'] - pval = result['pvalue'] - intervals = np.asarray(result[['lower_confidence', - 'upper_confidence']]) - - return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals - -def test_cox_split(n=2000, - p=200, - signal_fac=10., - s=5, - rho=0.4, - randomizer_scale=1): - """ - Run approx MLE with selected targets on survival data with data splitting - """ - - inst, const = cox_instance, split_lasso.coxph - signal = np.sqrt(signal_fac * 2 * np.log(p)) - - while True: - X, T, S, beta = inst(n=n, - p=p, - signal=signal, - s=s, - equicorrelated=False, - rho=rho, - random_signs=True)[:4] - - n, p = X.shape - - W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) - - conv = const(X, - T, - S, - W, - proportion=0.7) - - signs = conv.fit() - nonzero = signs != 0 - print("dimensions", n, p, nonzero.sum()) - - if nonzero.sum() > 0: - - cox_full = rr.glm.cox(X, T, S) - full_hess = cox_full.hessian(conv.observed_soln) - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - None, - nonzero, - hessian=full_hess, - dispersion=1) - - result = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] - estimate = result['MLE'] - pval = result['pvalue'] - intervals = np.asarray(result[['lower_confidence', - 'upper_confidence']]) - - return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals - -def test_scale_invariant_split(n=200, - p=20, - signal_fac=10., - s=5, - sigma=3, - rho=0.4, - randomizer_scale=1, - full_dispersion=True, - seed=2): - """ - Confirm Gaussian version is appropriately scale invariant with data splitting - """ - - inst, const = gaussian_instance, split_lasso.gaussian - signal = np.sqrt(signal_fac * 2 * np.log(p)) - - results = [] - - scales = [1, 5] - for scale in scales: - - np.random.seed(seed) - X, Y, beta = inst(n=n, - p=p, - signal=signal, - s=s, - equicorrelated=False, - rho=rho, - sigma=sigma, - random_signs=True)[:3] - - Y *= scale; beta *= scale - n, p = X.shape - - sigma_ = np.std(Y) - W = 
np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ - print('W', W[0]/scale) - conv = const(X, - Y, - W, - proportion=0.7) - - signs = conv.fit() - nonzero = signs != 0 - print('nonzero', np.where(nonzero)[0]) - print('feature_weights', conv.feature_weights[0] / scale) - dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) - - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=dispersion) - - print('dispersion', dispersion/scale**2) - print('target', observed_target[0]/scale) - print('cov_target', cov_target[0,0]/scale**2) - print('cov_target_score', cov_target_score[0,0]/scale**2) - - result = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] - - print(result['MLE'] / scale) - results.append(result) - - assert np.allclose(results[0]['MLE'] / scales[0], - results[1]['MLE'] / scales[1]) - assert np.allclose(results[0]['SE'] / scales[0], - results[1]['SE'] / scales[1]) - assert np.allclose(results[0]['upper_confidence'] / scales[0], - results[1]['upper_confidence'] / scales[1]) - assert np.allclose(results[0]['lower_confidence'] / scales[0], - results[1]['lower_confidence'] / scales[1]) - assert np.allclose(results[0]['Zvalue'], - results[1]['Zvalue']) - assert np.allclose(results[0]['pvalue'], - results[1]['pvalue']) - -def test_scale_invariant(n=200, - p=20, - signal_fac=10., - s=5, - sigma=3, - rho=0.4, - randomizer_scale=1, - full_dispersion=True, - seed=2): - """ - Confirm Gaussian version is appropriately scale invariant - """ - - inst, const = gaussian_instance, lasso.gaussian - signal = np.sqrt(signal_fac * 2 * np.log(p)) - - results = [] - - scales = [1, 5] - for scale in scales: - - np.random.seed(seed) - X, Y, beta = inst(n=n, - p=p, - signal=signal, - s=s, - equicorrelated=False, - rho=rho, - sigma=sigma, - random_signs=True)[:3] + # print("check ", np.asarray(result['MLE']), np.asarray(result['unbiased'])) - Y *= scale; beta *= scale - n, p = X.shape - - sigma_ = np.std(Y) - W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ - print('W', W[0]/scale) - conv = const(X, - Y, - W, - randomizer_scale=randomizer_scale * sigma_) + #return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals + return result['MLE'], result['lower_confidence'], result['upper_confidence'] - signs = conv.fit() - nonzero = signs != 0 - print('nonzero', np.where(nonzero)[0]) - print('feature_weights', conv.feature_weights[0] / scale) - print('perturb', conv._initial_omega[0] / scale) - dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) - - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=dispersion) - - print('dispersion', dispersion/scale**2) - print('target', observed_target[0]/scale) - print('cov_target', cov_target[0,0]/scale**2) - print('cov_target_score', cov_target_score[0,0]/scale**2) - - result = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] - - print(result['MLE'] / scale) - results.append(result) - - assert np.allclose(results[0]['MLE'] / scales[0], - results[1]['MLE'] / scales[1]) - assert np.allclose(results[0]['SE'] / scales[0], - results[1]['SE'] / scales[1]) - assert np.allclose(results[0]['upper_confidence'] / scales[0], - results[1]['upper_confidence'] / scales[1]) - assert np.allclose(results[0]['lower_confidence'] / scales[0], - results[1]['lower_confidence'] / scales[1]) - assert 
np.allclose(results[0]['Zvalue'], - results[1]['Zvalue']) - assert np.allclose(results[0]['pvalue'], - results[1]['pvalue']) - def test_instance(): n, p, s = 500, 100, 5 @@ -705,8 +201,7 @@ def test_instance(): cov_target_score)[0] estimate = result['MLE'] pval = result['pvalue'] - intervals = np.asarray(result[['lower_confidence', - 'upper_confidence']]) + intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) beta_target = np.linalg.pinv(X[:, M]).dot(X.dot(beta)) @@ -716,6 +211,17 @@ def test_instance(): return coverage + +# def main(nsim=500): +# +# cover = [] +# for i in range(nsim): +# +# cover_ = test_instance() +# cover.extend(cover_) +# print(np.mean(cover), 'coverage so far ') + + def test_selected_targets_disperse(n=500, p=100, signal_fac=1., @@ -786,7 +292,7 @@ def test_selected_targets_disperse(n=500, return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals -def main(nsim=500, full=False): +def test_inf(nsim=500, full=False): P0, PA, cover, length_int = [], [], [], [] from statsmodels.distributions import ECDF @@ -808,9 +314,41 @@ def main(nsim=500, full=False): cover.extend(cover_) P0.extend(p0) PA.extend(pA) + # print( + # np.array(PA) < 0.1, np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover), + # np.mean(avg_length), 'null pvalue + power + length') print("coverage and lengths ", np.mean(cover), np.mean(avg_length)) +def main(nsim =50): + + import pandas as pd + column_names = ["Experiment Replicate", "MLE", "Lower Conf", "Upper Conf"] + master_DF = pd.DataFrame(columns=column_names) + DF = pd.DataFrame(columns=column_names) + + n, p, s = 500, 100, 5 + for i in range(nsim): + full_dispersion = True + mle, lower_conf, upper_conf = test_selected_targets(n=n, p=p, s=s, signal_fac=1.2, full_dispersion=full_dispersion, seedn=i) + #print("check ", mle, lower_conf, upper_conf) + DF["MLE"] = pd.Series(mle) + DF["Lower Conf"] = pd.Series(lower_conf) + DF["Upper Conf"] = pd.Series(upper_conf) + DF["Experiment Replicate"] = pd.Series((i*np.ones(len(mle),int)).tolist()) + + master_DF = DF.append(master_DF, ignore_index=True) + + import os + outpath = os.path.dirname(__file__) + + outfile_mse_html = os.path.join(outpath, "compare_mle.html") + outfile_mse_csv = os.path.join(outpath, "compare_mle.csv") + + master_DF.to_html(outfile_mse_html, index=False) + master_DF.to_csv(outfile_mse_csv, index=False) + if __name__ == "__main__": main(nsim=50) + From 5449179e1605071abd145e3c3a763139fb59e806 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 19 Jul 2021 23:19:05 -0400 Subject: [PATCH 114/187] commit before switch --- selectinf/randomized/query.py | 6 +- .../tests/test_selective_MLE_high.py | 66 ++++++++++--------- 2 files changed, 38 insertions(+), 34 deletions(-) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index fa4560ffb..8afe3e5aa 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -1408,10 +1408,9 @@ def selective_MLE(observed_target, T7 = opt_linear.dot(T6) T8 = M1.dot(T7) T9 = (-T8.dot(observed_target) + M1.dot(opt_linear.dot(cond_mean))) - #T9 = M1.dot(opt_linear.dot(cond_mean)) T10 = T1.T.dot(T9) C = cov_target.dot(_P - T10) - print("check within MLE ", np.allclose(T2 - T3, np.zeros((T2.shape[0], T2.shape[1]))), np.allclose((_P-T10), np.zeros(T10.shape[0]))) + print("check within MLE ", np.allclose(T2 - T3, np.zeros((T2.shape[0], T2.shape[1]))), np.allclose(C, np.zeros(C.shape[0]))) conjugate_arg = prec_opt.dot(cond_mean) @@ 
-1463,7 +1462,8 @@ def selective_MLE(observed_target, 'lower_confidence': intervals[:, 0], 'upper_confidence': intervals[:, 1], 'unbiased': unbiased_estimator}) - return result, observed_info_mean, log_ref + return result, observed_info_mean, log_ref,\ + T11[:,0], cond_mean - soln, cov_target.dot(prec_target_nosel).dot(observed_target), C diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 1df9b2930..143ac5c1a 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -152,9 +152,9 @@ def test_selected_targets(seedn, nonzero, dispersion=dispersion) - result = conv.selective_MLE(observed_target, + result, _, _, X1, X2, X3, X4 = conv.selective_MLE(observed_target, cov_target, - cov_target_score)[0] + cov_target_score) estimate = result['MLE'] pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) @@ -166,7 +166,7 @@ def test_selected_targets(seedn, # print("check ", np.asarray(result['MLE']), np.asarray(result['unbiased'])) #return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals - return result['MLE'], result['lower_confidence'], result['upper_confidence'] + return result['MLE'], result['lower_confidence'], result['upper_confidence'], X1, X2, X3, X4 def test_instance(): @@ -292,50 +292,54 @@ def test_selected_targets_disperse(n=500, return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals -def test_inf(nsim=500, full=False): - P0, PA, cover, length_int = [], [], [], [] - from statsmodels.distributions import ECDF - - n, p, s = 500, 100, 0 - - for i in range(nsim): - if full: - if n > p: - full_dispersion = True - else: - full_dispersion = False - p0, pA, cover_, intervals = test_full_targets(n=n, p=p, s=s, full_dispersion=full_dispersion) - avg_length = intervals[:, 1] - intervals[:, 0] - else: - full_dispersion = True - p0, pA, cover_, intervals = test_selected_targets(n=n, p=p, s=s, full_dispersion=full_dispersion) - avg_length = intervals[:, 1] - intervals[:, 0] - - cover.extend(cover_) - P0.extend(p0) - PA.extend(pA) - # print( - # np.array(PA) < 0.1, np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover), - # np.mean(avg_length), 'null pvalue + power + length') - print("coverage and lengths ", np.mean(cover), np.mean(avg_length)) +# def main(nsim=500, full=False): +# P0, PA, cover, length_int = [], [], [], [] +# from statsmodels.distributions import ECDF +# +# n, p, s = 500, 100, 0 +# +# for i in range(nsim): +# if full: +# if n > p: +# full_dispersion = True +# else: +# full_dispersion = False +# p0, pA, cover_, intervals = test_full_targets(n=n, p=p, s=s, full_dispersion=full_dispersion) +# avg_length = intervals[:, 1] - intervals[:, 0] +# else: +# full_dispersion = True +# p0, pA, cover_, intervals = test_selected_targets(n=n, p=p, s=s, full_dispersion=full_dispersion) +# avg_length = intervals[:, 1] - intervals[:, 0] +# +# cover.extend(cover_) +# P0.extend(p0) +# PA.extend(pA) +# # print( +# # np.array(PA) < 0.1, np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover), +# # np.mean(avg_length), 'null pvalue + power + length') +# print("coverage and lengths ", np.mean(cover), np.mean(avg_length)) def main(nsim =50): import pandas as pd - column_names = ["Experiment Replicate", "MLE", "Lower Conf", "Upper Conf"] + column_names = ["Experiment Replicate", "MLE", "Lower 
Conf", "Upper Conf", "X1", "X2", "X3", "X4"] master_DF = pd.DataFrame(columns=column_names) DF = pd.DataFrame(columns=column_names) n, p, s = 500, 100, 5 for i in range(nsim): full_dispersion = True - mle, lower_conf, upper_conf = test_selected_targets(n=n, p=p, s=s, signal_fac=1.2, full_dispersion=full_dispersion, seedn=i) + mle, lower_conf, upper_conf, X1, X2, X3, X4 = test_selected_targets(seedn=i, n=n, p=p, s=s, signal_fac=1.2, full_dispersion=full_dispersion) #print("check ", mle, lower_conf, upper_conf) DF["MLE"] = pd.Series(mle) DF["Lower Conf"] = pd.Series(lower_conf) DF["Upper Conf"] = pd.Series(upper_conf) DF["Experiment Replicate"] = pd.Series((i*np.ones(len(mle),int)).tolist()) + DF["X1"] = pd.Series(X1) + DF["X2"] = pd.Series(X2) + DF["X3"] = pd.Series(X3) + DF["X4"] = pd.Series(X4) master_DF = DF.append(master_DF, ignore_index=True) From cce962e70bf406b156e55b8d3048aed8b51bc185 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Tue, 20 Jul 2021 09:00:22 -0400 Subject: [PATCH 115/187] regress_target_score scaled by dispersion --- selectinf/randomized/lasso.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index 53d5ff1fc..bbb9cb7c2 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -738,7 +738,7 @@ def selected_targets(loglike, regress_target_score = np.zeros((cov_target.shape[0], p)) regress_target_score[:,features] = cov_target - return observed_target, cov_target * dispersion, regress_target_score, alternatives + return observed_target, cov_target * dispersion, regress_target_score * dispersion, alternatives def full_targets(loglike, W, From cb73217c7d9a876909abe9865e481c46e3659c0e Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Tue, 20 Jul 2021 09:36:23 -0400 Subject: [PATCH 116/187] changes to selective mle + target: added comments --- selectinf/randomized/lasso.py | 2 +- selectinf/randomized/query.py | 11 ++- .../tests/test_selective_MLE_high.py | 87 +++++++++++++++---- 3 files changed, 77 insertions(+), 23 deletions(-) diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index bbb9cb7c2..f5b27936e 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -737,7 +737,7 @@ def selected_targets(loglike, Xfeat.dot(observed_target))) ** 2 / W).sum() / (n - Xfeat.shape[1]) regress_target_score = np.zeros((cov_target.shape[0], p)) - regress_target_score[:,features] = cov_target + regress_target_score[:,features] = cov_target ##scale by dispersion while returning the value return observed_target, cov_target * dispersion, regress_target_score * dispersion, alternatives def full_targets(loglike, diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 8afe3e5aa..43306aa1c 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -166,7 +166,7 @@ def _setup_implied_gaussian(self, dispersion=1): cov_rand, prec = self.randomizer.cov_prec - prec = prec / dispersion # why do we do this here -- prec is just known + prec = prec if np.asarray(prec).shape in [(), (0,)]: _prod_score_prec_unnorm = self._hessian * prec @@ -1407,10 +1407,9 @@ def selective_MLE(observed_target, T6 = cond_cov.dot(T5) T7 = opt_linear.dot(T6) T8 = M1.dot(T7) - T9 = (-T8.dot(observed_target) + M1.dot(opt_linear.dot(cond_mean))) + T9 = (-T8.dot(observed_target) + M1.dot(opt_linear.dot(cond_mean))) ##flipped sign of first term here T10 = T1.T.dot(T9) - C = cov_target.dot(_P - T10) - print("check within 
MLE ", np.allclose(T2 - T3, np.zeros((T2.shape[0], T2.shape[1]))), np.allclose(C, np.zeros(C.shape[0]))) + C = cov_target.dot(_P - T10) ##added missing _P in computing C conjugate_arg = prec_opt.dot(cond_mean) @@ -1462,8 +1461,8 @@ def selective_MLE(observed_target, 'lower_confidence': intervals[:, 0], 'upper_confidence': intervals[:, 1], 'unbiased': unbiased_estimator}) - return result, observed_info_mean, log_ref,\ - T11[:,0], cond_mean - soln, cov_target.dot(prec_target_nosel).dot(observed_target), C + + return result, observed_info_mean, log_ref diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 143ac5c1a..b53340b9c 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -93,8 +93,7 @@ def test_full_targets(n=200, return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals -def test_selected_targets(seedn, - n=2000, +def test_selected_targets(n=2000, p=200, signal_fac=1.2, s=5, @@ -110,7 +109,6 @@ def test_selected_targets(seedn, signal = np.sqrt(signal_fac * 2 * np.log(p)) while True: - np.random.seed(seed=seedn) X, Y, beta = inst(n=n, p=p, signal=signal, @@ -152,9 +150,9 @@ def test_selected_targets(seedn, nonzero, dispersion=dispersion) - result, _, _, X1, X2, X3, X4 = conv.selective_MLE(observed_target, + result = conv.selective_MLE(observed_target, cov_target, - cov_target_score) + cov_target_score)[0] estimate = result['MLE'] pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) @@ -163,10 +161,7 @@ def test_selected_targets(seedn, coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) - # print("check ", np.asarray(result['MLE']), np.asarray(result['unbiased'])) - - #return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals - return result['MLE'], result['lower_confidence'], result['upper_confidence'], X1, X2, X3, X4 + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals def test_instance(): @@ -320,26 +315,86 @@ def test_selected_targets_disperse(n=500, # print("coverage and lengths ", np.mean(cover), np.mean(avg_length)) +def test_selected_instance(seedn, + n=2000, + p=200, + signal_fac=1.2, + s=5, + sigma=2, + rho=0.7, + randomizer_scale=1., + full_dispersion=True): + """ + Compare to R randomized lasso + """ + + inst, const = gaussian_instance, lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + np.random.seed(seed=seedn) + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=True, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + idx = np.arange(p) + sigmaX = rho ** np.abs(np.subtract.outer(idx, idx)) + print("snr", beta.T.dot(sigmaX).dot(beta) / ((sigma ** 2.) 
* n)) + + n, p = X.shape + + sigma_ = np.std(Y) + W = 0.8 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + ridge_term=0., + randomizer_scale=randomizer_scale * sigma_) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + dispersion = None + if full_dispersion: + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + + (observed_target, + cov_target, + cov_target_score, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + + return result['MLE'], result['lower_confidence'], result['upper_confidence'] + def main(nsim =50): import pandas as pd - column_names = ["Experiment Replicate", "MLE", "Lower Conf", "Upper Conf", "X1", "X2", "X3", "X4"] + column_names = ["Experiment Replicate", "MLE", "Lower Conf", "Upper Conf"] master_DF = pd.DataFrame(columns=column_names) DF = pd.DataFrame(columns=column_names) n, p, s = 500, 100, 5 for i in range(nsim): full_dispersion = True - mle, lower_conf, upper_conf, X1, X2, X3, X4 = test_selected_targets(seedn=i, n=n, p=p, s=s, signal_fac=1.2, full_dispersion=full_dispersion) - #print("check ", mle, lower_conf, upper_conf) + mle, lower_conf, upper_conf = test_selected_instance(seedn=i, n=n, p=p, s=s, signal_fac=1.2, full_dispersion=full_dispersion) DF["MLE"] = pd.Series(mle) DF["Lower Conf"] = pd.Series(lower_conf) DF["Upper Conf"] = pd.Series(upper_conf) DF["Experiment Replicate"] = pd.Series((i*np.ones(len(mle),int)).tolist()) - DF["X1"] = pd.Series(X1) - DF["X2"] = pd.Series(X2) - DF["X3"] = pd.Series(X3) - DF["X4"] = pd.Series(X4) master_DF = DF.append(master_DF, ignore_index=True) From e99d75eb185b58d741e9f61d23f510858c878a7b Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Sun, 25 Jul 2021 16:13:30 -0400 Subject: [PATCH 117/187] updated query: moved calculations for M1, M2, M3 --- selectinf/randomized/lasso.py | 8 +-- selectinf/randomized/query.py | 71 +++++++++---------- .../tests/test_selective_MLE_high.py | 51 +++++++++---- 3 files changed, 73 insertions(+), 57 deletions(-) diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index f5b27936e..9917d229c 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -737,8 +737,8 @@ def selected_targets(loglike, Xfeat.dot(observed_target))) ** 2 / W).sum() / (n - Xfeat.shape[1]) regress_target_score = np.zeros((cov_target.shape[0], p)) - regress_target_score[:,features] = cov_target ##scale by dispersion while returning the value - return observed_target, cov_target * dispersion, regress_target_score * dispersion, alternatives + regress_target_score[:,features] = cov_target + return observed_target, cov_target * dispersion, regress_target_score, dispersion, alternatives def full_targets(loglike, W, @@ -774,7 +774,7 @@ def full_targets(loglike, alternatives = ['twosided'] * features.sum() regress_target_score = Qfull_inv[features] # weights missing? 
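# Aside (not part of the patch): the main() drivers in this series grow
# master_DF with DataFrame.append inside the loop and reuse DF across
# replicates.  append is deprecated in recent pandas, and reusing DF can carry
# rows over from a longer previous replicate (they surface as NaN).  A sketch
# of an equivalent accumulation that avoids both, assuming the same
# test_selected_instance driver as above:
import pandas as pd

nsim, (n, p, s) = 50, (500, 100, 5)
frames = []
for i in range(nsim):
    mle, lower_conf, upper_conf = test_selected_instance(seedn=i, n=n, p=p, s=s,
                                                         signal_fac=1.2,
                                                         full_dispersion=True)
    frames.append(pd.DataFrame({"Experiment Replicate": i,
                                "MLE": mle,
                                "Lower Conf": lower_conf,
                                "Upper Conf": upper_conf}))
master_DF = pd.concat(frames, ignore_index=True)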
- return observed_target, cov_target * dispersion, regress_target_score, alternatives + return observed_target, cov_target * dispersion, regress_target_score, dispersion, alternatives def debiased_targets(loglike, W, @@ -829,7 +829,7 @@ def debiased_targets(loglike, (n - features.sum())) alternatives = ['twosided'] * features.sum() - return observed_target, cov_target * dispersion, Qinv_hat, alternatives + return observed_target, cov_target * dispersion, Qinv_hat, dispersion, alternatives def form_targets(target, loglike, diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 43306aa1c..e17be21a8 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -106,24 +106,22 @@ def _setup_sampler(self, linear_part, offset, opt_linear, - observed_subgrad, - # optional dispersion parameter - # for covariance of randomization - dispersion=1): + observed_subgrad): A, b = linear_part, offset if not np.all(A.dot(self.observed_opt_state) - b <= 0): raise ValueError('constraints not satisfied') + cov_rand, prec = self.randomizer.cov_prec + (cond_mean, cond_cov, cond_precision, regress_opt, - M1, - M2, - M3) = self._setup_implied_gaussian(opt_linear, - observed_subgrad, - dispersion) + prod_score_prec_unnorm) = self._setup_implied_gaussian(opt_linear, + observed_subgrad, + cov_rand, + prec) def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad if score.ndim == 1: @@ -149,31 +147,25 @@ def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad self.observed_opt_state, self.observed_score_state, log_density, - regress_opt, # not needed? + regress_opt, observed_subgrad, opt_linear, # L - M1, - M2, - M3, + prod_score_prec_unnorm, + cov_rand, selection_info=self.selection_variable, useC=self.useC) def _setup_implied_gaussian(self, opt_linear, observed_subgrad, - # optional dispersion parameter - # for covariance of randomization - dispersion=1): + cov_rand, + prec): - cov_rand, prec = self.randomizer.cov_prec - prec = prec if np.asarray(prec).shape in [(), (0,)]: - _prod_score_prec_unnorm = self._hessian * prec + prod_score_prec_unnorm = self._hessian * prec else: - _prod_score_prec_unnorm = self._hessian.dot(prec) - - prod_score_prec = _prod_score_prec_unnorm * dispersion + prod_score_prec_unnorm = self._hessian.dot(prec) if np.asarray(prec).shape in [(), (0,)]: cond_precision = opt_linear.T.dot(opt_linear) * prec @@ -187,23 +179,18 @@ def _setup_implied_gaussian(self, # regress_opt is regression coefficient of opt onto score + u... 
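# Reading aid (not part of the patch): the three matrices that this series of
# patches moves between _setup_implied_gaussian and selective_MLE.  With a
# Gaussian randomizer of covariance cov_rand and precision prec, and cond_cov
# the conditional covariance of the optimization variables,
#
#   M1 = hessian.dot(prec)                      # score times randomizer precision
#   M2 = M1.dot(cov_rand).dot(M1.T)             # contribution of the randomization
#   M3 = M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T)
#                                               # contribution of the optimization variables
#
# These are the quadratic forms reused below by selective_MLE, the posterior
# code and the exact reference.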
cond_mean = regress_opt.dot(self.observed_score_state + observed_subgrad) - - M1 = prod_score_prec - M2 = M1.dot(cov_rand).dot(M1.T) - M3 = M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T) return (cond_mean, cond_cov, cond_precision, regress_opt, - M1, - M2, - M3) + prod_score_prec_unnorm) def summary(self, observed_target, cov_target, regress_target_score, + dispersion, alternatives, opt_sample=None, target_sample=None, @@ -303,6 +290,7 @@ def selective_MLE(self, observed_target, cov_target, regress_target_score, + dispersion, level=0.9, solve_args={'tol': 1.e-12}): """ @@ -323,6 +311,7 @@ def selective_MLE(self, return self.sampler.selective_MLE(observed_target, cov_target, regress_target_score, + dispersion, self.observed_opt_state, level=level, solve_args=solve_args) @@ -858,10 +847,9 @@ def __init__(self, log_cond_density, regress_opt, observed_subgrad, - opt_linear, - M1, - M2, - M3, + opt_linear, + prod_score_prec_unnorm, + cov_rand, selection_info=None, useC=False): @@ -905,7 +893,9 @@ def __init__(self, self.observed_subgrad = observed_subgrad self.useC = useC self.opt_linear = opt_linear - self.M1, self.M2, self.M3 = M1, M2, M3 + + self.prod_score_prec_unnorm = prod_score_prec_unnorm + self.cov_rand = cov_rand def log_cond_density(self, opt_sample, @@ -954,6 +944,7 @@ def selective_MLE(self, observed_target, cov_target, regress_target_score, + dispersion, # initial (observed) value of optimization variables -- # used as a feasible point. # precise value used only for independent estimator @@ -979,6 +970,11 @@ def selective_MLE(self, Arguments passed to solver. """ + prod_score_prec = self.prod_score_prec_unnorm * dispersion + M1 = prod_score_prec + M2 = M1.dot(self.cov_rand).dot(M1.T) + M3 = M1.dot(self.opt_linear.dot(self.covariance).dot(self.opt_linear.T)).dot(M1.T) + return selective_MLE(observed_target, cov_target, regress_target_score, @@ -988,9 +984,9 @@ def selective_MLE(self, self.affine_con.linear_part, self.affine_con.offset, self.opt_linear, - self.M1, - self.M2, - self.M3, + M1, + M2, + M3, self.observed_score_state + self.observed_subgrad, solve_args=solve_args, level=level, @@ -1410,7 +1406,6 @@ def selective_MLE(observed_target, T9 = (-T8.dot(observed_target) + M1.dot(opt_linear.dot(cond_mean))) ##flipped sign of first term here T10 = T1.T.dot(T9) C = cov_target.dot(_P - T10) ##added missing _P in computing C - conjugate_arg = prec_opt.dot(cond_mean) if useC: diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index b53340b9c..cd8cc81ab 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -315,15 +315,16 @@ def test_selected_targets_disperse(n=500, # print("coverage and lengths ", np.mean(cover), np.mean(avg_length)) -def test_selected_instance(seedn, - n=2000, - p=200, - signal_fac=1.2, - s=5, - sigma=2, - rho=0.7, - randomizer_scale=1., - full_dispersion=True): +def test_mle_inference(seedn, + n=2000, + p=200, + signal_fac=1.2, + s=5, + sigma=2, + rho=0.7, + randomizer_scale=1., + full_dispersion=True, + full=False): """ Compare to R randomized lasso """ @@ -366,17 +367,30 @@ def test_selected_instance(seedn, if full_dispersion: dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, + if full: + (observed_target, + cov_target, + cov_target_score, + dispersion, + alternatives) = 
full_targets(conv.loglike, conv._W, nonzero, dispersion=dispersion) + else: + (observed_target, + cov_target, + cov_target_score, + dispersion, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + result = conv.selective_MLE(observed_target, cov_target, - cov_target_score)[0] + cov_target_score, + dispersion)[0] return result['MLE'], result['lower_confidence'], result['upper_confidence'] @@ -390,7 +404,14 @@ def main(nsim =50): n, p, s = 500, 100, 5 for i in range(nsim): full_dispersion = True - mle, lower_conf, upper_conf = test_selected_instance(seedn=i, n=n, p=p, s=s, signal_fac=1.2, full_dispersion=full_dispersion) + mle, lower_conf, upper_conf = test_mle_inference(seedn=i, + n=n, + p=p, + s=s, + signal_fac=1.2, + full_dispersion=full_dispersion, + full=True) + DF["MLE"] = pd.Series(mle) DF["Lower Conf"] = pd.Series(lower_conf) DF["Upper Conf"] = pd.Series(upper_conf) From 071a143d5f5d6a2dfa678768b3e28a123b7f7cae Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Sun, 25 Jul 2021 16:26:55 -0400 Subject: [PATCH 118/187] commit before switch --- selectinf/randomized/tests/test_selective_MLE_high.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index cd8cc81ab..5589212d8 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -355,7 +355,7 @@ def test_mle_inference(seedn, conv = const(X, Y, W, - ridge_term=0., + #ridge_term=0., randomizer_scale=randomizer_scale * sigma_) signs = conv.fit() @@ -410,7 +410,7 @@ def main(nsim =50): s=s, signal_fac=1.2, full_dispersion=full_dispersion, - full=True) + full=False) DF["MLE"] = pd.Series(mle) DF["Lower Conf"] = pd.Series(lower_conf) From 66a388e8735a4591c76812d0d77e7a07717d790a Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Sun, 25 Jul 2021 16:45:29 -0400 Subject: [PATCH 119/187] commit before switch --- selectinf/randomized/tests/test_selective_MLE_high.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 5589212d8..e60e7feb7 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -345,6 +345,7 @@ def test_mle_inference(seedn, idx = np.arange(p) sigmaX = rho ** np.abs(np.subtract.outer(idx, idx)) + snr = beta.T.dot(sigmaX).dot(beta) / ((sigma ** 2.) * n) print("snr", beta.T.dot(sigmaX).dot(beta) / ((sigma ** 2.) 
* n)) n, p = X.shape @@ -392,19 +393,19 @@ def test_mle_inference(seedn, cov_target_score, dispersion)[0] - return result['MLE'], result['lower_confidence'], result['upper_confidence'] + return result['MLE'], result['lower_confidence'], result['upper_confidence'], snr def main(nsim =50): import pandas as pd - column_names = ["Experiment Replicate", "MLE", "Lower Conf", "Upper Conf"] + column_names = ["Experiment Replicate", "MLE", "Lower Conf", "Upper Conf", "SNR"] master_DF = pd.DataFrame(columns=column_names) DF = pd.DataFrame(columns=column_names) n, p, s = 500, 100, 5 for i in range(nsim): full_dispersion = True - mle, lower_conf, upper_conf = test_mle_inference(seedn=i, + mle, lower_conf, upper_conf, snr = test_mle_inference(seedn=i, n=n, p=p, s=s, @@ -416,6 +417,7 @@ def main(nsim =50): DF["Lower Conf"] = pd.Series(lower_conf) DF["Upper Conf"] = pd.Series(upper_conf) DF["Experiment Replicate"] = pd.Series((i*np.ones(len(mle),int)).tolist()) + DF["SNR"] = pd.Series((snr * np.ones(len(mle))).tolist()) master_DF = DF.append(master_DF, ignore_index=True) From 197e927202ebb72174238930793f108f545d59fc Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 26 Jul 2021 11:45:51 -0400 Subject: [PATCH 120/187] update posterior inf --- selectinf/randomized/posterior_inference.py | 73 +++++++++++--------- selectinf/randomized/tests/test_posterior.py | 7 +- 2 files changed, 45 insertions(+), 35 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 44f981561..96bb86796 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -31,39 +31,50 @@ def __init__(self, query, observed_target, cov_target, - cov_target_score, + regress_target_score, + dispersion, prior, - dispersion=1, solve_args={'tol': 1.e-12}): self.solve_args = solve_args linear_part = query.sampler.affine_con.linear_part offset = query.sampler.affine_con.offset - regress_opt = query.sampler.logdens_transform[0] - _, prec_randomizer = query.randomizer.cov_prec - score_offset = query.observed_score_state + query.sampler.logdens_transform[1] + opt_linear = query.opt_linear + + observed_score = query.observed_score_state + query.observed_subgrad + + print("dispersion ", dispersion) result, self.inverse_info, log_ref = query.selective_MLE(observed_target, cov_target, - cov_target_score) + regress_target_score, + dispersion) ### Note for an informative prior we might want to change this... 
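# Illustrative sketch (not part of the patch): the `prior` argument stored by
# this class is any callable returning (log density, gradient) at the candidate
# target value.  For example, a mean-zero Gaussian prior -- `prior_var` is a
# placeholder name, not an argument of the class -- could be written as
import numpy as np

def gaussian_prior(target_parameter, prior_var=100.):
    grad_prior = -target_parameter / prior_var
    log_prior = -np.linalg.norm(target_parameter) ** 2 / (2. * prior_var)
    return log_prior, grad_prior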
- self.ntarget = cov_target.shape[0] - self.nopt = query.cond_cov.shape[0] - self.cond_precision = np.linalg.inv(query.cond_cov) self.cov_target = cov_target self.prec_target = np.linalg.inv(cov_target) + self.ntarget = self.cov_target.shape[0] + self.nopt = self.cond_precision.shape[0] + self.observed_target = observed_target - self.cov_target_score = cov_target_score - self.regress_opt = regress_opt - self.prec_randomizer = prec_randomizer - self.score_offset = score_offset + self.regress_target_score = regress_target_score + self.opt_linear = opt_linear + self.observed_score = observed_score + + prod_score_prec = query.prod_score_prec_unnorm * dispersion + M1 = prod_score_prec + M2 = M1.dot(query.cov_rand).dot(M1.T) + M3 = M1.dot(self.opt_linear.dot(query.cond_cov).dot(self.opt_linear.T)).dot(M1.T) + + self.M1 = M1 + self.M2 = M2 + self.M3 = M3 - self.feasible_point = query.observed_opt_state + self.feasible_point = query.initial_point self.cond_mean = query.cond_mean self.linear_part = linear_part self.offset = offset @@ -131,29 +142,27 @@ def _set_marginal_parameters(self): implied mean as a function of the true parameters. """ - regress_score_target = self.cov_target_score.T.dot(self.prec_target) - resid_score_target = self.score_offset - regress_score_target.dot(self.observed_target) + T1 = self.regress_target_score.T.dot(self.prec_target) + T2 = T1.T.dot(self.M2.dot(T1)) + T3 = T1.T.dot(self.M3.dot(T1)) - regress_opt_target = self.regress_opt.dot(regress_score_target) - resid_mean_opt_target = self.cond_mean - regress_opt_target.dot(self.observed_target) + prec_target_nosel = self.prec_target + T2 - T3 + _P = -(T1.T.dot(self.M1.dot(self.observed_score)) + T2.dot(self.observed_target)) - self.linear_coef = regress_opt_target - self.offset_coef = resid_mean_opt_target + _Q = np.linalg.inv(prec_target_nosel + T3) - if np.asarray(self.prec_randomizer).shape in [(), (0,)]: - prec_target_nosel = self.prec_target + (regress_score_target.T.dot(regress_score_target) * self.prec_randomizer) \ - - regress_opt_target.T.dot(self.cond_precision).dot(regress_opt_target) - _P = regress_score_target.T.dot(resid_score_target) * self.prec_randomizer - else: - prec_target_nosel = self.prec_target + (regress_score_target.T.dot(self.prec_randomizer).dot(regress_score_target)) \ - - regress_opt_target.T.dot(self.cond_precision).dot(regress_opt_target) - _P = regress_score_target.T.dot(self.prec_randomizer).dot(resid_score_target) + T4 = self.M1.T.dot(T1) + T5 = self.opt_linear.T.dot(T4) + T6 = self.cond_cov.dot(T5) + T7 = self.opt_linear.dot(T6) + T8 = self.M1.dot(T7) + T9 = (-T8.dot(self.observed_target) + self.M1.dot(self.opt_linear.dot(self.cond_mean))) ##flipped sign of first term here + T10 = T1.T.dot(T9) - _Q = np.linalg.inv(_prec + regress_opt_target.T.dot(self.cond_precision).dot(regress_opt_target)) - self.prec_marginal = self.cond_precision - self.cond_precision.dot(regress_opt_target).dot(_Q).dot(regress_opt_target.T).dot(self.cond_precision) + self.prec_marginal = self.cond_precision - T5.dot(_Q).dot(T5) - r = np.linalg.inv(_prec).dot(regress_opt_target.T.dot(self.cond_precision).dot(resid_mean_opt_target) - _P) - S = np.linalg.inv(_prec).dot(self.prec_target) + r = np.linalg.inv(prec_target_nosel).dot(T10 - _P) + S = np.linalg.inv(prec_target_nosel).dot(self.prec_target) self.r = r self.S = S diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index 1b369c351..fd3e3a803 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ 
b/selectinf/randomized/tests/test_posterior.py @@ -49,7 +49,8 @@ def test_Langevin(n=500, (observed_target, cov_target, - cov_target_score, + regress_target_score, + dispersion, alternatives) = selected_targets(conv.loglike, conv._W, nonzero, @@ -57,7 +58,7 @@ def test_Langevin(n=500, posterior_inf = conv.posterior(observed_target, cov_target, - cov_target_score, + regress_target_score, dispersion=dispersion) samples = langevin_sampler(posterior_inf, @@ -362,6 +363,6 @@ def test_hiv_data(nsample=10000, if __name__ == "__main__": # test_hiv_data(split_proportion=0.50) - test_coverage(nsim=100) + test_coverage(nsim=1) From 275c953b314c35f395ac4adc84e9e7ccf0c4da9c Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 26 Jul 2021 11:46:44 -0400 Subject: [PATCH 121/187] scaled M1, M2, M3 with dispersion --- selectinf/randomized/query.py | 74 ++++++++++--------- .../tests/test_selective_MLE_high.py | 2 +- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index e17be21a8..d1c0b1077 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -106,24 +106,25 @@ def _setup_sampler(self, linear_part, offset, opt_linear, - observed_subgrad): + observed_subgrad, + # optional dispersion parameter + # for covariance of randomization + dispersion=1): A, b = linear_part, offset if not np.all(A.dot(self.observed_opt_state) - b <= 0): raise ValueError('constraints not satisfied') - cov_rand, prec = self.randomizer.cov_prec - (cond_mean, cond_cov, cond_precision, regress_opt, - prod_score_prec_unnorm) = self._setup_implied_gaussian(opt_linear, - observed_subgrad, - cov_rand, - prec) + M1, + M2, + M3) = self._setup_implied_gaussian(opt_linear, + observed_subgrad) - def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad + def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad if score.ndim == 1: mean_term = regress_opt.dot(score.T + u).T else: @@ -147,26 +148,29 @@ def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad self.observed_opt_state, self.observed_score_state, log_density, - regress_opt, + regress_opt, # not needed? observed_subgrad, - opt_linear, # L - prod_score_prec_unnorm, - cov_rand, + opt_linear, # L + M1, + M2, + M3, selection_info=self.selection_variable, useC=self.useC) def _setup_implied_gaussian(self, opt_linear, - observed_subgrad, - cov_rand, - prec): + observed_subgrad): + cov_rand, prec = self.randomizer.cov_prec + prec = prec # why do we do this here -- prec is just known if np.asarray(prec).shape in [(), (0,)]: prod_score_prec_unnorm = self._hessian * prec else: prod_score_prec_unnorm = self._hessian.dot(prec) - + + prod_score_prec_unnorm + if np.asarray(prec).shape in [(), (0,)]: cond_precision = opt_linear.T.dot(opt_linear) * prec cond_cov = np.linalg.inv(cond_precision) @@ -175,16 +179,22 @@ def _setup_implied_gaussian(self, cond_precision = opt_linear.T.dot(prec.dot(opt_linear)) cond_cov = np.linalg.inv(cond_precision) regress_opt = -cond_cov.dot(opt_linear.T).dot(prec) - + # regress_opt is regression coefficient of opt onto score + u... 
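# Usage sketch pieced together from the test diff above (no new API): with
# `conv` a fitted randomized lasso and `nonzero` its selected set, the target
# helpers now also return a dispersion estimate, which is passed through to
# `posterior` and then sampled with the Langevin sampler.
(observed_target,
 cov_target,
 regress_target_score,
 dispersion,
 alternatives) = selected_targets(conv.loglike,
                                  conv._W,
                                  nonzero,
                                  dispersion=None)

posterior_inf = conv.posterior(observed_target,
                               cov_target,
                               regress_target_score,
                               dispersion=dispersion)

samples = langevin_sampler(posterior_inf,
                           nsample=1500,
                           nburnin=100)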
cond_mean = regress_opt.dot(self.observed_score_state + observed_subgrad) - + + M1 = prod_score_prec_unnorm + M2 = M1.dot(cov_rand).dot(M1.T) + M3 = M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T) + return (cond_mean, cond_cov, cond_precision, regress_opt, - prod_score_prec_unnorm) + M1, + M2, + M3) def summary(self, observed_target, @@ -320,8 +330,8 @@ def posterior(self, observed_target, cov_target, regress_target_score, + dispersion, prior=None, - dispersion=None, solve_args={'tol': 1.e-12}): """ Parameters @@ -358,8 +368,8 @@ def prior(target_parameter): observed_target, cov_target, regress_target_score, - prior, dispersion, + prior, solve_args=solve_args) def approximate_grid_inference(self, @@ -848,8 +858,9 @@ def __init__(self, regress_opt, observed_subgrad, opt_linear, - prod_score_prec_unnorm, - cov_rand, + M1, + M2, + M3, selection_info=None, useC=False): @@ -893,9 +904,7 @@ def __init__(self, self.observed_subgrad = observed_subgrad self.useC = useC self.opt_linear = opt_linear - - self.prod_score_prec_unnorm = prod_score_prec_unnorm - self.cov_rand = cov_rand + self.M1, self.M2, self.M3 = M1, M2, M3 def log_cond_density(self, opt_sample, @@ -970,10 +979,9 @@ def selective_MLE(self, Arguments passed to solver. """ - prod_score_prec = self.prod_score_prec_unnorm * dispersion - M1 = prod_score_prec - M2 = M1.dot(self.cov_rand).dot(M1.T) - M3 = M1.dot(self.opt_linear.dot(self.covariance).dot(self.opt_linear.T)).dot(M1.T) + self.M1 = self.M1 * dispersion + self.M2 = self.M2 * (dispersion**2) + self.M3 = self.M3 * (dispersion**2) return selective_MLE(observed_target, cov_target, @@ -984,9 +992,9 @@ def selective_MLE(self, self.affine_con.linear_part, self.affine_con.offset, self.opt_linear, - M1, - M2, - M3, + self.M1, + self.M2, + self.M3, self.observed_score_state + self.observed_subgrad, solve_args=solve_args, level=level, diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index e60e7feb7..5753ba668 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -411,7 +411,7 @@ def main(nsim =50): s=s, signal_fac=1.2, full_dispersion=full_dispersion, - full=False) + full=True) DF["MLE"] = pd.Series(mle) DF["Lower Conf"] = pd.Series(lower_conf) From 56ca78f60dfb969e559ebc8b5ce13f88035683c8 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 26 Jul 2021 13:32:17 -0400 Subject: [PATCH 122/187] created all necessary objects --- selectinf/randomized/posterior_inference.py | 24 +++++++++----------- selectinf/randomized/query.py | 4 ++++ selectinf/randomized/tests/test_posterior.py | 4 ++-- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 96bb86796..06879fd51 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -45,7 +45,6 @@ def __init__(self, observed_score = query.observed_score_state + query.observed_subgrad - print("dispersion ", dispersion) result, self.inverse_info, log_ref = query.selective_MLE(observed_target, cov_target, regress_target_score, @@ -53,7 +52,9 @@ def __init__(self, ### Note for an informative prior we might want to change this... 
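# Aside (not part of the patch): M2 and M3 pick up dispersion**2 because both
# are quadratic in M1.  The in-place rescaling in selective_MLE above
# (self.M1 = self.M1 * dispersion, ...) would compound if the method were
# called twice on the same sampler; a non-mutating sketch uses local names:
M1 = self.M1 * dispersion
M2 = self.M2 * dispersion ** 2
M3 = self.M3 * dispersion ** 2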
- self.cond_precision = np.linalg.inv(query.cond_cov) + cond_cov = query.cond_cov + self.cond_precision = np.linalg.inv(cond_cov) + self.cond_cov = cond_cov self.cov_target = cov_target self.prec_target = np.linalg.inv(cov_target) @@ -65,16 +66,11 @@ def __init__(self, self.opt_linear = opt_linear self.observed_score = observed_score - prod_score_prec = query.prod_score_prec_unnorm * dispersion - M1 = prod_score_prec - M2 = M1.dot(query.cov_rand).dot(M1.T) - M3 = M1.dot(self.opt_linear.dot(query.cond_cov).dot(self.opt_linear.T)).dot(M1.T) + self.M1 = query.M1 * dispersion + self.M2 = query.M2 * (dispersion ** 2) + self.M3 = query.M3 * (dispersion ** 2) + self.feasible_point = query.observed_opt_state - self.M1 = M1 - self.M2 = M2 - self.M3 = M3 - - self.feasible_point = query.initial_point self.cond_mean = query.cond_mean self.linear_part = linear_part self.offset = offset @@ -159,14 +155,16 @@ def _set_marginal_parameters(self): T9 = (-T8.dot(self.observed_target) + self.M1.dot(self.opt_linear.dot(self.cond_mean))) ##flipped sign of first term here T10 = T1.T.dot(T9) - self.prec_marginal = self.cond_precision - T5.dot(_Q).dot(T5) + self.prec_marginal = self.cond_precision - T5.dot(_Q).dot(T5.T) + self.linear_coef = self.cond_cov.dot(T5) + self.offset_coef = self.cond_mean - self.linear_coef.dot(self.observed_target) r = np.linalg.inv(prec_target_nosel).dot(T10 - _P) S = np.linalg.inv(prec_target_nosel).dot(self.prec_target) self.r = r self.S = S - #print("check parameters for selected+lasso ", np.allclose(np.diag(S), np.ones(S.shape[0])), np.allclose(r, np.zeros(r.shape[0]))) + print("check parameters for selected+lasso ", np.allclose(np.diag(S), np.ones(S.shape[0])), np.allclose(r, np.zeros(r.shape[0]))) self.prec_target_nosel = prec_target_nosel diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index d1c0b1077..557cd20e4 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -188,6 +188,10 @@ def _setup_implied_gaussian(self, M2 = M1.dot(cov_rand).dot(M1.T) M3 = M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T) + self.M1 = M1 + self.M2 = M2 + self.M3 = M3 + return (cond_mean, cond_cov, cond_precision, diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index fd3e3a803..319df1b83 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -83,7 +83,7 @@ def test_coverage(nsim=100): for i in range(nsim): cov_, len_ = test_Langevin(n=500, p=100, - signal_fac=1., + signal_fac=0.5, s=5, sigma=3., rho=0.2, @@ -363,6 +363,6 @@ def test_hiv_data(nsample=10000, if __name__ == "__main__": # test_hiv_data(split_proportion=0.50) - test_coverage(nsim=1) + test_coverage(nsim=10) From f8260984d981658de3872c89b39aa4772ae1584c Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 26 Jul 2021 23:06:19 -0400 Subject: [PATCH 123/187] cleaned up some more --- selectinf/randomized/posterior_inference.py | 30 +++++++++----------- selectinf/randomized/query.py | 24 ++++++---------- selectinf/randomized/tests/test_posterior.py | 3 +- 3 files changed, 24 insertions(+), 33 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 06879fd51..1c83153cb 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -116,12 +116,11 @@ def log_posterior(self, log_normalizer = -val - mean_marginal.T.dot(prec_marginal).dot(mean_marginal) / 
2. - _prec = self.prec_target_nosel # shorthand - log_lik = -((self.observed_target - target).T.dot(_prec).dot(self.observed_target - target)) / 2. \ + log_lik = -((self.observed_target - target).T.dot(self.prec_target_nosel).dot(self.observed_target - target)) / 2. \ - log_normalizer - grad_lik = self.S.T.dot(_prec.dot(self.observed_target) - _prec.dot(target) - self.linear_coef.T.dot( - prec_marginal.dot(soln) - conjugate_marginal)) + grad_lik = self.S.T.dot(self.prec_target_nosel.dot(self.observed_target) - self.prec_target_nosel.dot(target) + - self.linear_coef.T.dot(prec_marginal.dot(soln) - conjugate_marginal)) log_prior, grad_prior = self.prior(target_parameter) @@ -141,30 +140,27 @@ def _set_marginal_parameters(self): T1 = self.regress_target_score.T.dot(self.prec_target) T2 = T1.T.dot(self.M2.dot(T1)) T3 = T1.T.dot(self.M3.dot(T1)) + T4 = self.M1.dot(self.opt_linear).dot(self.cond_cov).dot(self.opt_linear.T.dot(self.M1.T.dot(T1))) + T5 = T1.T.dot(self.M1.dot(self.opt_linear)) prec_target_nosel = self.prec_target + T2 - T3 + _P = -(T1.T.dot(self.M1.dot(self.observed_score)) + T2.dot(self.observed_target)) - _Q = np.linalg.inv(prec_target_nosel + T3) + bias_target = self.cov_target.dot(T1.T.dot(-T4.dot(self.observed_target) + self.M1.dot(self.opt_linear.dot(self.cond_mean))) - _P) - T4 = self.M1.T.dot(T1) - T5 = self.opt_linear.T.dot(T4) - T6 = self.cond_cov.dot(T5) - T7 = self.opt_linear.dot(T6) - T8 = self.M1.dot(T7) - T9 = (-T8.dot(self.observed_target) + self.M1.dot(self.opt_linear.dot(self.cond_mean))) ##flipped sign of first term here - T10 = T1.T.dot(T9) + _Q = np.linalg.inv(prec_target_nosel + T3) - self.prec_marginal = self.cond_precision - T5.dot(_Q).dot(T5.T) - self.linear_coef = self.cond_cov.dot(T5) + self.prec_marginal = self.cond_precision - T5.T.dot(_Q).dot(T5) + self.linear_coef = self.cond_cov.dot(T5.T) self.offset_coef = self.cond_mean - self.linear_coef.dot(self.observed_target) - r = np.linalg.inv(prec_target_nosel).dot(T10 - _P) + r = np.linalg.inv(prec_target_nosel).dot(self.prec_target.dot(bias_target)) S = np.linalg.inv(prec_target_nosel).dot(self.prec_target) self.r = r self.S = S - print("check parameters for selected+lasso ", np.allclose(np.diag(S), np.ones(S.shape[0])), np.allclose(r, np.zeros(r.shape[0]))) + #print("check parameters for selected+lasso ", np.allclose(np.diag(S), np.ones(S.shape[0])), np.allclose(r, np.zeros(r.shape[0]))) self.prec_target_nosel = prec_target_nosel @@ -192,7 +188,7 @@ def langevin_sampler(selective_posterior, for i, sample in enumerate(sampler): sampler.scaling = np.sqrt(selective_posterior.dispersion) samples[i, :] = sample.copy() - #print("sample ", i, samples[i,:]) + print("sample ", i, samples[i,:]) if i == nsample - 1: break diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 557cd20e4..6c852f8a5 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -1361,6 +1361,7 @@ def selective_MLE(observed_target, solve_args={'tol': 1.e-12}, level=0.9, useC=False): + """ Selective MLE based on approximation of CGF. 
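# Reading aid for the hunk below (not part of the patch).  With
#   T1 = regress_target_score.T @ prec_target,
#   T2 = T1.T @ M2 @ T1,   T3 = T1.T @ M3 @ T1,   T5 = T1.T @ M1 @ opt_linear,
# the cleaned-up selective_MLE computes
#   prec_target_nosel = prec_target + T2 - T3
#   MLE  = cov_target @ prec_target_nosel @ observed_target
#          + regress_target_score @ M1 @ opt_linear @ (cond_mean - soln) - bias_target
#   info = cov_target @ (prec_target_nosel + T3 - T5 @ hess @ T5.T) @ cov_target
# where soln and hess come from the affine-barrier solve over the optimization
# variables.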
@@ -1406,18 +1407,15 @@ def selective_MLE(observed_target, T1 = regress_target_score.T.dot(prec_target) T2 = T1.T.dot(M2.dot(T1)) T3 = T1.T.dot(M3.dot(T1)) + T4 = M1.dot(opt_linear).dot(cond_cov).dot(opt_linear.T.dot(M1.T.dot(T1))) + T5 = T1.T.dot(M1.dot(opt_linear)) prec_target_nosel = prec_target + T2 - T3 + _P = -(T1.T.dot(M1.dot(observed_score)) + T2.dot(observed_target)) ##flipped sign of second term here - T4 = M1.T.dot(T1) - T5 = opt_linear.T.dot(T4) - T6 = cond_cov.dot(T5) - T7 = opt_linear.dot(T6) - T8 = M1.dot(T7) - T9 = (-T8.dot(observed_target) + M1.dot(opt_linear.dot(cond_mean))) ##flipped sign of first term here - T10 = T1.T.dot(T9) - C = cov_target.dot(_P - T10) ##added missing _P in computing C + bias_target = cov_target.dot(T1.T.dot(-T4.dot(observed_target) + M1.dot(opt_linear.dot(cond_mean))) - _P) + conjugate_arg = prec_opt.dot(cond_mean) if useC: @@ -1432,16 +1430,12 @@ def selective_MLE(observed_target, offset, **solve_args) - T11 = regress_target_score.dot(M1.dot(opt_linear)) final_estimator = cov_target.dot(prec_target_nosel).dot(observed_target) \ - + T11.dot(cond_mean - soln) + C + + regress_target_score.dot(M1.dot(opt_linear)).dot(cond_mean - soln) - bias_target - T12 = prec_target.dot(T11) - T13 = T3 - unbiased_estimator = cov_target.dot(prec_target_nosel).dot(observed_target) + cov_target.dot( - _P - T12.dot(cond_mean) + T13.dot(observed_target)) + observed_info_natural = prec_target_nosel + T3 - T5.dot(hess.dot(T5.T)) - observed_info_natural = prec_target_nosel + T3 - T12.dot(hess.dot(T12.T)) + unbiased_estimator = cov_target.dot(prec_target_nosel).dot(observed_target) - bias_target observed_info_mean = cov_target.dot(observed_info_natural.dot(cov_target)) diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index 319df1b83..534c208b3 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -78,6 +78,7 @@ def test_Langevin(n=500, def test_coverage(nsim=100): + np.random.seed(0) cov, len = 0., 0. 
for i in range(nsim): @@ -363,6 +364,6 @@ def test_hiv_data(nsample=10000, if __name__ == "__main__": # test_hiv_data(split_proportion=0.50) - test_coverage(nsim=10) + test_coverage(nsim=1) From 2edd860b9c3b316aba488ded2068544d72616511 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Tue, 27 Jul 2021 00:18:26 -0400 Subject: [PATCH 124/187] compare branches in progress --- selectinf/randomized/posterior_inference.py | 4 ++-- selectinf/randomized/tests/test_posterior.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 1c83153cb..2fb5ea2ae 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -150,7 +150,6 @@ def _set_marginal_parameters(self): bias_target = self.cov_target.dot(T1.T.dot(-T4.dot(self.observed_target) + self.M1.dot(self.opt_linear.dot(self.cond_mean))) - _P) _Q = np.linalg.inv(prec_target_nosel + T3) - self.prec_marginal = self.cond_precision - T5.T.dot(_Q).dot(T5) self.linear_coef = self.cond_cov.dot(T5.T) self.offset_coef = self.cond_mean - self.linear_coef.dot(self.observed_target) @@ -163,6 +162,7 @@ def _set_marginal_parameters(self): #print("check parameters for selected+lasso ", np.allclose(np.diag(S), np.ones(S.shape[0])), np.allclose(r, np.zeros(r.shape[0]))) self.prec_target_nosel = prec_target_nosel + print("match parameters ", r, S, prec_target_nosel, self.prec_marginal, self.linear_coef, self.offset_coef) ### sampling methods @@ -188,7 +188,7 @@ def langevin_sampler(selective_posterior, for i, sample in enumerate(sampler): sampler.scaling = np.sqrt(selective_posterior.dispersion) samples[i, :] = sample.copy() - print("sample ", i, samples[i,:]) + #print("sample ", i, samples[i,:]) if i == nsample - 1: break diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index 534c208b3..e94994591 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -17,6 +17,8 @@ def test_Langevin(n=500, randomizer_scale=1., nsample=1500, nburnin=100): + + np.random.seed(0) inst, const = gaussian_instance, lasso.gaussian signal = np.sqrt(signal_fac * 2 * np.log(p)) @@ -78,7 +80,6 @@ def test_Langevin(n=500, def test_coverage(nsim=100): - np.random.seed(0) cov, len = 0., 0. 
for i in range(nsim): From ebbda3e8f175bbaa15a50a75195d56049d212355 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Tue, 27 Jul 2021 07:46:38 -0400 Subject: [PATCH 125/187] compare branches in progress --- selectinf/randomized/posterior_inference.py | 6 ++++-- selectinf/randomized/tests/test_posterior.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 2fb5ea2ae..80a0bc813 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -162,7 +162,8 @@ def _set_marginal_parameters(self): #print("check parameters for selected+lasso ", np.allclose(np.diag(S), np.ones(S.shape[0])), np.allclose(r, np.zeros(r.shape[0]))) self.prec_target_nosel = prec_target_nosel - print("match parameters ", r, S, prec_target_nosel, self.prec_marginal, self.linear_coef, self.offset_coef) + #print("match parameters ", r, S, prec_target_nosel, self.prec_marginal, self.linear_coef, self.offset_coef) + print("match parameters ", np.diag(self.prec_marginal), np.diag(self.linear_coef), self.offset_coef) ### sampling methods @@ -172,6 +173,7 @@ def langevin_sampler(selective_posterior, proposal_scale=None, step=1.): state = selective_posterior.initial_estimate + print("check INI ", state) stepsize = 1. / (step * selective_posterior.ntarget) if proposal_scale is None: @@ -188,7 +190,7 @@ def langevin_sampler(selective_posterior, for i, sample in enumerate(sampler): sampler.scaling = np.sqrt(selective_posterior.dispersion) samples[i, :] = sample.copy() - #print("sample ", i, samples[i,:]) + print("sample ", i, samples[i,:]) if i == nsample - 1: break diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index e94994591..49abbad38 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -90,8 +90,8 @@ def test_coverage(nsim=100): sigma=3., rho=0.2, randomizer_scale=1., - nsample=1500, - nburnin=100) + nsample=5, + nburnin=0) cov += cov_ len += len_ From 02d0d6db771b62dbfb299f7abe9e9e8c4d79c42c Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Tue, 27 Jul 2021 08:45:36 -0400 Subject: [PATCH 126/187] compare branches in progress --- selectinf/randomized/posterior_inference.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 80a0bc813..bc01bf7c3 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -263,8 +263,10 @@ def next(self): def __next__(self): while True: self.posterior_ = self.gradient_map(self.state, self.scaling) + _proposal = self.proposal_sqrt.dot(self._noise.rvs(self._shape)) candidate = (self.state + self.stepsize * self.proposal_scale.dot(self.posterior_[1]) - + np.sqrt(2.) * (self.proposal_sqrt.dot(self._noise.rvs(self._shape))) * self._sqrt_step) + + np.sqrt(2.) 
* _proposal * self._sqrt_step) + print("check proposal ", _proposal, self.posterior_[1], np.diag(self.proposal_scale)) if not np.all(np.isfinite(self.gradient_map(candidate, self.scaling)[1])): self.stepsize *= 0.5 From d61fd1f1f6cb8926ff5577a2f09f0ff352def60a Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Tue, 27 Jul 2021 09:13:27 -0400 Subject: [PATCH 127/187] compare branches in progress --- selectinf/randomized/posterior_inference.py | 15 +++++++-------- selectinf/randomized/tests/test_posterior.py | 6 +++--- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index bc01bf7c3..45c3742fd 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -149,21 +149,20 @@ def _set_marginal_parameters(self): bias_target = self.cov_target.dot(T1.T.dot(-T4.dot(self.observed_target) + self.M1.dot(self.opt_linear.dot(self.cond_mean))) - _P) + ###set parameters for the marginal distribution of optimization variables _Q = np.linalg.inv(prec_target_nosel + T3) self.prec_marginal = self.cond_precision - T5.T.dot(_Q).dot(T5) self.linear_coef = self.cond_cov.dot(T5.T) self.offset_coef = self.cond_mean - self.linear_coef.dot(self.observed_target) + ###set parameters for the marginal distribution of target r = np.linalg.inv(prec_target_nosel).dot(self.prec_target.dot(bias_target)) S = np.linalg.inv(prec_target_nosel).dot(self.prec_target) self.r = r self.S = S - #print("check parameters for selected+lasso ", np.allclose(np.diag(S), np.ones(S.shape[0])), np.allclose(r, np.zeros(r.shape[0]))) self.prec_target_nosel = prec_target_nosel - - #print("match parameters ", r, S, prec_target_nosel, self.prec_marginal, self.linear_coef, self.offset_coef) - print("match parameters ", np.diag(self.prec_marginal), np.diag(self.linear_coef), self.offset_coef) + # print("check parameters for selected+lasso ", np.allclose(np.diag(S), np.ones(S.shape[0])), np.allclose(r, np.zeros(r.shape[0]))) ### sampling methods @@ -172,8 +171,8 @@ def langevin_sampler(selective_posterior, nburnin=100, proposal_scale=None, step=1.): + state = selective_posterior.initial_estimate - print("check INI ", state) stepsize = 1. / (step * selective_posterior.ntarget) if proposal_scale is None: @@ -248,7 +247,7 @@ def __init__(self, self.proposal_scale = proposal_scale self._shape = self.state.shape[0] self._sqrt_step = np.sqrt(self.stepsize) - self._noise = ndist(loc=0, scale=1) + #self._noise = ndist(loc=0, scale=1) self.sample = np.copy(initial_condition) self.scaling = scaling @@ -263,10 +262,10 @@ def next(self): def __next__(self): while True: self.posterior_ = self.gradient_map(self.state, self.scaling) - _proposal = self.proposal_sqrt.dot(self._noise.rvs(self._shape)) + #_proposal = self.proposal_sqrt.dot(self._noise.rvs(self._shape)) + _proposal = self.proposal_sqrt.dot(np.random.standard_normal(self._shape)) candidate = (self.state + self.stepsize * self.proposal_scale.dot(self.posterior_[1]) + np.sqrt(2.) 
* _proposal * self._sqrt_step) - print("check proposal ", _proposal, self.posterior_[1], np.diag(self.proposal_scale)) if not np.all(np.isfinite(self.gradient_map(candidate, self.scaling)[1])): self.stepsize *= 0.5 diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index 49abbad38..3aa45b86c 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -18,7 +18,6 @@ def test_Langevin(n=500, nsample=1500, nburnin=100): - np.random.seed(0) inst, const = gaussian_instance, lasso.gaussian signal = np.sqrt(signal_fac * 2 * np.log(p)) @@ -80,6 +79,7 @@ def test_Langevin(n=500, def test_coverage(nsim=100): + np.random.seed(0) cov, len = 0., 0. for i in range(nsim): @@ -90,8 +90,8 @@ def test_coverage(nsim=100): sigma=3., rho=0.2, randomizer_scale=1., - nsample=5, - nburnin=0) + nsample=1500, + nburnin=100) cov += cov_ len += len_ From 8c386d8345dae37d3585405afc4c064e59429851 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Tue, 27 Jul 2021 09:36:00 -0400 Subject: [PATCH 128/187] some more tests --- selectinf/randomized/posterior_inference.py | 7 +++---- selectinf/randomized/tests/test_posterior.py | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 45c3742fd..c8a594ddf 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -189,7 +189,7 @@ def langevin_sampler(selective_posterior, for i, sample in enumerate(sampler): sampler.scaling = np.sqrt(selective_posterior.dispersion) samples[i, :] = sample.copy() - print("sample ", i, samples[i,:]) + #print("sample ", i, samples[i,:]) if i == nsample - 1: break @@ -247,7 +247,7 @@ def __init__(self, self.proposal_scale = proposal_scale self._shape = self.state.shape[0] self._sqrt_step = np.sqrt(self.stepsize) - #self._noise = ndist(loc=0, scale=1) + self._noise = ndist(loc=0, scale=1) self.sample = np.copy(initial_condition) self.scaling = scaling @@ -262,8 +262,7 @@ def next(self): def __next__(self): while True: self.posterior_ = self.gradient_map(self.state, self.scaling) - #_proposal = self.proposal_sqrt.dot(self._noise.rvs(self._shape)) - _proposal = self.proposal_sqrt.dot(np.random.standard_normal(self._shape)) + _proposal = self.proposal_sqrt.dot(self._noise.rvs(self._shape)) candidate = (self.state + self.stepsize * self.proposal_scale.dot(self.posterior_[1]) + np.sqrt(2.) * _proposal * self._sqrt_step) diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index 3aa45b86c..1d931d915 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -79,7 +79,7 @@ def test_Langevin(n=500, def test_coverage(nsim=100): - np.random.seed(0) + cov, len = 0., 0. 
for i in range(nsim): @@ -87,7 +87,7 @@ def test_coverage(nsim=100): p=100, signal_fac=0.5, s=5, - sigma=3., + sigma=2., rho=0.2, randomizer_scale=1., nsample=1500, @@ -365,6 +365,6 @@ def test_hiv_data(nsample=10000, if __name__ == "__main__": # test_hiv_data(split_proportion=0.50) - test_coverage(nsim=1) + test_coverage(nsim=20) From 9ea85fa5e8069205fff8a91fc7d13f8c9c530a0b Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Tue, 3 Aug 2021 12:36:23 -0400 Subject: [PATCH 129/187] updated exact ref --- selectinf/randomized/exact_reference.py | 145 +++++++++--------- .../randomized/tests/test_exact_reference.py | 16 +- 2 files changed, 78 insertions(+), 83 deletions(-) diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index 429278d1e..cf91eb800 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -12,9 +12,9 @@ def __init__(self, query, observed_target, cov_target, - cov_target_score, - solve_args={'tol': 1.e-12}, - useIP=False): + regress_target_score, + dispersion, + solve_args={'tol': 1.e-12}): """ Produce p-values and confidence intervals for targets @@ -34,48 +34,52 @@ def __init__(self, Arguments passed to solver. """ - result, inverse_info = query.selective_MLE(observed_target, - cov_target, - cov_target_score, - solve_args=solve_args)[:2] + self.solve_args = solve_args - self.linear_part = query.sampler.affine_con.linear_part - self.offset = query.sampler.affine_con.offset + linear_part = query.sampler.affine_con.linear_part + offset = query.sampler.affine_con.offset - self.regress_opt = query.sampler.logdens_transform[0] - self.cond_mean = query.cond_mean - self.prec_opt = np.linalg.inv(query.cond_cov) - self.cond_cov = query.cond_cov + opt_linear = query.opt_linear - self.observed_target = observed_target - self.cov_target_score = cov_target_score + observed_score = query.observed_score_state + query.observed_subgrad + + result, inverse_info, log_ref = query.selective_MLE(observed_target, + cov_target, + regress_target_score, + dispersion) + + cond_cov = query.cond_cov + self.cond_precision = np.linalg.inv(cond_cov) + self.cond_cov = cond_cov self.cov_target = cov_target + self.prec_target = np.linalg.inv(cov_target) + + self.observed_target = observed_target + self.regress_target_score = regress_target_score + self.opt_linear = opt_linear + self.observed_score = observed_score + + self.M1 = query.M1 * dispersion + self.M2 = query.M2 * (dispersion ** 2) + self.M3 = query.M3 * (dispersion ** 2) + self.feasible_point = query.observed_opt_state - self.observed_soln = query.observed_opt_state + self.cond_mean = query.cond_mean + self.linear_part = linear_part + self.offset = offset - self.prec_randomizer = query.sampler.prec_randomizer - self.score_offset = query.observed_score_state + query.sampler.logdens_transform[1] + self.feasible_point = query.observed_opt_state self.ntarget = ntarget = cov_target.shape[0] _scale = 4 * np.sqrt(np.diag(inverse_info)) - if useIP == False: - ngrid = 1000 - self.stat_grid = np.zeros((ntarget, ngrid)) - for j in range(ntarget): - self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], - observed_target[j] + 1.5 * _scale[j], - num=ngrid) - else: - ngrid = 60 - self.stat_grid = np.zeros((ntarget, ngrid)) - for j in range(ntarget): - self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], - observed_target[j] + 1.5 * _scale[j], - num=ngrid) - - self.opt_linear = query.opt_linear - self.useIP = useIP + ngrid = 1000 + 
self.stat_grid = np.zeros((ntarget, ngrid)) + for j in range(ntarget): + self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], + observed_target[j] + 1.5 * _scale[j], + num=ngrid) + self.inverse_info = inverse_info def summary(self, @@ -120,14 +124,14 @@ def summary(self, def log_reference(self, observed_target, cov_target, - cov_target_score, + regress_target_score, + linear_coef, grid): if np.asarray(observed_target).shape in [(), (0,)]: raise ValueError('no target specified') prec_target = np.linalg.inv(cov_target) - regress_opt_target = self.regress_opt.dot(cov_target_score.T.dot(prec_target)) ref_hat = [] @@ -138,15 +142,15 @@ def log_reference(self, # cond_mean is "something" times D # Gamma is cov_target_score.T.dot(prec_target) - num_opt = self.prec_opt.shape[0] + num_opt = self.cond_precision.shape[0] num_con = self.linear_part.shape[0] - cond_mean_grid = (regress_opt_target.dot(np.atleast_1d(grid[k] - observed_target)) + + cond_mean_grid = (linear_coef.dot(np.atleast_1d(grid[k] - observed_target)) + self.cond_mean) #direction for decomposing o - eta = self.prec_opt.dot(self.regress_opt.dot(cov_target_score.T)) + eta = self.cond_precision.dot(linear_coef).dot(cov_target) implied_mean = np.asscalar(eta.T.dot(cond_mean_grid)) implied_cov = np.asscalar(eta.T.dot(self.cond_cov).dot(eta)) @@ -156,7 +160,7 @@ def log_reference(self, R = np.identity(num_opt) - _A.dot(eta.T) A = self.linear_part.dot(_A).reshape((-1,)) - b = -self.linear_part.dot(R).dot(self.observed_soln) + b = -self.linear_part.dot(R).dot(self.feasible_point) trunc_ = np.true_divide((self.offset + b), A) @@ -198,37 +202,24 @@ def _construct_families(self): self._families = [] for m in range(self.ntarget): - p = self.cov_target_score.shape[1] + p = self.regress_target_score.shape[1] observed_target_uni = (self.observed_target[m]).reshape((1,)) cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) - cov_target_score_uni = self.cov_target_score[m, :].reshape((1, p)) + regress_target_score_uni = self.regress_target_score[m, :].reshape((1, p)) var_target = 1. / ((self.precs[m])[0, 0]) log_ref = self.log_reference(observed_target_uni, cov_target_uni, - cov_target_score_uni, + regress_target_score_uni, + self.T[m], self.stat_grid[m]) - if self.useIP == False: - logW = (log_ref - 0.5 * (self.stat_grid[m] - self.observed_target[m]) ** 2 / var_target) - logW -= logW.max() - self._families.append(discrete_family(self.stat_grid[m], - np.exp(logW))) - else: - approx_fn = interp1d(self.stat_grid[m], - log_ref, - kind='quadratic', - bounds_error=False, - fill_value='extrapolate') - grid = np.linspace(self.stat_grid[m].min(), self.stat_grid[m].max(), 1000) - logW = (approx_fn(grid) - - 0.5 * (grid - self.observed_target[m]) ** 2 / var_target) - - logW -= logW.max() - self._families.append(discrete_family(grid, - np.exp(logW))) + logW = (log_ref - 0.5 * (self.stat_grid[m] - self.observed_target[m]) ** 2 / var_target) + logW -= logW.max() + self._families.append(discrete_family(self.stat_grid[m], + np.exp(logW))) def _pivots(self, mean_parameter, @@ -290,36 +281,42 @@ def _construct_density(self): precs = {} S = {} r = {} + T = {} - p = self.cov_target_score.shape[1] + p = self.regress_target_score.shape[1] for m in range(self.ntarget): observed_target_uni = (self.observed_target[m]).reshape((1,)) cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) prec_target = 1. 
/ cov_target_uni - cov_target_score_uni = self.cov_target_score[m, :].reshape((1, p)) + regress_target_score_uni = self.regress_target_score[m, :].reshape((1, p)) + + T1 = regress_target_score_uni.T.dot(prec_target) + T2 = T1.T.dot(self.M2.dot(T1)) + T3 = T1.T.dot(self.M3.dot(T1)) + T4 = self.M1.dot(self.opt_linear).dot(self.cond_cov).dot(self.opt_linear.T.dot(self.M1.T.dot(T1))) + T5 = T1.T.dot(self.M1.dot(self.opt_linear)) + + _T = self.cond_cov.dot(T5.T) - regress_score_target = cov_target_score_uni.T.dot(prec_target) - resid_score_target = (self.score_offset - regress_score_target.dot(observed_target_uni)).reshape( - (regress_score_target.shape[0],)) + prec_target_nosel = prec_target + T2 - T3 - regress_opt_target = self.regress_opt.dot(regress_score_target) - resid_mean_opt_target = (self.cond_mean - regress_opt_target.dot(observed_target_uni)).reshape((regress_opt_target.shape[0],)) + _P = -(T1.T.dot(self.M1.dot(self.observed_score)) + T2.dot(observed_target_uni)) - prec_target_nosel = prec_target + (regress_score_target.T.dot(regress_score_target) * self.prec_randomizer) - regress_opt_target.T.dot( - self.prec_opt).dot(regress_opt_target) + bias_target = cov_target_uni.dot(T1.T.dot(-T4.dot(observed_target_uni) + self.M1.dot(self.opt_linear.dot(self.cond_mean))) - _P) - _P = regress_score_target.T.dot(resid_score_target) * self.prec_randomizer - _r = (1. / _prec).dot(regress_opt_target.T.dot(self.prec_opt).dot(resid_mean_opt_target) - _P) - _S = np.linalg.inv(_prec).dot(prec_target) + _r = np.linalg.inv(prec_target_nosel).dot(prec_target.dot(bias_target)) + _S = np.linalg.inv(prec_target_nosel).dot(prec_target) S[m] = _S r[m] = _r precs[m] = prec_target_nosel + T[m] = _T self.precs = precs self.S = S self.r = r + self.T = T diff --git a/selectinf/randomized/tests/test_exact_reference.py b/selectinf/randomized/tests/test_exact_reference.py index 7c1a5efb4..9b0b0476d 100644 --- a/selectinf/randomized/tests/test_exact_reference.py +++ b/selectinf/randomized/tests/test_exact_reference.py @@ -12,7 +12,6 @@ def test_inf(n=500, rho=0.4, randomizer_scale=1., equicorrelated=False, - useIP=False, CI=True): while True: @@ -55,7 +54,8 @@ def test_inf(n=500, (observed_target, cov_target, - cov_target_score, + regress_target_score, + dispersion, alternatives) = selected_targets(conv.loglike, conv._W, nonzero, @@ -64,8 +64,8 @@ def test_inf(n=500, exact_grid_inf = exact_grid_inference(conv, observed_target, cov_target, - cov_target_score, - useIP=useIP) + regress_target_score, + dispersion=dispersion) if CI is False: pivot = exact_grid_inf._pivots(beta_target) @@ -89,15 +89,14 @@ def main(nsim=300, CI = False): _pivot = [] for i in range(nsim): - _pivot.extend(test_inf(n=100, - p=400, + _pivot.extend(test_inf(n=400, + p=100, signal_fac=1., s=0, sigma=2., rho=0.30, randomizer_scale=0.7, equicorrelated=True, - useIP=False, CI=False)) print("iteration completed ", i) @@ -122,7 +121,6 @@ def main(nsim=300, CI = False): rho=0.30, randomizer_scale=0.7, equicorrelated=True, - useIP=False, CI=True) coverage_ += cov @@ -134,4 +132,4 @@ def main(nsim=300, CI = False): if __name__ == "__main__": - main(nsim=100, CI=True) \ No newline at end of file + main(nsim=100, CI=False) \ No newline at end of file From 2b72a5fdfc5a09d5543fc6466bf6fa7bdae4c71b Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Sat, 7 Aug 2021 23:47:53 -0400 Subject: [PATCH 130/187] check in progress: w master --- .../randomized/tests/test_exact_reference.py | 202 +++++++++++++----- 1 file changed, 150 insertions(+), 52 deletions(-) 
diff --git a/selectinf/randomized/tests/test_exact_reference.py b/selectinf/randomized/tests/test_exact_reference.py index 9b0b0476d..2f3e4ca66 100644 --- a/selectinf/randomized/tests/test_exact_reference.py +++ b/selectinf/randomized/tests/test_exact_reference.py @@ -43,6 +43,7 @@ def test_inf(n=500, conv = const(X, Y, W, + ridge_term=0., randomizer_scale=randomizer_scale * np.sqrt(dispersion)) signs = conv.fit() @@ -78,58 +79,155 @@ def test_inf(n=500, mle_length = 1.65*2 * np.sqrt(np.diag(exact_grid_inf.inverse_info)) return np.mean(coverage), np.mean(length), np.mean(mle_length) -def main(nsim=300, CI = False): - - if CI is False: - - import matplotlib as mpl - mpl.use('tkagg') - import matplotlib.pyplot as plt - from statsmodels.distributions.empirical_distribution import ECDF - - _pivot = [] - for i in range(nsim): - _pivot.extend(test_inf(n=400, - p=100, - signal_fac=1., - s=0, - sigma=2., - rho=0.30, - randomizer_scale=0.7, - equicorrelated=True, - CI=False)) - - print("iteration completed ", i) - - plt.clf() - ecdf_pivot = ECDF(np.asarray(_pivot)) - grid = np.linspace(0, 1, 101) - plt.plot(grid, ecdf_pivot(grid), c='blue') - plt.plot(grid, grid, 'k--') - plt.show() - - else: - coverage_ = 0. - length_ = 0. - mle_length_= 0. - for n in range(nsim): - cov, len, mle_len = test_inf(n=400, - p=100, - signal_fac=0.5, - s=5, - sigma=2., - rho=0.30, - randomizer_scale=0.7, - equicorrelated=True, - CI=True) - - coverage_ += cov - length_ += len - mle_length_ += mle_len - print("coverage so far ", coverage_ / (n + 1.)) - print("lengths so far ", length_ / (n + 1.), mle_length_/ (n + 1.)) - print("iteration completed ", n + 1) +# def main(nsim=300, CI = False): +# +# if CI is False: +# +# import matplotlib as mpl +# mpl.use('tkagg') +# import matplotlib.pyplot as plt +# from statsmodels.distributions.empirical_distribution import ECDF +# +# _pivot = [] +# for i in range(nsim): +# _pivot.extend(test_inf(n=400, +# p=100, +# signal_fac=1., +# s=0, +# sigma=2., +# rho=0.30, +# randomizer_scale=0.5, +# equicorrelated=True, +# CI=False)) +# +# print("iteration completed ", i) +# +# plt.clf() +# ecdf_pivot = ECDF(np.asarray(_pivot)) +# grid = np.linspace(0, 1, 101) +# plt.plot(grid, ecdf_pivot(grid), c='blue') +# plt.plot(grid, grid, 'k--') +# plt.show() +# +# else: +# coverage_ = 0. +# length_ = 0. +# mle_length_= 0. 
+# for n in range(nsim): +# cov, len, mle_len = test_inf(n=400, +# p=100, +# signal_fac=0.5, +# s=5, +# sigma=2., +# rho=0.30, +# randomizer_scale=0.7, +# equicorrelated=True, +# CI=True) +# +# coverage_ += cov +# length_ += len +# mle_length_ += mle_len +# print("coverage so far ", coverage_ / (n + 1.)) +# print("lengths so far ", length_ / (n + 1.), mle_length_/ (n + 1.)) +# print("iteration completed ", n + 1) + + +def test_selected_instance(seedn, + n=500, + p=100, + signal_fac=1., + s=5, + sigma=2., + rho=0.4, + randomizer_scale=1., + equicorrelated=False): + while True: + np.random.seed(seedn) + inst, const = gaussian_instance, lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=equicorrelated, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + + if n > (2 * p): + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + else: + dispersion = sigma_ ** 2 + + eps = np.random.standard_normal((n, 2000)) * Y.std() + W = 0.7 * np.median(np.abs(X.T.dot(eps)).max(1)) + + conv = const(X, + Y, + W, + ridge_term=0., + randomizer_scale=randomizer_scale * np.sqrt(dispersion)) + + signs = conv.fit() + nonzero = signs != 0 + print("size of selected set ", nonzero.sum()) + + if nonzero.sum() > 0: + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + + (observed_target, + cov_target, + regress_target_score, + dispersion, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + + exact_grid_inf = exact_grid_inference(conv, + observed_target, + cov_target, + regress_target_score, + dispersion=dispersion) + + lci, uci = exact_grid_inf._intervals(level=0.90) + coverage = (lci < beta_target) * (uci > beta_target) + length = uci - lci + mle_length = 1.65 * 2 * np.sqrt(np.diag(exact_grid_inf.inverse_info)) + return coverage, length, mle_length + +def main(nsim =50): + + import pandas as pd + column_names = ["Experiment Replicate", "Coverage", "Length-ER", "Length-MLE"] + master_DF = pd.DataFrame(columns=column_names) + DF = pd.DataFrame(columns=column_names) + + n, p, s = 500, 100, 5 + for i in range(nsim): + full_dispersion = True + cov, len_er, len_mle = test_selected_instance(seedn=i, n=n, p=p, s=s, signal_fac=1.2) + DF["Coverage"] = pd.Series(cov) + DF["Length-ER"] = pd.Series(len_er) + DF["Length-MLE"] = pd.Series(len_mle) + DF["Experiment Replicate"] = pd.Series((i*np.ones(len(cov),int)).tolist()) + + master_DF = DF.append(master_DF, ignore_index=True) + + import os + outpath = os.path.dirname(__file__) + + outfile_mse_html = os.path.join(outpath, "compare_er.html") + outfile_mse_csv = os.path.join(outpath, "compare_er.csv") + + master_DF.to_html(outfile_mse_html, index=False) + master_DF.to_csv(outfile_mse_csv, index=False) if __name__ == "__main__": - main(nsim=100, CI=False) \ No newline at end of file + main(nsim=10) \ No newline at end of file From 5eadfe6181565d6ab4a2750ccd99d9ce1b342c39 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 9 Aug 2021 00:31:37 -0400 Subject: [PATCH 131/187] updated approx reference --- selectinf/randomized/approx_reference.py | 101 +++--- selectinf/randomized/exact_reference.py | 14 +- .../randomized/tests/test_approx_reference.py | 27 +- .../randomized/tests/test_exact_reference.py | 302 +++++++++--------- 4 files changed, 229 insertions(+), 215 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py 
index 40e7363c4..f60f7b2a8 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -13,7 +13,8 @@ def __init__(self, query, observed_target, cov_target, - cov_target_score, + regress_target_score, + dispersion, solve_args={'tol': 1.e-12}, useIP=False): @@ -37,27 +38,39 @@ def __init__(self, self.solve_args = solve_args - result, inverse_info = query.selective_MLE(observed_target, - cov_target, - cov_target_score, - solve_args=solve_args)[:2] + linear_part = query.sampler.affine_con.linear_part + offset = query.sampler.affine_con.offset - self.linear_part = query.sampler.affine_con.linear_part - self.offset = query.sampler.affine_con.offset + opt_linear = query.opt_linear - self.regress_opt = query.sampler.logdens_transform[0] - self.cond_mean = query.cond_mean - self.prec_opt = np.linalg.inv(query.cond_cov) - self.cond_cov = query.cond_cov + observed_score = query.observed_score_state + query.observed_subgrad - self.observed_target = observed_target - self.cov_target_score = cov_target_score + result, inverse_info, log_ref = query.selective_MLE(observed_target, + cov_target, + regress_target_score, + dispersion) + + cond_cov = query.cond_cov + self.cond_precision = np.linalg.inv(cond_cov) + self.cond_cov = cond_cov self.cov_target = cov_target + self.prec_target = np.linalg.inv(cov_target) + + self.observed_target = observed_target + self.regress_target_score = regress_target_score + self.opt_linear = opt_linear + self.observed_score = observed_score + + self.M1 = query.M1 * dispersion + self.M2 = query.M2 * (dispersion ** 2) + self.M3 = query.M3 * (dispersion ** 2) + self.feasible_point = query.observed_opt_state - self.observed_soln = query.observed_opt_state + self.cond_mean = query.cond_mean + self.linear_part = linear_part + self.offset = offset - self.prec_randomizer = query.sampler.prec_randomizer - self.score_offset = query.observed_score_state + query.sampler.logdens_transform[1] + self.feasible_point = query.observed_opt_state self.ntarget = ntarget = cov_target.shape[0] _scale = 4 * np.sqrt(np.diag(inverse_info)) @@ -79,6 +92,7 @@ def __init__(self, self.opt_linear = query.opt_linear self.useIP = useIP + self.inverse_info = inverse_info def summary(self, alternatives=None, @@ -122,7 +136,7 @@ def summary(self, def _approx_log_reference(self, observed_target, cov_target, - cov_target_score, + linear_coef, grid): """ @@ -131,11 +145,9 @@ def _approx_log_reference(self, if np.asarray(observed_target).shape in [(), (0,)]: raise ValueError('no target specified') - prec_target = np.linalg.inv(cov_target) - regress_opt_target = self.regress_opt.dot(cov_target_score.T.dot(prec_target)) - ref_hat = [] solver = solve_barrier_affine_py + for k in range(grid.shape[0]): # in the usual D = N + Gamma theta.hat, # regress_opt_target is "something" times Gamma, @@ -143,13 +155,12 @@ def _approx_log_reference(self, # cond_mean is "something" times D # Gamma is cov_target_score.T.dot(prec_target) - cond_mean_grid = (regress_opt_target.dot(np.atleast_1d(grid[k] - observed_target)) + - self.cond_mean) - conjugate_arg = self.prec_opt.dot(cond_mean_grid) + cond_mean_grid = (linear_coef.dot(np.atleast_1d(grid[k] - observed_target)) + self.cond_mean) + conjugate_arg = self.cond_precision.dot(cond_mean_grid) val, _, _ = solver(conjugate_arg, - self.prec_opt, - self.observed_soln, + self.cond_precision, + self.feasible_point, self.linear_part, self.offset, **self.solve_args) @@ -165,26 +176,25 @@ def _construct_families(self): self._families = [] for m in 
range(self.ntarget): - p = self.cov_target_score.shape[1] - observed_target_uni = (self.observed_target[m]).reshape((1,)) + observed_target_uni = (self.observed_target[m]).reshape((1,)) cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) - cov_target_score_uni = self.cov_target_score[m, :].reshape((1, p)) var_target = 1. / ((self.precs[m])[0, 0]) approx_log_ref = self._approx_log_reference(observed_target_uni, cov_target_uni, - cov_target_score_uni, + self.T[m], self.stat_grid[m]) - if self.useIP == False: + logW = (approx_log_ref - 0.5 * (self.stat_grid[m] - self.observed_target[m]) ** 2 / var_target) logW -= logW.max() self._families.append(discrete_family(self.stat_grid[m], np.exp(logW))) else: + approx_fn = interp1d(self.stat_grid[m], approx_log_ref, kind='quadratic', @@ -277,33 +287,40 @@ def _construct_density(self): precs = {} S = {} r = {} + T = {} - p = self.cov_target_score.shape[1] + p = self.regress_target_score.shape[1] for m in range(self.ntarget): observed_target_uni = (self.observed_target[m]).reshape((1,)) cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) prec_target = 1. / cov_target_uni - cov_target_score_uni = self.cov_target_score[m, :].reshape((1, p)) + regress_target_score_uni = self.regress_target_score[m, :].reshape((1, p)) + + T1 = regress_target_score_uni.T.dot(prec_target) + T2 = T1.T.dot(self.M2.dot(T1)) + T3 = T1.T.dot(self.M3.dot(T1)) + T4 = self.M1.dot(self.opt_linear).dot(self.cond_cov).dot(self.opt_linear.T.dot(self.M1.T.dot(T1))) + T5 = T1.T.dot(self.M1.dot(self.opt_linear)) + + _T = self.cond_cov.dot(T5.T) - regress_score_target = cov_target_score_uni.T.dot(prec_target) - resid_score_target = (self.score_offset - regress_score_target.dot(observed_target_uni)).reshape( - (regress_score_target.shape[0],)) + prec_target_nosel = prec_target + T2 - T3 - regress_opt_target = self.regress_opt.dot(regress_score_target) - resid_mean_opt_target = (self.cond_mean - regress_opt_target.dot(observed_target_uni)).reshape((regress_opt_target.shape[0],)) + _P = -(T1.T.dot(self.M1.dot(self.observed_score)) + T2.dot(observed_target_uni)) - prec_target_nosel = prec_target + (regress_score_target.T.dot(regress_score_target) * self.prec_randomizer) - regress_opt_target.T.dot( - self.prec_opt).dot(regress_opt_target) + bias_target = cov_target_uni.dot( + T1.T.dot(-T4.dot(observed_target_uni) + self.M1.dot(self.opt_linear.dot(self.cond_mean))) - _P) - _P = regress_score_target.T.dot(resid_score_target) * self.prec_randomizer - _r = (1. 
/ _prec).dot(regress_opt_target.T.dot(self.prec_opt).dot(resid_mean_opt_target) - _P) - _S = np.linalg.inv(_prec).dot(prec_target) + _r = np.linalg.inv(prec_target_nosel).dot(prec_target.dot(bias_target)) + _S = np.linalg.inv(prec_target_nosel).dot(prec_target) S[m] = _S r[m] = _r precs[m] = prec_target_nosel + T[m] = _T self.precs = precs self.S = S self.r = r + self.T = T \ No newline at end of file diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index cf91eb800..df90eacec 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -44,9 +44,9 @@ def __init__(self, observed_score = query.observed_score_state + query.observed_subgrad result, inverse_info, log_ref = query.selective_MLE(observed_target, - cov_target, - regress_target_score, - dispersion) + cov_target, + regress_target_score, + dispersion) cond_cov = query.cond_cov self.cond_precision = np.linalg.inv(cond_cov) @@ -124,15 +124,12 @@ def summary(self, def log_reference(self, observed_target, cov_target, - regress_target_score, linear_coef, grid): if np.asarray(observed_target).shape in [(), (0,)]: raise ValueError('no target specified') - prec_target = np.linalg.inv(cov_target) - ref_hat = [] for k in range(grid.shape[0]): @@ -202,17 +199,14 @@ def _construct_families(self): self._families = [] for m in range(self.ntarget): - p = self.regress_target_score.shape[1] - observed_target_uni = (self.observed_target[m]).reshape((1,)) + observed_target_uni = (self.observed_target[m]).reshape((1,)) cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) - regress_target_score_uni = self.regress_target_score[m, :].reshape((1, p)) var_target = 1. / ((self.precs[m])[0, 0]) log_ref = self.log_reference(observed_target_uni, cov_target_uni, - regress_target_score_uni, self.T[m], self.stat_grid[m]) diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index bbfe4b719..5d9458809 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -109,7 +109,8 @@ def test_approx_pivot(n=500, (observed_target, cov_target, - cov_target_score, + regress_target_score, + dispersion, alternatives) = selected_targets(conv.loglike, conv._W, nonzero, @@ -118,10 +119,10 @@ def test_approx_pivot(n=500, approximate_grid_inf = approximate_grid_inference(conv, observed_target, cov_target, - cov_target_score, + regress_target_score, + dispersion=dispersion, useIP=useIP) - pivot = approximate_grid_inf._approx_pivots(beta_target) return pivot @@ -134,7 +135,8 @@ def test_approx_ci(n=500, sigma=2., rho=0.4, randomizer_scale=1., - level=0.9): + level=0.9, + useIP=False): inst, const = gaussian_instance, lasso.gaussian signal = np.sqrt(signal_fac * 2 * np.log(p)) @@ -167,7 +169,8 @@ def test_approx_ci(n=500, (observed_target, cov_target, - cov_target_score, + regress_target_score, + dispersion, alternatives) = selected_targets(conv.loglike, conv._W, nonzero, @@ -175,7 +178,8 @@ def test_approx_ci(n=500, result, inverse_info = conv.selective_MLE(observed_target, cov_target, - cov_target_score)[:2] + regress_target_score, + dispersion)[:2] _scale = 4 * np.sqrt(np.diag(inverse_info)) scale_ = np.max(_scale) @@ -184,15 +188,12 @@ def test_approx_ci(n=500, approximate_grid_inf = approximate_grid_inference(conv, observed_target, cov_target, - cov_target_score, - useIP=False) + regress_target_score, + dispersion=dispersion, + useIP=useIP) lci, uci = 
approximate_grid_inf._approx_intervals(level) - S = conv.approximate_grid_inference(observed_target, - cov_target, - cov_target_score) - beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) coverage = (lci < beta_target) * (uci > beta_target) length = uci - lci @@ -214,7 +215,7 @@ def main(nsim=300, CI = False): _pivot.extend(test_approx_pivot(n=100, p=400, signal_fac=0.5, - s=0, + s=5, sigma=2., rho=0.30, randomizer_scale=1., diff --git a/selectinf/randomized/tests/test_exact_reference.py b/selectinf/randomized/tests/test_exact_reference.py index 2f3e4ca66..ef66cc963 100644 --- a/selectinf/randomized/tests/test_exact_reference.py +++ b/selectinf/randomized/tests/test_exact_reference.py @@ -79,155 +79,157 @@ def test_inf(n=500, mle_length = 1.65*2 * np.sqrt(np.diag(exact_grid_inf.inverse_info)) return np.mean(coverage), np.mean(length), np.mean(mle_length) -# def main(nsim=300, CI = False): -# -# if CI is False: -# -# import matplotlib as mpl -# mpl.use('tkagg') -# import matplotlib.pyplot as plt -# from statsmodels.distributions.empirical_distribution import ECDF -# -# _pivot = [] -# for i in range(nsim): -# _pivot.extend(test_inf(n=400, -# p=100, -# signal_fac=1., -# s=0, -# sigma=2., -# rho=0.30, -# randomizer_scale=0.5, -# equicorrelated=True, -# CI=False)) -# -# print("iteration completed ", i) -# -# plt.clf() -# ecdf_pivot = ECDF(np.asarray(_pivot)) -# grid = np.linspace(0, 1, 101) -# plt.plot(grid, ecdf_pivot(grid), c='blue') -# plt.plot(grid, grid, 'k--') -# plt.show() -# -# else: -# coverage_ = 0. -# length_ = 0. -# mle_length_= 0. -# for n in range(nsim): -# cov, len, mle_len = test_inf(n=400, -# p=100, -# signal_fac=0.5, -# s=5, -# sigma=2., -# rho=0.30, -# randomizer_scale=0.7, -# equicorrelated=True, -# CI=True) -# -# coverage_ += cov -# length_ += len -# mle_length_ += mle_len -# print("coverage so far ", coverage_ / (n + 1.)) -# print("lengths so far ", length_ / (n + 1.), mle_length_/ (n + 1.)) -# print("iteration completed ", n + 1) - - -def test_selected_instance(seedn, - n=500, - p=100, - signal_fac=1., - s=5, - sigma=2., - rho=0.4, - randomizer_scale=1., - equicorrelated=False): - - while True: - np.random.seed(seedn) - inst, const = gaussian_instance, lasso.gaussian - signal = np.sqrt(signal_fac * 2 * np.log(p)) - - X, Y, beta = inst(n=n, - p=p, - signal=signal, - s=s, - equicorrelated=equicorrelated, - rho=rho, - sigma=sigma, - random_signs=True)[:3] - - n, p = X.shape - - sigma_ = np.std(Y) - - if n > (2 * p): - dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) - else: - dispersion = sigma_ ** 2 - - eps = np.random.standard_normal((n, 2000)) * Y.std() - W = 0.7 * np.median(np.abs(X.T.dot(eps)).max(1)) - - conv = const(X, - Y, - W, - ridge_term=0., - randomizer_scale=randomizer_scale * np.sqrt(dispersion)) - - signs = conv.fit() - nonzero = signs != 0 - print("size of selected set ", nonzero.sum()) - - if nonzero.sum() > 0: - beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) - - (observed_target, - cov_target, - regress_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=dispersion) - - exact_grid_inf = exact_grid_inference(conv, - observed_target, - cov_target, - regress_target_score, - dispersion=dispersion) - - lci, uci = exact_grid_inf._intervals(level=0.90) - coverage = (lci < beta_target) * (uci > beta_target) - length = uci - lci - mle_length = 1.65 * 2 * np.sqrt(np.diag(exact_grid_inf.inverse_info)) - return coverage, length, mle_length - -def main(nsim 
=50): - - import pandas as pd - column_names = ["Experiment Replicate", "Coverage", "Length-ER", "Length-MLE"] - master_DF = pd.DataFrame(columns=column_names) - DF = pd.DataFrame(columns=column_names) - - n, p, s = 500, 100, 5 - for i in range(nsim): - full_dispersion = True - cov, len_er, len_mle = test_selected_instance(seedn=i, n=n, p=p, s=s, signal_fac=1.2) - DF["Coverage"] = pd.Series(cov) - DF["Length-ER"] = pd.Series(len_er) - DF["Length-MLE"] = pd.Series(len_mle) - DF["Experiment Replicate"] = pd.Series((i*np.ones(len(cov),int)).tolist()) - - master_DF = DF.append(master_DF, ignore_index=True) - - import os - outpath = os.path.dirname(__file__) - - outfile_mse_html = os.path.join(outpath, "compare_er.html") - outfile_mse_csv = os.path.join(outpath, "compare_er.csv") - - master_DF.to_html(outfile_mse_html, index=False) - master_DF.to_csv(outfile_mse_csv, index=False) +def main(nsim=300, CI = False): + + if CI is False: + + import matplotlib as mpl + mpl.use('tkagg') + import matplotlib.pyplot as plt + from statsmodels.distributions.empirical_distribution import ECDF + + _pivot = [] + for i in range(nsim): + _pivot.extend(test_inf(n=400, + p=100, + signal_fac=1., + s=0, + sigma=2., + rho=0.30, + randomizer_scale=0.5, + equicorrelated=True, + CI=False)) + + print("iteration completed ", i) + + plt.clf() + ecdf_pivot = ECDF(np.asarray(_pivot)) + grid = np.linspace(0, 1, 101) + plt.plot(grid, ecdf_pivot(grid), c='blue') + plt.plot(grid, grid, 'k--') + plt.show() + + else: + coverage_ = 0. + length_ = 0. + mle_length_= 0. + for n in range(nsim): + cov, len, mle_len = test_inf(n=400, + p=100, + signal_fac=1, + s=0, + sigma=2., + rho=0.30, + randomizer_scale=0.5, + equicorrelated=True, + CI=True) + + coverage_ += cov + length_ += len + mle_length_ += mle_len + print("coverage so far ", coverage_ / (n + 1.)) + print("lengths so far ", length_ / (n + 1.), mle_length_/ (n + 1.)) + print("iteration completed ", n + 1) if __name__ == "__main__": - main(nsim=10) \ No newline at end of file + main(nsim=500, CI=False) + +# def test_selected_instance(seedn, +# n=500, +# p=100, +# signal_fac=1., +# s=5, +# sigma=2., +# rho=0.4, +# randomizer_scale=1., +# equicorrelated=False): +# +# while True: +# np.random.seed(seedn) +# inst, const = gaussian_instance, lasso.gaussian +# signal = np.sqrt(signal_fac * 2 * np.log(p)) +# +# X, Y, beta = inst(n=n, +# p=p, +# signal=signal, +# s=s, +# equicorrelated=equicorrelated, +# rho=rho, +# sigma=sigma, +# random_signs=True)[:3] +# +# n, p = X.shape +# +# sigma_ = np.std(Y) +# +# if n > (2 * p): +# dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) +# else: +# dispersion = sigma_ ** 2 +# +# eps = np.random.standard_normal((n, 2000)) * Y.std() +# W = 0.7 * np.median(np.abs(X.T.dot(eps)).max(1)) +# +# conv = const(X, +# Y, +# W, +# ridge_term=0., +# randomizer_scale=randomizer_scale * np.sqrt(dispersion)) +# +# signs = conv.fit() +# nonzero = signs != 0 +# print("size of selected set ", nonzero.sum()) +# +# if nonzero.sum() > 0: +# beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) +# +# (observed_target, +# cov_target, +# regress_target_score, +# dispersion, +# alternatives) = selected_targets(conv.loglike, +# conv._W, +# nonzero, +# dispersion=dispersion) +# +# exact_grid_inf = exact_grid_inference(conv, +# observed_target, +# cov_target, +# regress_target_score, +# dispersion=dispersion) +# +# lci, uci = exact_grid_inf._intervals(level=0.90) +# coverage = (lci < beta_target) * (uci > beta_target) +# length = uci - lci +# 
mle_length = 1.65 * 2 * np.sqrt(np.diag(exact_grid_inf.inverse_info)) +# return coverage, length, mle_length +# +# def main(nsim =50): +# +# import pandas as pd +# column_names = ["Experiment Replicate", "Coverage", "Length-ER", "Length-MLE"] +# master_DF = pd.DataFrame(columns=column_names) +# DF = pd.DataFrame(columns=column_names) +# +# n, p, s = 500, 100, 5 +# for i in range(nsim): +# full_dispersion = True +# cov, len_er, len_mle = test_selected_instance(seedn=i, n=n, p=p, s=s, signal_fac=1.2) +# DF["Coverage"] = pd.Series(cov) +# DF["Length-ER"] = pd.Series(len_er) +# DF["Length-MLE"] = pd.Series(len_mle) +# DF["Experiment Replicate"] = pd.Series((i*np.ones(len(cov),int)).tolist()) +# +# master_DF = DF.append(master_DF, ignore_index=True) +# +# import os +# outpath = os.path.dirname(__file__) +# +# outfile_mse_html = os.path.join(outpath, "compare_er.html") +# outfile_mse_csv = os.path.join(outpath, "compare_er.csv") +# +# master_DF.to_html(outfile_mse_html, index=False) +# master_DF.to_csv(outfile_mse_csv, index=False) +# +# if __name__ == "__main__": +# main(nsim=10) \ No newline at end of file From bb2802caee33cd17550de47adbd289904a1d139f Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Tue, 10 Aug 2021 10:23:50 -0400 Subject: [PATCH 132/187] clean up for all the tests --- selectinf/randomized/approx_reference.py | 1 - selectinf/randomized/posterior_inference.py | 1 - .../randomized/tests/test_approx_reference.py | 199 ++++------------- .../randomized/tests/test_exact_reference.py | 104 +-------- selectinf/randomized/tests/test_posterior.py | 49 +++-- .../tests/test_selective_MLE_high.py | 206 ++++-------------- 6 files changed, 118 insertions(+), 442 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index f60f7b2a8..ac9868136 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -243,7 +243,6 @@ def _approx_pivots(self, var_target = 1. 
/ ((self.precs[m])[0, 0]) mean = self.S[m].dot(mean_parameter[m].reshape((1,))) + self.r[m] - #print("mean ", np.allclose(mean[0], mean_parameter[m]), self.r[m], self.S[m]) # construction of pivot from families follows `selectinf.learning.core` _cdf = family.cdf((mean[0] - self.observed_target[m]) / var_target, x=self.observed_target[m]) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index c8a594ddf..4395e4499 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -162,7 +162,6 @@ def _set_marginal_parameters(self): self.r = r self.S = S self.prec_target_nosel = prec_target_nosel - # print("check parameters for selected+lasso ", np.allclose(np.diag(S), np.ones(S.shape[0])), np.allclose(r, np.zeros(r.shape[0]))) ### sampling methods diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index 5d9458809..839cee8ca 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -4,70 +4,16 @@ from ..lasso import lasso, selected_targets from ..approx_reference import approximate_grid_inference -def test_summary(n=500, - p=100, - signal_fac=1., - s=5, - sigma=2., - rho=0.4, - randomizer_scale=1.): - - inst, const = gaussian_instance, lasso.gaussian - signal = np.sqrt(signal_fac * 2 * np.log(p)) - - X, Y, beta = inst(n=n, - p=p, - signal=signal, - s=s, - equicorrelated=False, - rho=rho, - sigma=sigma, - random_signs=True)[:3] - - n, p = X.shape - - sigma_ = np.std(Y) - dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) - - W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ - - conv = const(X, - Y, - W, - randomizer_scale=randomizer_scale * dispersion) - - signs = conv.fit() - nonzero = signs != 0 - - if nonzero.sum()>0: - beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) - - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=dispersion) - - inverse_info = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[1] - - S = conv.approximate_grid_inference(observed_target, - cov_target, - cov_target_score, - alternatives=alternatives) - -def test_approx_pivot(n=500, - p=100, - signal_fac=1., - s=5, - sigma=2., - rho=0.4, - randomizer_scale=1., - equicorrelated=False, - useIP=False): +def test_inf(n=500, + p=100, + signal_fac=1., + s=5, + sigma=2., + rho=0.4, + randomizer_scale=1., + equicorrelated=False, + useIP=False, + CI=False): inst, const = gaussian_instance, lasso.gaussian signal = np.sqrt(signal_fac * 2 * np.log(p)) @@ -97,8 +43,8 @@ def test_approx_pivot(n=500, conv = const(X, Y, W, - ridge_term=0.) 
- #randomizer_scale=randomizer_scale * sigma_) + ridge_term=0., + randomizer_scale=randomizer_scale * sigma_) signs = conv.fit() nonzero = signs != 0 @@ -123,84 +69,17 @@ def test_approx_pivot(n=500, dispersion=dispersion, useIP=useIP) - pivot = approximate_grid_inf._approx_pivots(beta_target) - - return pivot - - -def test_approx_ci(n=500, - p=100, - signal_fac=1., - s=5, - sigma=2., - rho=0.4, - randomizer_scale=1., - level=0.9, - useIP=False): - - inst, const = gaussian_instance, lasso.gaussian - signal = np.sqrt(signal_fac * 2 * np.log(p)) - - X, Y, beta = inst(n=n, - p=p, - signal=signal, - s=s, - equicorrelated=False, - rho=rho, - sigma=sigma, - random_signs=True)[:3] - - n, p = X.shape - - sigma_ = np.std(Y) - dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) - - W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ - - conv = const(X, - Y, - W, - randomizer_scale=randomizer_scale * sigma_) - - signs = conv.fit() - nonzero = signs != 0 - - if nonzero.sum()>0: - - (observed_target, - cov_target, - regress_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=dispersion) - - result, inverse_info = conv.selective_MLE(observed_target, - cov_target, - regress_target_score, - dispersion)[:2] - - _scale = 4 * np.sqrt(np.diag(inverse_info)) - scale_ = np.max(_scale) - ngrid = int(2 * scale_/0.1) - - approximate_grid_inf = approximate_grid_inference(conv, - observed_target, - cov_target, - regress_target_score, - dispersion=dispersion, - useIP=useIP) - - lci, uci = approximate_grid_inf._approx_intervals(level) - - beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) - coverage = (lci < beta_target) * (uci > beta_target) - length = uci - lci - - return np.mean(coverage), np.mean(length) + if CI is False: + pivot = approximate_grid_inf._approx_pivots(beta_target) + return pivot + else: + lci, uci = approximate_grid_inf._approx_intervals(level=0.90) + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + coverage = (lci < beta_target) * (uci > beta_target) + length = uci - lci + return np.mean(coverage), np.mean(length) def main(nsim=300, CI = False): @@ -212,15 +91,16 @@ def main(nsim=300, CI = False): if CI is False: _pivot = [] for i in range(nsim): - _pivot.extend(test_approx_pivot(n=100, - p=400, - signal_fac=0.5, - s=5, - sigma=2., - rho=0.30, - randomizer_scale=1., - equicorrelated=True, - useIP=True)) + _pivot.extend(test_inf(n=100, + p=400, + signal_fac=0.5, + s=5, + sigma=2., + rho=0.30, + randomizer_scale=1., + equicorrelated=True, + useIP=True, + CI=False)) print("iteration completed ", i) @@ -235,13 +115,16 @@ def main(nsim=300, CI = False): coverage_ = 0. length_ = 0. for n in range(nsim): - cov, len = test_approx_ci(n=500, - p=100, - signal_fac=1., - s=5, - sigma=3., - rho=0.4, - randomizer_scale=1.) 
+ cov, len = test_inf(n=100, + p=400, + signal_fac=0.5, + s=5, + sigma=2., + rho=0.30, + randomizer_scale=1., + equicorrelated=True, + useIP=True, + CI=True) coverage_ += cov length_ += len diff --git a/selectinf/randomized/tests/test_exact_reference.py b/selectinf/randomized/tests/test_exact_reference.py index ef66cc963..74e2b272e 100644 --- a/selectinf/randomized/tests/test_exact_reference.py +++ b/selectinf/randomized/tests/test_exact_reference.py @@ -90,8 +90,8 @@ def main(nsim=300, CI = False): _pivot = [] for i in range(nsim): - _pivot.extend(test_inf(n=400, - p=100, + _pivot.extend(test_inf(n=100, + p=400, signal_fac=1., s=0, sigma=2., @@ -133,103 +133,3 @@ def main(nsim=300, CI = False): if __name__ == "__main__": main(nsim=500, CI=False) - -# def test_selected_instance(seedn, -# n=500, -# p=100, -# signal_fac=1., -# s=5, -# sigma=2., -# rho=0.4, -# randomizer_scale=1., -# equicorrelated=False): -# -# while True: -# np.random.seed(seedn) -# inst, const = gaussian_instance, lasso.gaussian -# signal = np.sqrt(signal_fac * 2 * np.log(p)) -# -# X, Y, beta = inst(n=n, -# p=p, -# signal=signal, -# s=s, -# equicorrelated=equicorrelated, -# rho=rho, -# sigma=sigma, -# random_signs=True)[:3] -# -# n, p = X.shape -# -# sigma_ = np.std(Y) -# -# if n > (2 * p): -# dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) -# else: -# dispersion = sigma_ ** 2 -# -# eps = np.random.standard_normal((n, 2000)) * Y.std() -# W = 0.7 * np.median(np.abs(X.T.dot(eps)).max(1)) -# -# conv = const(X, -# Y, -# W, -# ridge_term=0., -# randomizer_scale=randomizer_scale * np.sqrt(dispersion)) -# -# signs = conv.fit() -# nonzero = signs != 0 -# print("size of selected set ", nonzero.sum()) -# -# if nonzero.sum() > 0: -# beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) -# -# (observed_target, -# cov_target, -# regress_target_score, -# dispersion, -# alternatives) = selected_targets(conv.loglike, -# conv._W, -# nonzero, -# dispersion=dispersion) -# -# exact_grid_inf = exact_grid_inference(conv, -# observed_target, -# cov_target, -# regress_target_score, -# dispersion=dispersion) -# -# lci, uci = exact_grid_inf._intervals(level=0.90) -# coverage = (lci < beta_target) * (uci > beta_target) -# length = uci - lci -# mle_length = 1.65 * 2 * np.sqrt(np.diag(exact_grid_inf.inverse_info)) -# return coverage, length, mle_length -# -# def main(nsim =50): -# -# import pandas as pd -# column_names = ["Experiment Replicate", "Coverage", "Length-ER", "Length-MLE"] -# master_DF = pd.DataFrame(columns=column_names) -# DF = pd.DataFrame(columns=column_names) -# -# n, p, s = 500, 100, 5 -# for i in range(nsim): -# full_dispersion = True -# cov, len_er, len_mle = test_selected_instance(seedn=i, n=n, p=p, s=s, signal_fac=1.2) -# DF["Coverage"] = pd.Series(cov) -# DF["Length-ER"] = pd.Series(len_er) -# DF["Length-MLE"] = pd.Series(len_mle) -# DF["Experiment Replicate"] = pd.Series((i*np.ones(len(cov),int)).tolist()) -# -# master_DF = DF.append(master_DF, ignore_index=True) -# -# import os -# outpath = os.path.dirname(__file__) -# -# outfile_mse_html = os.path.join(outpath, "compare_er.html") -# outfile_mse_csv = os.path.join(outpath, "compare_er.csv") -# -# master_DF.to_html(outfile_mse_html, index=False) -# master_DF.to_csv(outfile_mse_csv, index=False) -# -# if __name__ == "__main__": -# main(nsim=10) \ No newline at end of file diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index 1d931d915..e7e410512 100644 --- 
a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -115,17 +115,19 @@ def test_instance(nsample=100, nburnin=50): M = E.copy() M[-3:] = 1 dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) + (observed_target, cov_target, - cov_target_score, - alternatives) = selected_targets(L.loglike, + regress_target_score, + dispersion, + alternatives)= selected_targets(L.loglike, L._W, M, dispersion=dispersion) posterior_inf = L.posterior(observed_target, cov_target, - cov_target_score, + regress_target_score, dispersion=dispersion) samples = langevin_sampler(posterior_inf, @@ -163,9 +165,11 @@ def test_flexible_prior1(nsample=100, nburnin=50): M = E.copy() M[-3:] = 1 dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) + (observed_target, cov_target, - cov_target_score, + regress_target_score, + dispersion, alternatives) = selected_targets(L.loglike, L._W, M, @@ -181,11 +185,13 @@ def prior(target_parameter): seed_state = np.random.get_state() np.random.set_state(seed_state) Z1 = np.random.standard_normal() + posterior_inf1 = L.posterior(observed_target, - cov_target, - cov_target_score, - dispersion=dispersion, - prior=prior) + cov_target, + regress_target_score, + dispersion=dispersion, + prior=prior) + W1 = np.random.standard_normal() samples1 = langevin_sampler(posterior_inf1, nsample=nsample, @@ -195,8 +201,9 @@ def prior(target_parameter): Z2 = np.random.standard_normal() posterior_inf2 = L.posterior(observed_target, cov_target, - cov_target_score, + regress_target_score, dispersion=dispersion) + W2 = np.random.standard_normal() samples2 = langevin_sampler(posterior_inf2, nsample=nsample, @@ -222,9 +229,11 @@ def test_flexible_prior2(nsample=1000, nburnin=50): M = E.copy() M[-3:] = 1 dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) + (observed_target, cov_target, - cov_target_score, + regress_target_score, + dispersion, alternatives) = selected_targets(L.loglike, L._W, M, @@ -238,10 +247,11 @@ def prior(target_parameter): return log_prior, grad_prior posterior_inf = L.posterior(observed_target, - cov_target, - cov_target_score, - dispersion=dispersion, - prior=prior) + cov_target, + regress_target_score, + dispersion=dispersion, + prior=prior) + adaptive_proposal = np.linalg.inv(np.linalg.inv(posterior_inf.inverse_info) + np.identity(posterior_inf.inverse_info.shape[0]) / 0.05 ** 2) samples = langevin_sampler(posterior_inf, @@ -285,7 +295,8 @@ def test_hiv_data(nsample=10000, (observed_target, cov_target, - cov_target_score, + regress_target_score, + dispersion, alternatives) = selected_targets(conv.loglike, conv._W, nonzero, @@ -293,17 +304,19 @@ def test_hiv_data(nsample=10000, mle, inverse_info = conv.selective_MLE(observed_target, cov_target, - cov_target_score, + regress_target_score, + dispersion, level=level, solve_args={'tol': 1.e-12})[:2] approx_inf = conv.approximate_grid_inference(observed_target, cov_target, - cov_target_score) + regress_target_score, + dispersion) posterior_inf = conv.posterior(observed_target, cov_target, - cov_target_score, + regress_target_score, dispersion=dispersion) samples_langevin = langevin_sampler(posterior_inf, diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 5753ba668..0d9ea51ac 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ 
b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -66,7 +66,8 @@ def test_full_targets(n=200, if n > p: (observed_target, cov_target, - cov_target_score, + regress_target_score, + dispersion, alternatives) = full_targets(conv.loglike, conv._W, nonzero, @@ -74,7 +75,8 @@ def test_full_targets(n=200, else: (observed_target, cov_target, - cov_target_score, + regress_target_score, + dispersion, alternatives) = debiased_targets(conv.loglike, conv._W, nonzero, @@ -83,7 +85,9 @@ def test_full_targets(n=200, result = conv.selective_MLE(observed_target, cov_target, - cov_target_score)[0] + regress_target_score, + dispersion)[0] + pval = result['pvalue'] estimate = result['MLE'] intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) @@ -144,7 +148,8 @@ def test_selected_targets(n=2000, (observed_target, cov_target, - cov_target_score, + regress_target_score, + dispersion, alternatives) = selected_targets(conv.loglike, conv._W, nonzero, @@ -152,8 +157,9 @@ def test_selected_targets(n=2000, result = conv.selective_MLE(observed_target, cov_target, - cov_target_score)[0] - estimate = result['MLE'] + regress_target_score, + dispersion)[0] + pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) @@ -183,7 +189,8 @@ def test_instance(): dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) (observed_target, cov_target, - cov_target_score, + regress_target_score, + dispersion, alternatives) = selected_targets(L.loglike, L._W, M, @@ -193,16 +200,14 @@ def test_instance(): result = L.selective_MLE(observed_target, cov_target, - cov_target_score)[0] - estimate = result['MLE'] - pval = result['pvalue'] + regress_target_score, + dispersion)[0] + intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) beta_target = np.linalg.pinv(X[:, M]).dot(X.dot(beta)) coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) - print("observed_opt_state ", L.observed_opt_state) - # print("check ", np.asarray(result['MLE']), np.asarray(result['unbiased'])) return coverage @@ -219,7 +224,6 @@ def test_instance(): def test_selected_targets_disperse(n=500, p=100, - signal_fac=1., s=5, sigma=1., rho=0.4, @@ -242,10 +246,6 @@ def test_selected_targets_disperse(n=500, sigma=sigma, random_signs=True)[:3] - idx = np.arange(p) - sigmaX = rho ** np.abs(np.subtract.outer(idx, idx)) - print("snr", beta.T.dot(sigmaX).dot(beta) / ((sigma ** 2.) 
* n)) - n, p = X.shape sigma_ = np.std(Y) @@ -267,7 +267,8 @@ def test_selected_targets_disperse(n=500, (observed_target, cov_target, - cov_target_score, + regress_target_score, + dispersion, alternatives) = selected_targets(conv.loglike, conv._W, nonzero, @@ -275,7 +276,8 @@ def test_selected_targets_disperse(n=500, result = conv.selective_MLE(observed_target, cov_target, - cov_target_score)[0] + regress_target_score, + dispersion)[0] pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) @@ -287,150 +289,30 @@ def test_selected_targets_disperse(n=500, return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals -# def main(nsim=500, full=False): -# P0, PA, cover, length_int = [], [], [], [] -# from statsmodels.distributions import ECDF -# -# n, p, s = 500, 100, 0 -# -# for i in range(nsim): -# if full: -# if n > p: -# full_dispersion = True -# else: -# full_dispersion = False -# p0, pA, cover_, intervals = test_full_targets(n=n, p=p, s=s, full_dispersion=full_dispersion) -# avg_length = intervals[:, 1] - intervals[:, 0] -# else: -# full_dispersion = True -# p0, pA, cover_, intervals = test_selected_targets(n=n, p=p, s=s, full_dispersion=full_dispersion) -# avg_length = intervals[:, 1] - intervals[:, 0] -# -# cover.extend(cover_) -# P0.extend(p0) -# PA.extend(pA) -# # print( -# # np.array(PA) < 0.1, np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover), -# # np.mean(avg_length), 'null pvalue + power + length') -# print("coverage and lengths ", np.mean(cover), np.mean(avg_length)) - - -def test_mle_inference(seedn, - n=2000, - p=200, - signal_fac=1.2, - s=5, - sigma=2, - rho=0.7, - randomizer_scale=1., - full_dispersion=True, - full=False): - """ - Compare to R randomized lasso - """ +def main(nsim=500, full=False): + P0, PA, cover, length_int = [], [], [], [] + from statsmodels.distributions import ECDF - inst, const = gaussian_instance, lasso.gaussian - signal = np.sqrt(signal_fac * 2 * np.log(p)) + n, p, s = 500, 100, 0 - while True: - np.random.seed(seed=seedn) - X, Y, beta = inst(n=n, - p=p, - signal=signal, - s=s, - equicorrelated=True, - rho=rho, - sigma=sigma, - random_signs=True)[:3] - - idx = np.arange(p) - sigmaX = rho ** np.abs(np.subtract.outer(idx, idx)) - snr = beta.T.dot(sigmaX).dot(beta) / ((sigma ** 2.) * n) - print("snr", beta.T.dot(sigmaX).dot(beta) / ((sigma ** 2.) 
* n)) - - n, p = X.shape - - sigma_ = np.std(Y) - W = 0.8 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ - - conv = const(X, - Y, - W, - #ridge_term=0., - randomizer_scale=randomizer_scale * sigma_) - - signs = conv.fit() - nonzero = signs != 0 - print("dimensions", n, p, nonzero.sum()) - - if nonzero.sum() > 0: - dispersion = None - if full_dispersion: - dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) - - if full: - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = full_targets(conv.loglike, - conv._W, - nonzero, - dispersion=dispersion) - - else: - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=dispersion) - - result = conv.selective_MLE(observed_target, - cov_target, - cov_target_score, - dispersion)[0] - - return result['MLE'], result['lower_confidence'], result['upper_confidence'], snr - -def main(nsim =50): - - import pandas as pd - column_names = ["Experiment Replicate", "MLE", "Lower Conf", "Upper Conf", "SNR"] - master_DF = pd.DataFrame(columns=column_names) - DF = pd.DataFrame(columns=column_names) - - n, p, s = 500, 100, 5 for i in range(nsim): - full_dispersion = True - mle, lower_conf, upper_conf, snr = test_mle_inference(seedn=i, - n=n, - p=p, - s=s, - signal_fac=1.2, - full_dispersion=full_dispersion, - full=True) - - DF["MLE"] = pd.Series(mle) - DF["Lower Conf"] = pd.Series(lower_conf) - DF["Upper Conf"] = pd.Series(upper_conf) - DF["Experiment Replicate"] = pd.Series((i*np.ones(len(mle),int)).tolist()) - DF["SNR"] = pd.Series((snr * np.ones(len(mle))).tolist()) - - master_DF = DF.append(master_DF, ignore_index=True) - - import os - outpath = os.path.dirname(__file__) - - outfile_mse_html = os.path.join(outpath, "compare_mle.html") - outfile_mse_csv = os.path.join(outpath, "compare_mle.csv") - - master_DF.to_html(outfile_mse_html, index=False) - master_DF.to_csv(outfile_mse_csv, index=False) - -if __name__ == "__main__": - main(nsim=50) - + if full: + if n > p: + full_dispersion = True + else: + full_dispersion = False + p0, pA, cover_, intervals = test_full_targets(n=n, p=p, s=s, full_dispersion=full_dispersion) + avg_length = intervals[:, 1] - intervals[:, 0] + else: + full_dispersion = True + p0, pA, cover_, intervals = test_selected_targets(n=n, p=p, s=s, full_dispersion=full_dispersion) + avg_length = intervals[:, 1] - intervals[:, 0] + + cover.extend(cover_) + P0.extend(p0) + PA.extend(pA) + # print( + # np.array(PA) < 0.1, np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover), + # np.mean(avg_length), 'null pvalue + power + length') + print("coverage and lengths ", np.mean(cover), np.mean(avg_length)) From 40ac8ab9ba4db0a7a87e0c9fe7b278816ec73533 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 18 Aug 2021 12:01:43 -0700 Subject: [PATCH 133/187] delete some comments --- selectinf/randomized/query.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 31909ac00..14740cb79 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -107,8 +107,6 @@ def _setup_sampler(self, offset, opt_linear, observed_subgrad, - # optional dispersion parameter - # for covariance of randomization dispersion=1): A, b = linear_part, offset @@ -161,12 +159,9 @@ def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad def _setup_implied_gaussian(self, 
opt_linear, observed_subgrad, - # optional dispersion parameter - # for covariance of randomization dispersion=1): cov_rand, prec = self.randomizer.cov_prec - prec = prec / dispersion # why do we do this here -- prec is just known if np.asarray(prec).shape in [(), (0,)]: _prod_score_prec_unnorm = self._hessian * prec From b57198500607d4bceba0fe5fcc8425b3e04e5da6 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 18 Aug 2021 13:45:37 -0700 Subject: [PATCH 134/187] suppressing dispersion to _setup_implied_gaussian; putting back in tests for logistic, poisson, cox --- selectinf/randomized/approx_reference.py | 6 +- selectinf/randomized/exact_reference.py | 6 +- selectinf/randomized/posterior_inference.py | 6 +- selectinf/randomized/query.py | 18 +- .../tests/test_selective_MLE_high.py | 591 +++++++++++++++++- 5 files changed, 576 insertions(+), 51 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index 552fa177c..8e2b3009e 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -61,9 +61,9 @@ def __init__(self, self.opt_linear = opt_linear self.observed_score = observed_score - self.M1 = query.M1 * dispersion - self.M2 = query.M2 * (dispersion ** 2) - self.M3 = query.M3 * (dispersion ** 2) + self.M1 = query.M1 + self.M2 = query.M2 + self.M3 = query.M3 self.feasible_point = query.observed_opt_state self.cond_mean = query.cond_mean diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index df90eacec..a81894d8b 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -59,9 +59,9 @@ def __init__(self, self.opt_linear = opt_linear self.observed_score = observed_score - self.M1 = query.M1 * dispersion - self.M2 = query.M2 * (dispersion ** 2) - self.M3 = query.M3 * (dispersion ** 2) + self.M1 = query.M1 + self.M2 = query.M2 + self.M3 = query.M3 self.feasible_point = query.observed_opt_state self.cond_mean = query.cond_mean diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 4395e4499..e1faacc54 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -66,9 +66,9 @@ def __init__(self, self.opt_linear = opt_linear self.observed_score = observed_score - self.M1 = query.M1 * dispersion - self.M2 = query.M2 * (dispersion ** 2) - self.M3 = query.M3 * (dispersion ** 2) + self.M1 = query.M1 + self.M2 = query.M2 + self.M3 = query.M3 self.feasible_point = query.observed_opt_state self.cond_mean = query.cond_mean diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 86d6cdca6..26e0a297f 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -120,7 +120,8 @@ def _setup_sampler(self, M1, M2, M3) = self._setup_implied_gaussian(opt_linear, - observed_subgrad) + observed_subgrad, + dispersion=dispersion) def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad if score.ndim == 1: @@ -157,7 +158,8 @@ def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad def _setup_implied_gaussian(self, opt_linear, - observed_subgrad): + observed_subgrad, + dispersion=1): cov_rand, prec = self.randomizer.cov_prec @@ -179,9 +181,9 @@ def _setup_implied_gaussian(self, cond_mean = regress_opt.dot(self.observed_score_state + observed_subgrad) - M1 = prod_score_prec_unnorm - M2 = M1.dot(cov_rand).dot(M1.T) - M3 = 
M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T) + M1 = prod_score_prec_unnorm * dispersion + M2 = M1.dot(cov_rand).dot(M1.T) * (dispersion**2) + M3 = M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T) * (dispersion**2) self.M1 = M1 self.M2 = M2 @@ -978,9 +980,9 @@ def selective_MLE(self, Arguments passed to solver. """ - self.M1 = self.M1 * dispersion - self.M2 = self.M2 * (dispersion**2) - self.M3 = self.M3 * (dispersion**2) + # self.M1 = self.M1 * dispersion + # self.M2 = self.M2 * (dispersion**2) + # self.M3 = self.M3 * (dispersion**2) return selective_MLE(observed_target, cov_target, diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 0d9ea51ac..3ece533c2 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -187,6 +187,7 @@ def test_instance(): M[-3:] = 1 print("check ", M) dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) + (observed_target, cov_target, regress_target_score, @@ -212,16 +213,6 @@ def test_instance(): return coverage -# def main(nsim=500): -# -# cover = [] -# for i in range(nsim): -# -# cover_ = test_instance() -# cover.extend(cover_) -# print(np.mean(cover), 'coverage so far ') - - def test_selected_targets_disperse(n=500, p=100, s=5, @@ -289,30 +280,562 @@ def test_selected_targets_disperse(n=500, return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals -def main(nsim=500, full=False): - P0, PA, cover, length_int = [], [], [], [] - from statsmodels.distributions import ECDF +def test_logistic(n=2000, + p=200, + signal_fac=10., + s=5, + rho=0.4, + randomizer_scale=1): + """ + Run approx MLE with selected targets on binomial data + """ - n, p, s = 500, 100, 0 + inst, const = logistic_instance, lasso.logistic + signal = np.sqrt(signal_fac * 2 * np.log(p)) - for i in range(nsim): - if full: - if n > p: - full_dispersion = True - else: - full_dispersion = False - p0, pA, cover_, intervals = test_full_targets(n=n, p=p, s=s, full_dispersion=full_dispersion) - avg_length = intervals[:, 1] - intervals[:, 0] - else: - full_dispersion = True - p0, pA, cover_, intervals = test_selected_targets(n=n, p=p, s=s, full_dispersion=full_dispersion) - avg_length = intervals[:, 1] - intervals[:, 0] - - cover.extend(cover_) - P0.extend(p0) - PA.extend(pA) - # print( - # np.array(PA) < 0.1, np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover), - # np.mean(avg_length), 'null pvalue + power + length') - print("coverage and lengths ", np.mean(cover), np.mean(avg_length)) + while True: + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + randomizer_scale=randomizer_scale * sigma_) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + + (observed_target, + cov_target, + cov_target_score, + dispersion, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=1) + + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + estimate = result['MLE'] + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] 
!= 0], intervals + +def test_logistic_split(n=2000, + p=200, + signal_fac=10., + s=5, + rho=0.4, + randomizer_scale=1): + """ + Run approx MLE with selected targets on binomial data with data splitting + """ + + inst, const = logistic_instance, split_lasso.logistic + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + proportion=0.7) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + + (observed_target, + cov_target, + cov_target_score, + dispersion, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=1) + + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + estimate = result['MLE'] + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals + +def test_poisson(n=2000, + p=200, + signal_fac=10., + s=5, + rho=0.4, + randomizer_scale=1): + """ + Run approx MLE with selected targets on Poisson data + """ + + inst, const = poisson_instance, lasso.poisson + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + randomizer_scale=randomizer_scale * sigma_) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + + (observed_target, + cov_target, + cov_target_score, + dispersion, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=1) + + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + estimate = result['MLE'] + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals + +def test_poisson_split(n=2000, + p=200, + signal_fac=10., + s=5, + rho=0.4, + randomizer_scale=1): + """ + Run approx MLE with selected targets on Poisson data with data splitting + """ + + inst, const = poisson_instance, split_lasso.poisson + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:3] + + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + + conv = const(X, + Y, + W, + proportion=0.7) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + + (observed_target, + cov_target, + cov_target_score, + dispersion, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=1) + + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + estimate = result['MLE'] + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals + +def test_cox(n=2000, + p=200, + signal_fac=10., + s=5, + rho=0.4, + randomizer_scale=1): + """ + Run approx MLE with selected 
targets on survival data + """ + + inst, const = cox_instance, lasso.coxph + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + X, T, S, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:4] + + n, p = X.shape + + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) + + conv = const(X, + T, + S, + W, + randomizer_scale=randomizer_scale) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + + cox_full = rr.glm.cox(X, T, S) + full_hess = cox_full.hessian(conv.observed_soln) + + (observed_target, + cov_target, + cov_target_score, + dispersion, + alternatives) = selected_targets(conv.loglike, + None, + nonzero, + hessian=full_hess, + dispersion=1) + + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + estimate = result['MLE'] + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals + +def test_cox_split(n=2000, + p=200, + signal_fac=10., + s=5, + rho=0.4, + randomizer_scale=1): + """ + Run approx MLE with selected targets on survival data with data splitting + """ + + inst, const = cox_instance, split_lasso.coxph + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + X, T, S, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + random_signs=True)[:4] + + n, p = X.shape + + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) + + conv = const(X, + T, + S, + W, + proportion=0.7) + + signs = conv.fit() + nonzero = signs != 0 + print("dimensions", n, p, nonzero.sum()) + + if nonzero.sum() > 0: + + cox_full = rr.glm.cox(X, T, S) + full_hess = cox_full.hessian(conv.observed_soln) + + (observed_target, + cov_target, + cov_target_score, + dispersion, + alternatives) = selected_targets(conv.loglike, + None, + nonzero, + hessian=full_hess, + dispersion=1) + + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + estimate = result['MLE'] + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals + +def test_scale_invariant_split(n=200, + p=20, + signal_fac=10., + s=5, + sigma=3, + rho=0.4, + randomizer_scale=1, + full_dispersion=True, + seed=2): + """ + Confirm Gaussian version is appropriately scale invariant with data splitting + """ + + inst, const = gaussian_instance, split_lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + results = [] + + scales = [1, 5] + for scale in scales: + + np.random.seed(seed) + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + Y *= scale; beta *= scale + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + print('W', W[0]/scale) + conv = const(X, + Y, + W, + proportion=0.7) + + signs = conv.fit() + nonzero = signs != 0 + print('nonzero', np.where(nonzero)[0]) + print('feature_weights', conv.feature_weights[0] / scale) + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + + (observed_target, + cov_target, + cov_target_score, + dispersion, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + + print('dispersion', dispersion/scale**2) + print('target', observed_target[0]/scale) + print('cov_target', 
cov_target[0,0]/scale**2) + print('cov_target_score', cov_target_score[0,0]/scale**2) + + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + + print(result['MLE'] / scale) + results.append(result) + + assert np.allclose(results[0]['MLE'] / scales[0], + results[1]['MLE'] / scales[1]) + assert np.allclose(results[0]['SE'] / scales[0], + results[1]['SE'] / scales[1]) + assert np.allclose(results[0]['upper_confidence'] / scales[0], + results[1]['upper_confidence'] / scales[1]) + assert np.allclose(results[0]['lower_confidence'] / scales[0], + results[1]['lower_confidence'] / scales[1]) + assert np.allclose(results[0]['Zvalue'], + results[1]['Zvalue']) + assert np.allclose(results[0]['pvalue'], + results[1]['pvalue']) + +def test_scale_invariant(n=200, + p=20, + signal_fac=10., + s=5, + sigma=3, + rho=0.4, + randomizer_scale=1, + full_dispersion=True, + seed=2): + """ + Confirm Gaussian version is appropriately scale invariant + """ + + inst, const = gaussian_instance, lasso.gaussian + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + results = [] + + scales = [1, 5] + for scale in scales: + + np.random.seed(seed) + X, Y, beta = inst(n=n, + p=p, + signal=signal, + s=s, + equicorrelated=False, + rho=rho, + sigma=sigma, + random_signs=True)[:3] + + Y *= scale; beta *= scale + n, p = X.shape + + sigma_ = np.std(Y) + W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + print('W', W[0]/scale) + conv = const(X, + Y, + W, + randomizer_scale=randomizer_scale * sigma_) + signs = conv.fit() + nonzero = signs != 0 + print('nonzero', np.where(nonzero)[0]) + print('feature_weights', conv.feature_weights[0] / scale) + print('perturb', conv._initial_omega[0] / scale) + dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + + (observed_target, + cov_target, + cov_target_score, + dispersion, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + + print('dispersion', dispersion/scale**2) + print('target', observed_target[0]/scale) + print('cov_target', cov_target[0,0]/scale**2) + print('cov_target_score', cov_target_score[0,0]/scale**2) + + result = conv.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + + print(result['MLE'] / scale) + results.append(result) + + assert np.allclose(results[0]['MLE'] / scales[0], + results[1]['MLE'] / scales[1]) + assert np.allclose(results[0]['SE'] / scales[0], + results[1]['SE'] / scales[1]) + assert np.allclose(results[0]['upper_confidence'] / scales[0], + results[1]['upper_confidence'] / scales[1]) + assert np.allclose(results[0]['lower_confidence'] / scales[0], + results[1]['lower_confidence'] / scales[1]) + assert np.allclose(results[0]['Zvalue'], + results[1]['Zvalue']) + assert np.allclose(results[0]['pvalue'], + results[1]['pvalue']) + + +def test_instance(): + n, p, s = 500, 100, 5 + X = np.random.standard_normal((n, p)) + beta = np.zeros(p) + beta[:s] = np.sqrt(2 * np.log(p) / n) + Y = X.dot(beta) + np.random.standard_normal(n) + + scale_ = np.std(Y) + # uses noise of variance n * scale_ / 4 by default + L = lasso.gaussian(X, Y, 3 * scale_ * np.sqrt(2 * np.log(p) * np.sqrt(n))) + signs = L.fit() + E = (signs != 0) + + M = E.copy() + M[-3:] = 1 + print("check ", M) + dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) + (observed_target, + cov_target, + cov_target_score, + dispersion, + alternatives) = selected_targets(L.loglike, + L._W, + M, + dispersion=dispersion) + + print("check shapes", 
observed_target.shape, E.sum()) + + result = L.selective_MLE(observed_target, + cov_target, + cov_target_score)[0] + estimate = result['MLE'] + pval = result['pvalue'] + intervals = np.asarray(result[['lower_confidence', + 'upper_confidence']]) + + beta_target = np.linalg.pinv(X[:, M]).dot(X.dot(beta)) + + coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) + print("observed_opt_state ", L.observed_opt_state) + # print("check ", np.asarray(result['MLE']), np.asarray(result['unbiased'])) + + return coverage From f580e93dca6a0bdc62d61f93e58fbfc33704bb91 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 18 Aug 2021 13:47:29 -0700 Subject: [PATCH 135/187] removing dispersion from selective mle --- selectinf/randomized/query.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 26e0a297f..500e64d48 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -301,7 +301,6 @@ def selective_MLE(self, observed_target, cov_target, regress_target_score, - dispersion=1, level=0.9, solve_args={'tol': 1.e-12}): """ @@ -323,7 +322,7 @@ def selective_MLE(self, cov_target, regress_target_score, self.observed_opt_state, - dispersion=dispersion, +# dispersion=dispersion, level=level, solve_args=solve_args) @@ -958,7 +957,6 @@ def selective_MLE(self, # used as a feasible point. # precise value used only for independent estimator observed_soln, - dispersion=1, solve_args={'tol': 1.e-12}, level=0.9): """ From e578753dcbdd5f53ee40a8b7a9d099cb75f92208 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 18 Aug 2021 13:53:41 -0700 Subject: [PATCH 136/187] removing dispersion where possible --- selectinf/randomized/approx_reference.py | 4 +- selectinf/randomized/exact_reference.py | 4 +- selectinf/randomized/posterior_inference.py | 5 +- .../randomized/tests/test_approx_reference.py | 51 ------------------- .../randomized/tests/test_exact_reference.py | 3 +- 5 files changed, 5 insertions(+), 62 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index 8e2b3009e..5b1e43c19 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -14,7 +14,6 @@ def __init__(self, observed_target, cov_target, regress_target_score, - dispersion=1, solve_args={'tol': 1.e-12}, useIP=False): @@ -47,8 +46,7 @@ def __init__(self, result, inverse_info, log_ref = query.selective_MLE(observed_target, cov_target, - regress_target_score, - dispersion) + regress_target_score) cond_cov = query.cond_cov self.cond_precision = np.linalg.inv(cond_cov) diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index a81894d8b..9facaa7fe 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -13,7 +13,6 @@ def __init__(self, observed_target, cov_target, regress_target_score, - dispersion, solve_args={'tol': 1.e-12}): """ @@ -45,8 +44,7 @@ def __init__(self, result, inverse_info, log_ref = query.selective_MLE(observed_target, cov_target, - regress_target_score, - dispersion) + regress_target_score) cond_cov = query.cond_cov self.cond_precision = np.linalg.inv(cond_cov) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index e1faacc54..bbab9bd5d 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -47,8 +47,7 @@ def 
__init__(self, result, self.inverse_info, log_ref = query.selective_MLE(observed_target, cov_target, - regress_target_score, - dispersion) + regress_target_score) ### Note for an informative prior we might want to change this... @@ -76,7 +75,7 @@ def __init__(self, self.offset = offset self.initial_estimate = np.asarray(result['MLE']) - self.dispersion = dispersion + self.dispersion = dispersion # why is this needed? self.log_ref = log_ref self._set_marginal_parameters() diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index 80c6c3776..1b08b2235 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -66,7 +66,6 @@ def test_inf(n=500, observed_target, cov_target, regress_target_score, - dispersion=dispersion, useIP=useIP) if CI is False: @@ -81,54 +80,4 @@ def test_inf(n=500, return np.mean(coverage), np.mean(length) -def main(nsim=300, CI = False): - - import matplotlib as mpl - mpl.use('tkagg') - import matplotlib.pyplot as plt - from statsmodels.distributions.empirical_distribution import ECDF - - if CI is False: - _pivot = [] - for i in range(nsim): - _pivot.extend(test_inf(n=100, - p=400, - signal_fac=0.5, - s=5, - sigma=2., - rho=0.30, - randomizer_scale=1., - equicorrelated=True, - useIP=True, - CI=False)) - - print("iteration completed ", i) - - plt.clf() - ecdf_MLE = ECDF(np.asarray(_pivot)) - grid = np.linspace(0, 1, 101) - plt.plot(grid, ecdf_MLE(grid), c='blue', marker='^') - plt.plot(grid, grid, 'k--') - plt.show() - - if CI is True: - coverage_ = 0. - length_ = 0. - for n in range(nsim): - cov, len = test_inf(n=100, - p=400, - signal_fac=0.5, - s=5, - sigma=2., - rho=0.30, - randomizer_scale=1., - equicorrelated=True, - useIP=True, - CI=True) - - coverage_ += cov - length_ += len - print("coverage so far ", coverage_ / (n + 1.)) - print("lengths so far ", length_ / (n + 1.)) - print("iteration completed ", n + 1) diff --git a/selectinf/randomized/tests/test_exact_reference.py b/selectinf/randomized/tests/test_exact_reference.py index ce4a41c2d..d8a1e180e 100644 --- a/selectinf/randomized/tests/test_exact_reference.py +++ b/selectinf/randomized/tests/test_exact_reference.py @@ -65,8 +65,7 @@ def test_inf(n=500, exact_grid_inf = exact_grid_inference(conv, observed_target, cov_target, - regress_target_score, - dispersion=dispersion) + regress_target_score) if CI is False: pivot = exact_grid_inf._pivots(beta_target) From 51918b2d7f09dbb06134de972e914b7e52021314 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 18 Aug 2021 16:17:48 -0700 Subject: [PATCH 137/187] WIP: moving targets to base module; fixing slope and screening --- selectinf/base.py | 213 ++++++++++++++++++ selectinf/randomized/lasso.py | 209 ++--------------- selectinf/randomized/modelQ.py | 2 + selectinf/randomized/query.py | 18 +- selectinf/randomized/screening.py | 18 +- selectinf/randomized/slope.py | 11 +- .../randomized/tests/test_exact_reference.py | 6 +- selectinf/randomized/tests/test_lasso.py | 119 +++++----- .../tests/test_marginal_screening.py | 3 + selectinf/randomized/tests/test_modelQ.py | 2 +- selectinf/randomized/tests/test_posterior.py | 25 +- .../tests/test_selective_MLE_high.py | 88 ++------ selectinf/randomized/tests/test_slope.py | 23 +- selectinf/randomized/tests/test_topK.py | 4 + 14 files changed, 364 insertions(+), 377 deletions(-) diff --git a/selectinf/base.py b/selectinf/base.py index dc6db4230..c6ee4ac46 100644 --- a/selectinf/base.py +++ 
b/selectinf/base.py @@ -1,6 +1,11 @@ +import numpy as np + import regreg.api as rr import regreg.affine as ra +from .algorithms.debiased_lasso import (debiasing_matrix, + pseudoinverse_debiasing_matrix) + def restricted_estimator(loss, active, solve_args={'min_its':50, 'tol':1.e-10}): """ Fit a restricted model using only columns `active`. @@ -35,3 +40,211 @@ def restricted_estimator(loss, active, solve_args={'min_its':50, 'tol':1.e-10}): beta_E = loss_restricted.solve(**solve_args) return beta_E + + +# functions construct targets of inference +# and covariance with score representation + +def selected_targets(loglike, + solution, + features=None, + sign_info={}, + dispersion=None, + solve_args={'tol': 1.e-12, 'min_its': 100}, + hessian=None): + + if features is None: + features = solution != 0 + + X, y = loglike.data + n, p = X.shape + + observed_target = restricted_estimator(loglike, features, solve_args=solve_args) + linpred = X[:, features].dot(observed_target) + + Hfeat = _compute_hessian(loglike, + solution, + features)[1] + Qfeat = Hfeat[features] + _score_linear = -Hfeat + + cov_target = np.linalg.inv(Qfeat) + crosscov_target_score = _score_linear.dot(cov_target) + alternatives = ['twosided'] * features.sum() + features_idx = np.arange(p)[features] + + for i in range(len(alternatives)): + if features_idx[i] in sign_info.keys(): + alternatives[i] = sign_info[features_idx[i]] + + if dispersion is None: # use Pearson's X^2 + dispersion = _pearsonX2(y, + linpred, + loglike, + observed_target.shape[0]) + + regress_target_score = np.zeros((cov_target.shape[0], p)) + regress_target_score[:,features] = cov_target + return observed_target, cov_target * dispersion, regress_target_score, dispersion, alternatives + +def full_targets(loglike, + solution, + features=None, + dispersion=None, + solve_args={'tol': 1.e-12, 'min_its': 50}, + hessian=None): + + if features is None: + features = solution != 0 + + X, y = loglike.data + n, p = X.shape + features_bool = np.zeros(p, np.bool) + features_bool[features] = True + features = features_bool + + # target is one-step estimator + + full_estimator = loglike.solve(**solve_args) + linpred = X.dot(full_estimator) + Qfull = _compute_hessian(loglike, + full_estimator) + + Qfull_inv = np.linalg.inv(Qfull) + cov_target = Qfull_inv[features][:, features] + observed_target = full_estimator[features] + crosscov_target_score = np.zeros((p, cov_target.shape[0])) + crosscov_target_score[features] = -np.identity(cov_target.shape[0]) + + if dispersion is None: # use Pearson's X^2 + dispersion = _pearsonX2(y, + linpred, + loglike, + p) + + alternatives = ['twosided'] * features.sum() + regress_target_score = Qfull_inv[features] # weights missing? 
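# A brief aside on the Pearson X^2 dispersion used a few lines above: for a
# Gaussian log-likelihood the hessian weights are identically one and the mean
# function is the identity, so the estimate reduces to the ordinary residual
# variance. A minimal sketch, assuming a design X, response y and the full
# least-squares fit (hypothetical names, illustrative only):
#
#     resid = y - X.dot(np.linalg.pinv(X).dot(y))
#     dispersion = (resid ** 2).sum() / (n - p)
#
# which agrees with the full_dispersion computation used in the tests of this
# series.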
+ return observed_target, cov_target * dispersion, regress_target_score, dispersion, alternatives + +def debiased_targets(loglike, + solution, + features=None, + sign_info={}, + penalty=None, #required kwarg + dispersion=None, + approximate_inverse='JM', + debiasing_args={}): + + if features is None: + features = solution != 0 + + if penalty is None: + raise ValueError('require penalty for consistent estimator') + + X, y = loglike.data + n, p = X.shape + features_bool = np.zeros(p, np.bool) + features_bool[features] = True + features = features_bool + + # relevant rows of approximate inverse + + linpred = X.dot(solution) + W = loglike.saturated_loss.hessian(linpred) + if approximate_inverse == 'JM': + Qinv_hat = np.atleast_2d(debiasing_matrix(X * np.sqrt(W)[:, None], + np.nonzero(features)[0], + **debiasing_args)) / n + else: + Qinv_hat = np.atleast_2d(pseudoinverse_debiasing_matrix(X * np.sqrt(W)[:, None], + np.nonzero(features)[0], + **debiasing_args)) + + problem = rr.simple_problem(loglike, penalty) + nonrand_soln = problem.solve() + G_nonrand = loglike.smooth_objective(nonrand_soln, 'grad') + + observed_target = nonrand_soln[features] - Qinv_hat.dot(G_nonrand) + + Qfull, Qrelax = _compute_hessian(loglike, + solution, + features) + + if p > n: + M1 = Qinv_hat.dot(X.T) + cov_target = (M1 * W[None, :]).dot(M1.T) + crosscov_target_score = -(M1 * W[None, :]).dot(X).T + else: + Qfull = X.T.dot(W[:, None] * X) + cov_target = Qinv_hat.dot(Qfull.dot(Qinv_hat.T)) + crosscov_target_score = -Qinv_hat.dot(Qfull).T + + if dispersion is None: # use Pearson's X^2 + relaxed_soln = nonrand_soln[features] - np.linalg.inv(Qrelax[features]).dot(G_nonrand[features]) + Xfeat = X[:, features] + linpred = Xfeat.dot(relaxed_soln) + dispersion = _pearsonX2(y, + linpred, + loglike, + features.sum()) + + alternatives = ['twosided'] * features.sum() + return observed_target, cov_target * dispersion, Qinv_hat, dispersion, alternatives + +def form_targets(target, + loglike, + solution, + features, + **kwargs): + _target = {'full':full_targets, + 'selected':selected_targets, + 'debiased':debiased_targets}[target] + return _target(loglike, + solution, + features, + **kwargs) + +def _compute_hessian(loglike, + beta_bar, + *bool_indices): + + X, y = loglike.data + linpred = X.dot(beta_bar) + n = linpred.shape[0] + + if hasattr(loglike.saturated_loss, "hessian"): # a GLM -- all we need is W + W = loglike.saturated_loss.hessian(linpred) + parts = [np.dot(X.T, X[:, bool_idx] * W[:, None]) for bool_idx in bool_indices] + _hessian = np.dot(X.T, X * W[:, None]) # CAREFUL -- this will be big + elif hasattr(loglike.saturated_loss, "hessian_mult"): + parts = [] + for bool_idx in bool_indices: + _right = np.zeros((n, bool_idx.sum())) + for i, j in enumerate(np.nonzero(bool_idx)[0]): + _right[:,i] = loglike.saturated_loss.hessian_mult(linpred, + X[:,j], + case_weights=loglike.saturated_loss.case_weights) + parts.append(X.T.dot(_right)) + _hessian = np.zeros_like(X) + for i in range(X.shape[1]): + _hessian[:,i] = loglike.saturated_loss.hessian_mult(linpred, + X[:,i], + case_weights=loglike.saturated_loss.case_weights) + _hessian = X.T.dot(_hessian) + else: + raise ValueError('saturated_loss has no hessian or hessian_mult method') + + if bool_indices: + return (_hessian,) + tuple(parts) + else: + return _hessian + +def _pearsonX2(y, + linpred, + loglike, + df_fit): + + W = loglike.saturated_loss.hessian(linpred) + n = y.shape[0] + resid = y - loglike.saturated_loss.mean_function(linpred) + return (resid ** 2 / W).sum() / (n - 
df_fit) diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index 674f85a62..ce4062033 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -12,9 +12,9 @@ from .query import gaussian_query from .randomization import randomization -from ..base import restricted_estimator -from ..algorithms.debiased_lasso import (debiasing_matrix, - pseudoinverse_debiasing_matrix) +from ..base import (restricted_estimator, + _compute_hessian, + _pearsonX2) #### High dimensional version #### - parametric covariance @@ -143,6 +143,8 @@ def fit(self, self._overall, solve_args=solve_args) + # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator + beta_bar = np.zeros(p) beta_bar[overall] = _beta_unpenalized self._beta_full = beta_bar @@ -156,42 +158,18 @@ def fit(self, # U for unpenalized # -E for inactive + # compute part of hessian + + _hessian, _hessian_active, _hessian_unpen = _compute_hessian(self.loglike, + beta_bar, + active, + unpenalized) + + # fill in pieces of query + opt_linear = np.zeros((p, num_opt_var)) _score_linear_term = np.zeros((p, num_opt_var)) - # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator - - X, y = self.loglike.data - linpred = X.dot(beta_bar) - n = linpred.shape[0] - - if hasattr(self.loglike.saturated_loss, "hessian"): # a GLM -- all we need is W - W = self._W = self.loglike.saturated_loss.hessian(linpred) - _hessian_active = np.dot(X.T, X[:, active] * W[:, None]) - _hessian_unpen = np.dot(X.T, X[:, unpenalized] * W[:, None]) - _hessian = np.dot(X.T, X * W[:, None]) # CAREFUL -- this will be big - elif hasattr(self.loglike.saturated_loss, "hessian_mult"): - active_right = np.zeros((n, active.sum())) - for i, j in enumerate(np.nonzero(active)[0]): - active_right[:,i] = self.loglike.saturated_loss.hessian_mult(linpred, - X[:,j], - case_weights=self.loglike.saturated_loss.case_weights) - unpen_right = np.zeros((n, unpenalized.sum())) - for i, j in enumerate(np.nonzero(unpenalized)[0]): - unpen_right[:,i] = self.loglike.saturated_loss.hessian_mult(linpred, - X[:,j], - case_weights=self.loglike.saturated_loss.case_weights) - _hessian_active = X.T.dot(active_right) - _hessian_unpen = X.T.dot(unpen_right) - _hessian = [] - for i in range(p): - _hessian.append(self.loglike.saturated_loss.hessian_mult(linpred, - X[:,i], - case_weights=self.loglike.saturated_loss.case_weights)) - _hessian = X.T.dot(np.array(_hessian).T) - else: - raise ValueError('saturated_loss has no hessian or hessian_mult method') - _score_linear_term = -np.hstack([_hessian_active, _hessian_unpen]) # set the observed score (data dependent) state @@ -249,7 +227,7 @@ def signed_basis_vector(p, j, s): #### to be fixed -- set the cov_score here without dispersion - self._hessian = _hessian + self._unscaled_cov_score = _hessian ##### @@ -699,151 +677,6 @@ def sqrt_lasso(X, return obj -# private functions - -# functions construct targets of inference -# and covariance with score representation - -def selected_targets(loglike, - W, - features, - sign_info={}, - dispersion=None, - solve_args={'tol': 1.e-12, 'min_its': 100}, - hessian=None): - - X, y = loglike.data - n, p = X.shape - - Xfeat = X[:, features] - if hessian is None: - Qfeat = Xfeat.T.dot(W[:, None] * Xfeat) - _score_linear = -Xfeat.T.dot(W[:, None] * X).T - else: - Qfeat = hessian[features][:,features] - _score_linear = -hessian[features].T - observed_target = restricted_estimator(loglike, features, solve_args=solve_args) - cov_target = np.linalg.inv(Qfeat) - crosscov_target_score = 
_score_linear.dot(cov_target) - alternatives = ['twosided'] * features.sum() - features_idx = np.arange(p)[features] - - for i in range(len(alternatives)): - if features_idx[i] in sign_info.keys(): - alternatives[i] = sign_info[features_idx[i]] - - if dispersion is None: # use Pearson's X^2 - dispersion = ((y - loglike.saturated_loss.mean_function( - Xfeat.dot(observed_target))) ** 2 / W).sum() / (n - Xfeat.shape[1]) - - regress_target_score = np.zeros((cov_target.shape[0], p)) - regress_target_score[:,features] = cov_target - return observed_target, cov_target * dispersion, regress_target_score, dispersion, alternatives - -def full_targets(loglike, - W, - features, - dispersion=None, - solve_args={'tol': 1.e-12, 'min_its': 50}, - hessian=None): - - X, y = loglike.data - n, p = X.shape - features_bool = np.zeros(p, np.bool) - features_bool[features] = True - features = features_bool - - # target is one-step estimator - - Qfull = X.T.dot(W[:, None] * X) - if hessian is None: - Qfull = X.T.dot(W[:, None] * X) - else: - Qfull = hessian - - Qfull_inv = np.linalg.inv(Qfull) - full_estimator = loglike.solve(**solve_args) - cov_target = Qfull_inv[features][:, features] - observed_target = full_estimator[features] - crosscov_target_score = np.zeros((p, cov_target.shape[0])) - crosscov_target_score[features] = -np.identity(cov_target.shape[0]) - - if dispersion is None: # use Pearson's X^2 - dispersion = (((y - loglike.saturated_loss.mean_function(X.dot(full_estimator))) ** 2 / W).sum() / - (n - p)) - - alternatives = ['twosided'] * features.sum() - regress_target_score = Qfull_inv[features] # weights missing? - return observed_target, cov_target * dispersion, regress_target_score, dispersion, alternatives - -def debiased_targets(loglike, - W, - features, - sign_info={}, - penalty=None, #required kwarg - dispersion=None, - approximate_inverse='JM', - debiasing_args={}): - - if penalty is None: - raise ValueError('require penalty for consistent estimator') - - X, y = loglike.data - n, p = X.shape - features_bool = np.zeros(p, np.bool) - features_bool[features] = True - features = features_bool - - # relevant rows of approximate inverse - - - if approximate_inverse == 'JM': - Qinv_hat = np.atleast_2d(debiasing_matrix(X * np.sqrt(W)[:, None], - np.nonzero(features)[0], - **debiasing_args)) / n - else: - Qinv_hat = np.atleast_2d(pseudoinverse_debiasing_matrix(X * np.sqrt(W)[:, None], - np.nonzero(features)[0], - **debiasing_args)) - - problem = rr.simple_problem(loglike, penalty) - nonrand_soln = problem.solve() - G_nonrand = loglike.smooth_objective(nonrand_soln, 'grad') - - observed_target = nonrand_soln[features] - Qinv_hat.dot(G_nonrand) - - if p > n: - M1 = Qinv_hat.dot(X.T) - cov_target = (M1 * W[None, :]).dot(M1.T) - crosscov_target_score = -(M1 * W[None, :]).dot(X).T - else: - Qfull = X.T.dot(W[:, None] * X) - cov_target = Qinv_hat.dot(Qfull.dot(Qinv_hat.T)) - crosscov_target_score = -Qinv_hat.dot(Qfull).T - - if dispersion is None: # use Pearson's X^2 - Xfeat = X[:, features] - Qrelax = Xfeat.T.dot(W[:, None] * Xfeat) - relaxed_soln = nonrand_soln[features] - np.linalg.inv(Qrelax).dot(G_nonrand[features]) - dispersion = (((y - loglike.saturated_loss.mean_function(Xfeat.dot(relaxed_soln)))**2 / W).sum() / - (n - features.sum())) - - alternatives = ['twosided'] * features.sum() - return observed_target, cov_target * dispersion, Qinv_hat, dispersion, alternatives - -def form_targets(target, - loglike, - W, - features, - **kwargs): - _target = {'full':full_targets, - 'selected':selected_targets, - 
'debiased':debiased_targets}[target] - return _target(loglike, - W, - features, - **kwargs) - class split_lasso(lasso): """ @@ -940,15 +773,15 @@ def _setup_implied_gaussian(self, prod_score_prec = np.identity(self.nfeature) / ratio - cov_rand = self._hessian * dispersion + cov_rand = self._unscaled_cov_score * dispersion - M1 = prod_score_prec - M2 = M1.dot(cov_rand).dot(M1.T) - M3 = M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T) + M1 = prod_score_prec * dispersion + M2 = M1.dot(cov_rand).dot(M1.T) * (dispersion**2) + M3 = M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T) * (dispersion**2) # would be nice to not store these? - self.M1 = M1 + self.M1 = M1 self.M2 = M2 self.M3 = M3 @@ -1235,3 +1068,5 @@ def poisson(X, return split_lasso(loglike, np.asarray(feature_weights), proportion) + + diff --git a/selectinf/randomized/modelQ.py b/selectinf/randomized/modelQ.py index 62aa37b47..c239f8821 100644 --- a/selectinf/randomized/modelQ.py +++ b/selectinf/randomized/modelQ.py @@ -177,6 +177,8 @@ def fit(self, _hessian_active = self.Q[:, active] _hessian_unpen = self.Q[:, unpenalized] + self._unscaled_cov_score = self.Q + _score_linear_term = -np.hstack([_hessian_active, _hessian_unpen]) # set the observed score (data dependent) state diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 500e64d48..e7a401c01 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -12,10 +12,14 @@ from ..distributions.api import discrete_family from ..constraints.affine import (sample_from_constraints, constraints) +from ..algorithms.barrier_affine import solve_barrier_affine_py +from ..base import (selected_targets, + full_targets, + debiased_targets) + from .posterior_inference import posterior from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C from .approx_reference import approximate_grid_inference -from ..algorithms.barrier_affine import solve_barrier_affine_py class query(object): r""" @@ -164,9 +168,9 @@ def _setup_implied_gaussian(self, cov_rand, prec = self.randomizer.cov_prec if np.asarray(prec).shape in [(), (0,)]: - prod_score_prec_unnorm = self._hessian * prec + prod_score_prec_unnorm = self._unscaled_cov_score * prec else: - prod_score_prec_unnorm = self._hessian.dot(prec) + prod_score_prec_unnorm = self._unscaled_cov_score.dot(prec) if np.asarray(prec).shape in [(), (0,)]: cond_precision = opt_linear.T.dot(opt_linear) * prec @@ -201,7 +205,6 @@ def summary(self, observed_target, cov_target, regress_target_score, - dispersion, alternatives, opt_sample=None, target_sample=None, @@ -234,8 +237,6 @@ def summary(self, Defaults to 1000. compute_intervals : bool Compute confidence intervals? - dispersion : float (optional) - Use a known value for dispersion, or Pearson's X^2? 
""" if parameter is None: @@ -322,7 +323,6 @@ def selective_MLE(self, cov_target, regress_target_score, self.observed_opt_state, -# dispersion=dispersion, level=level, solve_args=solve_args) @@ -373,7 +373,6 @@ def approximate_grid_inference(self, cov_target, regress_target_score, alternatives=None, - dispersion=1, solve_args={'tol': 1.e-12}, useIP=False): @@ -398,7 +397,6 @@ def approximate_grid_inference(self, cov_target, regress_target_score, solve_args=solve_args, - dispersion=dispersion, useIP=useIP) return G.summary(alternatives=alternatives) @@ -1460,5 +1458,3 @@ def selective_MLE(observed_target, return result, observed_info_mean, log_ref - - diff --git a/selectinf/randomized/screening.py b/selectinf/randomized/screening.py index db6602cc4..3c5df5cd6 100644 --- a/selectinf/randomized/screening.py +++ b/selectinf/randomized/screening.py @@ -21,6 +21,7 @@ def __init__(self, self.covariance = covariance self.randomizer = randomizer self._initial_omega = perturb + self._unscaled_cov_score = covariance def fit(self, perturb=None): @@ -28,7 +29,9 @@ def fit(self, perturb=None): self._randomized_score = self.observed_score_state - self._initial_omega return self._randomized_score, self._randomized_score.shape[0] - def multivariate_targets(self, features, dispersion=1.): + def multivariate_targets(self, + features, + dispersion=1): """ Entries of the mean of \Sigma[E,E]^{-1}Z_E """ @@ -42,9 +45,12 @@ def multivariate_targets(self, features, dispersion=1.): return (observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, + dispersion, alternatives) - def full_targets(self, features, dispersion=1.): + def full_targets(self, + features, + dispersion=1): """ Entries of the mean of \Sigma[E,E]^{-1}Z_E """ @@ -55,9 +61,11 @@ def full_targets(self, features, dispersion=1.): crosscov_target_score = -np.identity(Q.shape[0])[:, features] alternatives = ['twosided'] * features.sum() - return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, alternatives + return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, dispersion, alternatives - def marginal_targets(self, features): + def marginal_targets(self, + features, + dispersion=1): """ Entries of the mean of Z_E """ @@ -68,7 +76,7 @@ def marginal_targets(self, features): crosscov_target_score = -score_linear alternatives = ['twosided'] * features.sum() - return observed_target, cov_target, crosscov_target_score.T, alternatives + return observed_target, cov_target, crosscov_target_score.T, dispersion, alternatives class marginal_screening(screening): diff --git a/selectinf/randomized/slope.py b/selectinf/randomized/slope.py index 5f88676e8..b7ede0954 100644 --- a/selectinf/randomized/slope.py +++ b/selectinf/randomized/slope.py @@ -20,7 +20,7 @@ from ..constraints.affine import constraints from .randomization import randomization -from ..base import restricted_estimator +from ..base import restricted_estimator, _compute_hessian from .query import gaussian_query from .lasso import lasso @@ -121,9 +121,11 @@ def fit(self, self.num_opt_var = self.observed_opt_state.shape[0] - X, y = self.loglike.data - W = self._W = self.loglike.saturated_loss.hessian(X.dot(beta_bar)) - _hessian_active = np.dot(X.T, X[:, active] * W[:, None]) + self._unscaled_cov_score, _hessian_active = _compute_hessian(self.loglike, + beta_bar, + active) + + _score_linear_term = -_hessian_active self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0])) @@ -152,6 +154,7 @@ def 
fit(self, if signs_cluster.size == 0: return active_signs else: + X, y = self.loglike.data X_clustered = X[:, indices].dot(signs_cluster) _opt_linear_term = X.T.dot(X_clustered) diff --git a/selectinf/randomized/tests/test_exact_reference.py b/selectinf/randomized/tests/test_exact_reference.py index d8a1e180e..6e7e73f6d 100644 --- a/selectinf/randomized/tests/test_exact_reference.py +++ b/selectinf/randomized/tests/test_exact_reference.py @@ -1,7 +1,8 @@ import numpy as np from ...tests.instance import gaussian_instance -from ..lasso import lasso, selected_targets +from ..lasso import lasso +from ...base import selected_targets from ..exact_reference import exact_grid_inference def test_inf(n=500, @@ -58,8 +59,7 @@ def test_inf(n=500, regress_target_score, dispersion, alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, + conv.observed_soln, dispersion=dispersion) exact_grid_inf = exact_grid_inference(conv, diff --git a/selectinf/randomized/tests/test_lasso.py b/selectinf/randomized/tests/test_lasso.py index 3a16411ec..1ba443c29 100644 --- a/selectinf/randomized/tests/test_lasso.py +++ b/selectinf/randomized/tests/test_lasso.py @@ -5,14 +5,18 @@ import regreg.api as rr -from ..lasso import lasso, selected_targets, full_targets, debiased_targets +from ..lasso import lasso +from ...base import selected_targets, full_targets, debiased_targets from ...tests.instance import gaussian_instance, logistic_instance -from ...tests.flags import SET_SEED +from ...tests.flags import SET_SEED, SMALL_SAMPLES from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue from ...algorithms.sqrt_lasso import choose_lambda, solve_sqrt_lasso from ..randomization import randomization from ...tests.decorators import rpy_test_safe + +@set_seed_iftrue(SET_SEED) +@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=50, burnin=10) def test_highdim_lasso(n=500, p=200, signal_fac=1.5, @@ -59,23 +63,23 @@ def test_highdim_lasso(n=500, (observed_target, cov_target, cov_target_score, + dispersion, alternatives) = full_targets(conv.loglike, - conv._W, - nonzero) + conv.observed_soln) elif target == 'selected': (observed_target, cov_target, cov_target_score, + dispersion, alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero) + conv.observed_soln) elif target == 'debiased': (observed_target, cov_target, cov_target_score, + dispersion, alternatives) = debiased_targets(conv.loglike, - conv._W, - nonzero, + conv.observed_soln, penalty=conv.penalty) result = conv.summary(observed_target, @@ -89,6 +93,8 @@ def test_highdim_lasso(n=500, return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0] +@set_seed_iftrue(SET_SEED) +@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=50, burnin=10) def test_AR_randomization(n=300, p=100, signal=4.5, @@ -147,23 +153,23 @@ def test_AR_randomization(n=300, (observed_target, cov_target, cov_target_score, + dispersion, alternatives) = full_targets(conv.loglike, - conv._W, - nonzero) + conv.observed_soln) elif target == 'selected': (observed_target, cov_target, cov_target_score, + dispersion, alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero) + conv.observed_soln) elif target == 'debiased': (observed_target, cov_target, cov_target_score, + dispersion, alternatives) = debiased_targets(conv.loglike, - conv._W, - nonzero, + conv.observed_soln, penalty=conv.penalty) result = conv.summary(observed_target, @@ -177,10 +183,29 @@ def test_AR_randomization(n=300, return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0] -def test_all_targets(n=100, 
p=20, signal_fac=1.5, s=5, sigma=3, rho=0.4): +@set_seed_iftrue(SET_SEED) +@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=50, burnin=10) +def test_all_targets(n=100, + p=20, + signal_fac=1.5, + s=5, + sigma=3, + rho=0.4, + ndraw=5000, + burnin=1000): for target in ['full', 'selected', 'debiased']: - test_highdim_lasso(n=n, p=p, signal_fac=signal_fac, s=s, sigma=sigma, rho=rho, target=target) + test_highdim_lasso(n=n, + p=p, + signal_fac=signal_fac, + s=s, + sigma=sigma, + rho=rho, + target=target, + ndraw=ndraw, + burnin=burnin) +@set_seed_iftrue(SET_SEED) +@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=50, burnin=10) def test_sqrt_highdim_lasso(n=500, p=200, signal_fac=1.5, @@ -231,7 +256,7 @@ def test_sqrt_highdim_lasso(n=500, q_term = rr.identity_quadratic(ridge_term, 0, -perturb, 0) soln2, sqrt_loss = solve_sqrt_lasso(X, Y, W, solve_args={'min_its':1000}, quadratic=q_term, force_fat=True) - soln = conv.initial_soln + soln = conv.observed_soln denom = np.linalg.norm(Y - X.dot(soln)) new_weights = W * denom @@ -253,16 +278,16 @@ def test_sqrt_highdim_lasso(n=500, (observed_target, cov_target, cov_target_score, + dispersion, alternatives) = full_targets(conv.loglike, - conv._W, - nonzero) + conv.observed_soln) else: (observed_target, cov_target, cov_target_score, + dispersion, alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero) + conv.observed_soln) result = conv.summary(observed_target, cov_target, @@ -275,6 +300,7 @@ def test_sqrt_highdim_lasso(n=500, return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0] +@np.testing.dec.skipif(True, "comparison to R is broken") @set_seed_iftrue(SET_SEED) @rpy_test_safe(libraries=['selectiveInference']) def test_compareR(n=200, @@ -348,13 +374,15 @@ def Rpval(X, Y, W, noise_scale=None): assert np.fabs(conv.ridge_term - ridge_term) / ridge_term < 1.e-4 - assert np.fabs(soln - conv.initial_soln).max() / np.fabs(soln).max() < 1.e-3 + assert np.fabs(soln - conv.observed_soln).max() / np.fabs(soln).max() < 1.e-3 nonzero = signs != 0 assert np.linalg.norm(conv.sampler.affine_con.covariance - cond_cov) / np.linalg.norm(cond_cov) < 1.e-3 assert np.linalg.norm(conv.sampler.affine_con.mean - cond_mean[:,0]) / np.linalg.norm(cond_mean[:,0]) < 1.e-3 +@set_seed_iftrue(SET_SEED) +@set_sampling_params_iftrue(SMALL_SAMPLES, ndraw=50, burnin=10) def test_logistic_lasso(n=500, p=200, signal_fac=1.5, @@ -402,17 +430,16 @@ def test_logistic_lasso(n=500, (observed_target, cov_target, cov_target_score, + dispersion, alternatives) = full_targets(conv.loglike, - conv._W, - nonzero) + conv.observed_soln) else: (observed_target, cov_target, cov_target_score, + dispersion, alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero) - + conv.observed_soln) result = conv.summary(observed_target, cov_target, cov_target_score, @@ -425,41 +452,3 @@ def test_logistic_lasso(n=500, return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0] -def main(nsim=500, n=500, p=200, sqrt=False, target='full', sigma=3, AR=True): - - import matplotlib.pyplot as plt - P0, PA = [], [] - from statsmodels.distributions import ECDF - - for i in range(nsim): - if True: - if not sqrt: - if AR: - p0, pA = test_AR_randomization(n=n, p=p, target=target, sigma=sigma) - else: - p0, pA = test_highdim_lasso(n=n, p=p, target=target, sigma=sigma) - else: - p0, pA = test_sqrt_highdim_lasso(n=n, p=p, target=target, compare_to_lasso=False) - else: - p0, pA = [], [] - print(len(p0), len(pA)) - P0.extend(p0) - PA.extend(pA) - - P0_clean = np.array(P0) - - P0_clean = P0_clean[P0_clean > 
1.e-5] # - print(np.mean(P0_clean), np.std(P0_clean), np.mean(np.array(PA) < 0.05), np.sum(np.array(PA) < 0.05) / (i+1), np.mean(np.array(P0) < 0.05), np.mean(P0_clean < 0.05), np.mean(np.array(P0) < 1e-5), 'null pvalue + power + failure') - - if i % 3 == 0 and i > 0: - U = np.linspace(0, 1, 101) - plt.clf() - if len(P0_clean) > 0: - plt.plot(U, ECDF(P0_clean)(U)) - if len(PA) > 0: - plt.plot(U, ECDF(PA)(U), 'r') - plt.plot([0, 1], [0, 1], 'k--') - plt.savefig("plot.pdf") - plt.show() - - diff --git a/selectinf/randomized/tests/test_marginal_screening.py b/selectinf/randomized/tests/test_marginal_screening.py index 6db0fbdf2..e8dac39aa 100644 --- a/selectinf/randomized/tests/test_marginal_screening.py +++ b/selectinf/randomized/tests/test_marginal_screening.py @@ -49,11 +49,13 @@ def test_marginal(n=500, (observed_target, cov_target, crosscov_target_score, + dispersion, alternatives) = marginal_select.marginal_targets(nonzero) else: (observed_target, cov_target, crosscov_target_score, + dispersion, alternatives) = marginal_select.multivariate_targets(nonzero, dispersion=sigma**2) if use_MLE: @@ -137,6 +139,7 @@ def test_simple(n=100, (observed_target, cov_target, crosscov_target_score, + dispersion, alternatives) = marginal_select.marginal_targets(nonzero) if use_MLE: diff --git a/selectinf/randomized/tests/test_modelQ.py b/selectinf/randomized/tests/test_modelQ.py index e88522423..09d70d29c 100644 --- a/selectinf/randomized/tests/test_modelQ.py +++ b/selectinf/randomized/tests/test_modelQ.py @@ -29,7 +29,7 @@ def test_modelQ(): conH = LH.sampler.affine_con conQ = LQ.sampler.affine_con - np.testing.assert_allclose(LH.initial_soln, LQ.initial_soln) + np.testing.assert_allclose(LH.observed_soln, LQ.observed_soln) np.testing.assert_allclose(LH.initial_subgrad, LQ.initial_subgrad) np.testing.assert_allclose(conH.linear_part, conQ.linear_part) diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index fbfbbb5ce..66780a5b4 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -2,10 +2,11 @@ import pandas as pd from scipy.stats import norm as ndist -from ..lasso import lasso, selected_targets, split_lasso +from ..lasso import lasso, split_lasso from ..posterior_inference import (langevin_sampler, gibbs_sampler) +from ...base import selected_targets from ...tests.instance import gaussian_instance, HIV_NRTI from ...tests.flags import SET_SEED, SMALL_SAMPLES from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue @@ -57,8 +58,7 @@ def test_Langevin(n=500, regress_target_score, dispersion, alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, + conv.observed_soln, dispersion=dispersion) posterior_inf = conv.posterior(observed_target, @@ -127,9 +127,9 @@ def test_instance(nsample=100, nburnin=50): regress_target_score, dispersion, alternatives)= selected_targets(L.loglike, - L._W, - M, - dispersion=dispersion) + L.observed_soln, + features=M, + dispersion=dispersion) posterior_inf = L.posterior(observed_target, cov_target, @@ -183,8 +183,8 @@ def test_flexible_prior1(nsample=100, regress_target_score, dispersion, alternatives) = selected_targets(L.loglike, - L._W, - M, + L.observed_soln, + features=M, dispersion=dispersion) # default prior @@ -253,8 +253,8 @@ def test_flexible_prior2(nsample=1000, nburnin=50): regress_target_score, dispersion, alternatives) = selected_targets(L.loglike, - L._W, - M, + L.observed_soln, + features=M, dispersion=dispersion) 
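# The calling convention these hunks migrate to, summarized in one place: the
# target constructors now live in selectinf.base, take the fitted solution
# rather than the GLM weight vector, and return the dispersion as part of the
# tuple. A minimal sketch of the pattern, assuming a fitted randomized lasso
# `conv` and a dispersion estimate `dispersion` (illustrative only):
#
#     from selectinf.base import selected_targets
#     (observed_target,
#      cov_target,
#      regress_target_score,
#      dispersion,
#      alternatives) = selected_targets(conv.loglike,
#                                       conv.observed_soln,
#                                       dispersion=dispersion)
#
#     result = conv.selective_MLE(observed_target,
#                                 cov_target,
#                                 regress_target_score)[0]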
prior_var = 0.05 ** 2 @@ -318,21 +318,18 @@ def test_hiv_data(nsample=10000, regress_target_score, dispersion, alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, + conv.observed_soln, dispersion=dispersion) mle, inverse_info = conv.selective_MLE(observed_target, cov_target, regress_target_score, - dispersion, level=level, solve_args={'tol': 1.e-12})[:2] approx_inf = conv.approximate_grid_inference(observed_target, cov_target, regress_target_score, - dispersion=dispersion, useIP=False) posterior_inf = conv.posterior(observed_target, diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 3ece533c2..818cdc012 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -5,8 +5,9 @@ from ..lasso import (lasso, - split_lasso, - full_targets, + split_lasso) + +from ...base import (full_targets, selected_targets, debiased_targets) from ...tests.instance import (gaussian_instance, @@ -69,7 +70,7 @@ def test_full_targets(n=200, regress_target_score, dispersion, alternatives) = full_targets(conv.loglike, - conv._W, + conv.observed_soln, nonzero, dispersion=dispersion) else: @@ -78,7 +79,7 @@ def test_full_targets(n=200, regress_target_score, dispersion, alternatives) = debiased_targets(conv.loglike, - conv._W, + conv.observed_soln, nonzero, penalty=conv.penalty, dispersion=dispersion) @@ -151,8 +152,7 @@ def test_selected_targets(n=2000, regress_target_score, dispersion, alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, + conv.observed_soln, dispersion=dispersion) result = conv.selective_MLE(observed_target, @@ -193,8 +193,8 @@ def test_instance(): regress_target_score, dispersion, alternatives) = selected_targets(L.loglike, - L._W, - M, + L.observed_soln, + features=M, dispersion=dispersion) print("check shapes", observed_target.shape, E.sum()) @@ -261,8 +261,7 @@ def test_selected_targets_disperse(n=500, regress_target_score, dispersion, alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, + conv.observed_soln, dispersion=dispersion) result = conv.selective_MLE(observed_target, @@ -323,8 +322,7 @@ def test_logistic(n=2000, cov_target_score, dispersion, alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, + conv.observed_soln, dispersion=1) result = conv.selective_MLE(observed_target, @@ -380,8 +378,7 @@ def test_logistic_split(n=2000, cov_target_score, dispersion, alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, + conv.observed_soln, dispersion=1) result = conv.selective_MLE(observed_target, @@ -437,8 +434,7 @@ def test_poisson(n=2000, cov_target_score, dispersion, alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, + conv.observed_soln, dispersion=1) result = conv.selective_MLE(observed_target, @@ -494,8 +490,7 @@ def test_poisson_split(n=2000, cov_target_score, dispersion, alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, + conv.observed_soln, dispersion=1) result = conv.selective_MLE(observed_target, @@ -554,9 +549,7 @@ def test_cox(n=2000, cov_target_score, dispersion, alternatives) = selected_targets(conv.loglike, - None, - nonzero, - hessian=full_hess, + conv.observed_soln, dispersion=1) result = conv.selective_MLE(observed_target, @@ -615,9 +608,7 @@ def test_cox_split(n=2000, cov_target_score, dispersion, alternatives) = selected_targets(conv.loglike, - None, - nonzero, - hessian=full_hess, + conv.observed_soln, dispersion=1) 
result = conv.selective_MLE(observed_target, @@ -683,8 +674,7 @@ def test_scale_invariant_split(n=200, cov_target_score, dispersion, alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, + conv.observed_soln, dispersion=dispersion) print('dispersion', dispersion/scale**2) @@ -766,8 +756,7 @@ def test_scale_invariant(n=200, cov_target_score, dispersion, alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, + conv.observed_soln, dispersion=dispersion) print('dispersion', dispersion/scale**2) @@ -796,46 +785,3 @@ def test_scale_invariant(n=200, results[1]['pvalue']) -def test_instance(): - n, p, s = 500, 100, 5 - X = np.random.standard_normal((n, p)) - beta = np.zeros(p) - beta[:s] = np.sqrt(2 * np.log(p) / n) - Y = X.dot(beta) + np.random.standard_normal(n) - - scale_ = np.std(Y) - # uses noise of variance n * scale_ / 4 by default - L = lasso.gaussian(X, Y, 3 * scale_ * np.sqrt(2 * np.log(p) * np.sqrt(n))) - signs = L.fit() - E = (signs != 0) - - M = E.copy() - M[-3:] = 1 - print("check ", M) - dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = selected_targets(L.loglike, - L._W, - M, - dispersion=dispersion) - - print("check shapes", observed_target.shape, E.sum()) - - result = L.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] - estimate = result['MLE'] - pval = result['pvalue'] - intervals = np.asarray(result[['lower_confidence', - 'upper_confidence']]) - - beta_target = np.linalg.pinv(X[:, M]).dot(X.dot(beta)) - - coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) - print("observed_opt_state ", L.observed_opt_state) - # print("check ", np.asarray(result['MLE']), np.asarray(result['unbiased'])) - - return coverage diff --git a/selectinf/randomized/tests/test_slope.py b/selectinf/randomized/tests/test_slope.py index 66a89ac19..65cc553c7 100644 --- a/selectinf/randomized/tests/test_slope.py +++ b/selectinf/randomized/tests/test_slope.py @@ -6,7 +6,7 @@ import regreg.api as rr from ..slope import slope -from ..lasso import full_targets, selected_targets +from ...base import full_targets, selected_targets from ...tests.decorators import rpy_test_safe try: @@ -155,16 +155,18 @@ def test_randomized_slope(n=2000, (observed_target, cov_target, cov_target_score, + dispersion, alternatives) = full_targets(conv.loglike, - conv._W, - nonzero, dispersion=sigma_) + conv.observed_soln, + dispersion=sigma_) elif target == 'selected': (observed_target, cov_target, cov_target_score, + dispersion, alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, dispersion=sigma_) + conv.observed_soln, + dispersion=sigma_) if target == "selected": beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) @@ -196,17 +198,6 @@ def test_randomized_slope(n=2000, if True: return pval[beta_target == 0], pval[beta_target != 0], coverage, lower, upper -def main(nsim=100, use_MLE=True): - - P0, PA, cover, length_int = [], [], [], [] - - for i in range(nsim): - p0, pA, cover_, _, _ = test_randomized_slope(use_MLE=use_MLE) - - cover.extend(cover_) - P0.extend(p0) - PA.extend(pA) - print('coverage', np.mean(cover)) diff --git a/selectinf/randomized/tests/test_topK.py b/selectinf/randomized/tests/test_topK.py index 8091f8ac3..45dbb54b9 100644 --- a/selectinf/randomized/tests/test_topK.py +++ b/selectinf/randomized/tests/test_topK.py @@ -49,11 +49,13 @@ def test_topK(n=500, (observed_target, cov_target, 
crosscov_target_score, + dispersion, alternatives) = topK_select.marginal_targets(nonzero) else: (observed_target, cov_target, crosscov_target_score, + dispersion, alternatives) = topK_select.multivariate_targets(nonzero, dispersion=sigma**2) if use_MLE: @@ -129,12 +131,14 @@ def test_bias_topK(n=500, (observed_target, cov_target, crosscov_target_score, + dispersion, alternatives) = topK_select.marginal_targets(nonzero) else: beta_target = beta[nonzero] (observed_target, cov_target, crosscov_target_score, + dispersion, alternatives) = topK_select.multivariate_targets(nonzero, dispersion=sigma**2) result = topK_select.selective_MLE(observed_target, From 048474b5e155409794f4c1808d6c643e0c37b0c0 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 18 Aug 2021 16:24:43 -0700 Subject: [PATCH 138/187] fix target calls in multiple queries --- .../randomized/tests/test_multiple_queries.py | 49 +++++-------------- 1 file changed, 13 insertions(+), 36 deletions(-) diff --git a/selectinf/randomized/tests/test_multiple_queries.py b/selectinf/randomized/tests/test_multiple_queries.py index 38c069f9e..267b7e53b 100644 --- a/selectinf/randomized/tests/test_multiple_queries.py +++ b/selectinf/randomized/tests/test_multiple_queries.py @@ -5,12 +5,14 @@ import regreg.api as rr -from ..lasso import lasso, selected_targets, full_targets, debiased_targets -from ..screening import marginal_screening -from ..query import multiple_queries +from ...base import selected_targets from ...tests.instance import gaussian_instance from ...algorithms.sqrt_lasso import choose_lambda, solve_sqrt_lasso +from ..lasso import lasso +from ..screening import marginal_screening +from ..query import multiple_queries + # the test here is marginal_screening + lasso def test_multiple_queries(n=500, p=100, @@ -60,14 +62,19 @@ def test_multiple_queries(n=500, if nonzero.sum() == 0: return [], [] - observed_target1, cov_target1, cov_target_score1, alternatives1 = conv1.multivariate_targets(nonzero, sigma**2) + (observed_target1, + cov_target1, + cov_target_score1, + dispersion1, + alternatives1) = conv1.multivariate_targets(nonzero, sigma**2) (observed_target2, cov_target2, cov_target_score2, + dispersion2, alternatives2) = selected_targets(conv2.loglike, - conv2._W, - nonzero) + conv2.observed_soln, + features=nonzero) mq = multiple_queries([conv1, conv2]) @@ -79,33 +86,3 @@ def test_multiple_queries(n=500, return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0] -def main(nsim=500, n=500, p=100, sigma=3): - - P0, PA = [], [] - from statsmodels.distributions import ECDF - import matplotlib.pyplot as plt - - for i in range(nsim): - if True: - p0, pA = test_multiple_queries(n=n, p=p, sigma=sigma) - else: - p0, pA = [], [] - P0.extend(p0) - PA.extend(pA) - - P0_clean = np.array(P0) - - P0_clean = P0_clean[P0_clean > 1.e-5] # - print(np.mean(P0_clean), np.std(P0_clean), np.mean(np.array(PA) < 0.05), np.mean(np.array(P0) < 0.05), np.mean(P0_clean < 0.05), np.mean(np.array(P0) < 1e-5)) - - if i % 3 == 0 and i > 0: - U = np.linspace(0, 1, 101) - plt.clf() - if len(P0_clean) > 0: - plt.plot(U, ECDF(P0_clean)(U)) - if len(PA) > 0: - plt.plot(U, ECDF(PA)(U), 'r') - plt.plot([0, 1], [0, 1], 'k--') - plt.savefig("plot.pdf") - plt.show() - From 3d3f7784342394962632ad7d3ec1ae7739f642f6 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 18 Aug 2021 16:57:36 -0700 Subject: [PATCH 139/187] BF: fixing handling of dispersion --- selectinf/randomized/lasso.py | 4 ++-- selectinf/randomized/query.py | 4 ++-- 2 files changed, 4 insertions(+), 4
deletions(-) diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index ce4062033..6beb26dc0 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -776,8 +776,8 @@ def _setup_implied_gaussian(self, cov_rand = self._unscaled_cov_score * dispersion M1 = prod_score_prec * dispersion - M2 = M1.dot(cov_rand).dot(M1.T) * (dispersion**2) - M3 = M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T) * (dispersion**2) + M2 = M1.dot(cov_rand).dot(M1.T) + M3 = M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T) # would be nice to not store these? diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index e7a401c01..a20e0240c 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -186,8 +186,8 @@ def _setup_implied_gaussian(self, cond_mean = regress_opt.dot(self.observed_score_state + observed_subgrad) M1 = prod_score_prec_unnorm * dispersion - M2 = M1.dot(cov_rand).dot(M1.T) * (dispersion**2) - M3 = M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T) * (dispersion**2) + M2 = M1.dot(cov_rand).dot(M1.T) + M3 = M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T) self.M1 = M1 self.M2 = M2 From c8eca0a2f98b5a8c8db9d68944c25d8e9e9f2edf Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 24 Aug 2021 10:28:40 -0700 Subject: [PATCH 140/187] using NamedTuple for target specification as this arg appears over and over --- selectinf/base.py | 31 ++- selectinf/randomized/approx_reference.py | 12 +- selectinf/randomized/drop_losers.py | 41 ++-- selectinf/randomized/exact_reference.py | 12 +- selectinf/randomized/posterior_inference.py | 22 +- selectinf/randomized/query.py | 105 ++++---- selectinf/randomized/screening.py | 51 ++-- selectinf/randomized/tests/test_BH.py | 59 +---- .../randomized/tests/test_approx_reference.py | 22 +- .../randomized/tests/test_drop_losers.py | 49 ++-- .../randomized/tests/test_exact_reference.py | 14 +- selectinf/randomized/tests/test_hiv_data.py | 117 +++++++++ selectinf/randomized/tests/test_lasso.py | 106 +++----- .../tests/test_marginal_screening.py | 60 +---- .../randomized/tests/test_multiple_queries.py | 22 +- selectinf/randomized/tests/test_posterior.py | 193 ++------------- .../tests/test_selective_MLE_high.py | 227 ++++++------------ .../tests/test_selective_MLE_onedim.py | 30 +-- selectinf/randomized/tests/test_slope.py | 29 +-- .../randomized/tests/test_split_lasso.py | 79 ++---- .../tests/test_standalone_lasso_mle.py | 49 ++-- selectinf/randomized/tests/test_topK.py | 79 +----- .../tests/test_unbiased_estimates.py | 30 +-- 23 files changed, 521 insertions(+), 918 deletions(-) create mode 100644 selectinf/randomized/tests/test_hiv_data.py diff --git a/selectinf/base.py b/selectinf/base.py index c6ee4ac46..b6fbc182a 100644 --- a/selectinf/base.py +++ b/selectinf/base.py @@ -1,3 +1,5 @@ +import typing + import numpy as np import regreg.api as rr @@ -45,6 +47,14 @@ def restricted_estimator(loss, active, solve_args={'min_its':50, 'tol':1.e-10}): # functions construct targets of inference # and covariance with score representation +class TargetSpec(typing.NamedTuple): + + observed_target : np.ndarray + cov_target : np.ndarray + regress_target_score : np.ndarray + alternatives : list + dispersion : float = 1 + def selected_targets(loglike, solution, features=None, @@ -85,7 +95,12 @@ def selected_targets(loglike, regress_target_score = np.zeros((cov_target.shape[0], p)) regress_target_score[:,features] = cov_target - return observed_target, 
cov_target * dispersion, regress_target_score, dispersion, alternatives + + return TargetSpec(observed_target, + cov_target * dispersion, + regress_target_score, + alternatives, + dispersion) def full_targets(loglike, solution, @@ -124,7 +139,12 @@ def full_targets(loglike, alternatives = ['twosided'] * features.sum() regress_target_score = Qfull_inv[features] # weights missing? - return observed_target, cov_target * dispersion, regress_target_score, dispersion, alternatives + + return TargetSpec(observed_target, + cov_target * dispersion, + regress_target_score, + alternatives, + dispersion) def debiased_targets(loglike, solution, @@ -189,7 +209,12 @@ def debiased_targets(loglike, features.sum()) alternatives = ['twosided'] * features.sum() - return observed_target, cov_target * dispersion, Qinv_hat, dispersion, alternatives + + return TargetSpec(observed_target, + cov_target * dispersion, + Qinv_hat, + alternatives, + dispersion) def form_targets(target, loglike, diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index 5b1e43c19..7d10c4ef1 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -11,9 +11,7 @@ class approximate_grid_inference(object): def __init__(self, query, - observed_target, - cov_target, - regress_target_score, + target_spec, solve_args={'tol': 1.e-12}, useIP=False): @@ -35,6 +33,10 @@ def __init__(self, Arguments passed to solver. """ + (observed_target, + cov_target, + regress_target_score) = target_spec[:3] + self.solve_args = solve_args linear_part = query.sampler.affine_con.linear_part @@ -44,9 +46,7 @@ def __init__(self, observed_score = query.observed_score_state + query.observed_subgrad - result, inverse_info, log_ref = query.selective_MLE(observed_target, - cov_target, - regress_target_score) + result, inverse_info, log_ref = query.selective_MLE(target_spec) cond_cov = query.cond_cov self.cond_precision = np.linalg.inv(cond_cov) diff --git a/selectinf/randomized/drop_losers.py b/selectinf/randomized/drop_losers.py index ac3134144..6c5d45cb3 100644 --- a/selectinf/randomized/drop_losers.py +++ b/selectinf/randomized/drop_losers.py @@ -6,6 +6,7 @@ from .query import gaussian_query from .randomization import randomization +from ..base import TargetSpec class drop_losers(gaussian_query): @@ -41,7 +42,7 @@ def __init__(self, A = -np.identity(K) b = -np.ones(K) * best_loser linear = np.identity(K) - offset = np.zeros(K) + observed_subgrad = np.zeros(K) # Work out the implied randomization variance # Let X1=X[stage1].mean(), X2=X[stage2].mean() and Xf = X.mean() @@ -60,11 +61,12 @@ def __init__(self, # needed for gaussian_query api self.randomizer = randomization.gaussian(np.diag(std_win**2) * mult) - self.observed_opt_state = stage1_means['data'].iloc[:K] - self.observed_score_state = -self.means[self._winners] # problem is a minimization + self.observed_opt_state = np.asarray(stage1_means['data'].iloc[:K]) + self.observed_score_state = -np.asarray(self.means[self._winners]) # problem is a minimization self.selection_variable = {'winners':self._winners} - self._setup_sampler(A, b, linear, offset) + self._unscaled_cov_score = np.diag(std_win**2) * (1/n1_win + 1/n2_win) + self._setup_sampler(A, b, linear, observed_subgrad) def MLE_inference(self, level=0.9, @@ -82,15 +84,19 @@ def MLE_inference(self, """ - observed_target = self.means[self._winners] - std_win = self.std.loc[self._winners] + observed_target = np.asarray(self.means[self._winners]) + std_win = 
np.asarray(self.std.loc[self._winners]) cov_target = np.diag(std_win**2 / (self._n1_win + self._n2_win)) - cov_target_score = -cov_target + regress_target_score = -np.identity(observed_target.shape[0]) + + target_spec = TargetSpec(observed_target, + cov_target, + regress_target_score, + dispersion=1, + alternatives=['greater']*observed_target.shape[0]) result = gaussian_query.selective_MLE(self, - observed_target, - cov_target, - cov_target_score, + target_spec, level=level, solve_args=solve_args) result[0].insert(0, 'arm', self._winners) @@ -118,16 +124,19 @@ def summary(self, Defaults to 1000. """ - observed_target = self.means[self._winners] + observed_target = np.asarray(self.means[self._winners]) std_win = self.std.loc[self._winners] cov_target = np.diag(std_win**2 / (self._n1_win + self._n2_win)) - cov_target_score = -cov_target + regress_target_score = -np.identity(observed_target.shape[0]) + + target_spec = TargetSpec(observed_target, + cov_target, + regress_target_score, + dispersion=1, + alternatives=['greater']*observed_target.shape[0]) result = gaussian_query.summary(self, - observed_target, - cov_target, - cov_target_score, - alternatives=['twosided']*self.K, + target_spec, ndraw=ndraw, level=level, burnin=burnin, diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index 9facaa7fe..13fdbd4a6 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -10,9 +10,7 @@ class exact_grid_inference(object): def __init__(self, query, - observed_target, - cov_target, - regress_target_score, + target_spec, solve_args={'tol': 1.e-12}): """ @@ -33,6 +31,10 @@ def __init__(self, Arguments passed to solver. """ + (observed_target, + cov_target, + regress_target_score) = target_spec[:3] + self.solve_args = solve_args linear_part = query.sampler.affine_con.linear_part @@ -42,9 +44,7 @@ def __init__(self, observed_score = query.observed_score_state + query.observed_subgrad - result, inverse_info, log_ref = query.selective_MLE(observed_target, - cov_target, - regress_target_score) + result, inverse_info, log_ref = query.selective_MLE(target_spec) cond_cov = query.cond_cov self.cond_precision = np.linalg.inv(cond_cov) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index bbab9bd5d..4284f5211 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -29,15 +29,18 @@ class posterior(object): def __init__(self, query, - observed_target, - cov_target, - regress_target_score, - dispersion, + target_spec, prior, solve_args={'tol': 1.e-12}): self.solve_args = solve_args + (observed_target, + cov_target, + regress_target_score, + _, + dispersion) = target_spec + linear_part = query.sampler.affine_con.linear_part offset = query.sampler.affine_con.offset @@ -45,9 +48,7 @@ def __init__(self, observed_score = query.observed_score_state + query.observed_subgrad - result, self.inverse_info, log_ref = query.selective_MLE(observed_target, - cov_target, - regress_target_score) + result, self.inverse_info, log_ref = query.selective_MLE(target_spec) ### Note for an informative prior we might want to change this... 
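This commit threads a single TargetSpec named tuple through selective_MLE, posterior and the grid-inference helpers in place of the separate observed_target / cov_target / regress_target_score arguments. Below is a small, hedged sketch of that container, matching the TargetSpec definition added to selectinf/base.py in this patch; the numbers are invented purely for illustration.

    import numpy as np
    from selectinf.base import TargetSpec

    # a toy 2-dimensional target specification; values are illustrative only
    spec = TargetSpec(observed_target=np.array([1.2, -0.4]),
                      cov_target=0.25 * np.identity(2),
                      regress_target_score=np.zeros((2, 10)),
                      alternatives=['twosided', 'twosided'],
                      dispersion=1.)

    # consumers may unpack the first three fields positionally, as in
    # approx_reference.py and exact_reference.py above ...
    observed_target, cov_target, regress_target_score = spec[:3]

    # ... or read fields by name, as query.py does
    print(spec.alternatives, spec.dispersion)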
@@ -217,6 +218,13 @@ def gibbs_sampler(selective_posterior, sample = sampler.__next__() samples[i, :] = sample + import sys + sys.stderr.write('a: ' + str(0.1 + + selective_posterior.ntarget + + selective_posterior.ntarget / 2)+'\n') + sys.stderr.write('scale: ' + str(0.1 - ((scale_update ** 2) * sampler.posterior_[0])) + '\n') + sys.stderr.write('scale_update: ' + str(scale_update) + '\n') + sys.stderr.write('initpoint: ' + str(sampler.posterior_[0]) + '\n') scale_update_sq = invgamma.rvs(a=(0.1 + selective_posterior.ntarget + selective_posterior.ntarget / 2), diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index a20e0240c..b2cd82373 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -202,10 +202,7 @@ def _setup_implied_gaussian(self, M3) def summary(self, - observed_target, - cov_target, - regress_target_score, - alternatives, + target_spec, opt_sample=None, target_sample=None, parameter=None, @@ -240,7 +237,7 @@ def summary(self, """ if parameter is None: - parameter = np.zeros_like(observed_target) + parameter = np.zeros_like(target_spec.observed_target) if opt_sample is None: opt_sample, logW = self.sampler.sample(ndraw, burnin) @@ -252,38 +249,36 @@ def summary(self, opt_sample, logW = opt_sample ndraw = opt_sample.shape[0] - pivots = self.sampler.coefficient_pvalues(observed_target, - cov_target, - regress_target_score, + pivots = self.sampler.coefficient_pvalues(target_spec.observed_target, + target_spec.cov_target, + target_spec.regress_target_score, parameter=parameter, sample=(opt_sample, logW), normal_sample=target_sample, - alternatives=alternatives) + alternatives=target_spec.alternatives) if not np.all(parameter == 0): - pvalues = self.sampler.coefficient_pvalues(observed_target, - cov_target, - regress_target_score, + pvalues = self.sampler.coefficient_pvalues(target_spec.observed_target, + target_spec.cov_target, + target_spec.regress_target_score, parameter=np.zeros_like(parameter), sample=(opt_sample, logW), normal_sample=target_sample, - alternatives=alternatives) + alternatives=target_spec.alternatives) else: pvalues = pivots - result = pd.DataFrame({'target': observed_target, + result = pd.DataFrame({'target': target_spec.observed_target, 'pvalue': pvalues}) if compute_intervals: - MLE = self.selective_MLE(observed_target, - cov_target, - regress_target_score)[0] + MLE = self.selective_MLE(target_spec)[0] MLE_intervals = np.asarray(MLE[['lower_confidence', 'upper_confidence']]) intervals = self.sampler.confidence_intervals( - observed_target, - cov_target, - regress_target_score, + target_spec.observed_target, + target_spec.cov_target, + target_spec.regress_target_score, sample=(opt_sample, logW), normal_sample=target_sample, initial_guess=MLE_intervals, @@ -299,9 +294,7 @@ def summary(self, return result def selective_MLE(self, - observed_target, - cov_target, - regress_target_score, + target_spec, level=0.9, solve_args={'tol': 1.e-12}): """ @@ -319,18 +312,13 @@ def selective_MLE(self, Arguments passed to solver. """ - return self.sampler.selective_MLE(observed_target, - cov_target, - regress_target_score, + return self.sampler.selective_MLE(target_spec, self.observed_opt_state, level=level, solve_args=solve_args) def posterior(self, - observed_target, - cov_target, - regress_target_score, - dispersion=1, + target_spec, prior=None, solve_args={'tol': 1.e-12}): """ @@ -353,7 +341,7 @@ def posterior(self, """ if prior is None: - Di = 1. / (200 * np.diag(cov_target)) + Di = 1. 
/ (200 * np.diag(target_spec.cov_target)) def prior(target_parameter): grad_prior = -target_parameter * Di @@ -361,18 +349,12 @@ def prior(target_parameter): return log_prior, grad_prior return posterior(self, - observed_target, - cov_target, - regress_target_score, - dispersion, + target_spec, prior, solve_args=solve_args) def approximate_grid_inference(self, - observed_target, - cov_target, - regress_target_score, - alternatives=None, + target_spec, solve_args={'tol': 1.e-12}, useIP=False): @@ -393,12 +375,10 @@ def approximate_grid_inference(self, """ G = approximate_grid_inference(self, - observed_target, - cov_target, - regress_target_score, + target_spec, solve_args=solve_args, useIP=useIP) - return G.summary(alternatives=alternatives) + return G.summary(alternatives=target_spec.alternatives) class multiple_queries(object): @@ -438,10 +418,10 @@ def fit(self): objective.fit() def summary(self, - observed_target, - opt_sampling_info, # a sequence of (cov_target, score_cov) + target_specs, + # a sequence of target_specs # objects in theory all cov_target - # should be about the same... + # should be about the same. as should the observed_target alternatives=None, parameter=None, level=0.9, @@ -471,25 +451,28 @@ def summary(self, Compute confidence intervals? """ + observed_target = target_specs[0].observed_target + alternatives = target_specs[0].alternatives + if parameter is None: parameter = np.zeros_like(observed_target) if alternatives is None: alternatives = ['twosided'] * observed_target.shape[0] - if len(self.objectives) != len(opt_sampling_info): + if len(self.objectives) != len(target_specs): raise ValueError("number of objectives and sampling cov infos do not match") self.opt_sampling_info = [] for i in range(len(self.objectives)): - if opt_sampling_info[i][0] is None or opt_sampling_info[i][1] is None: + if target_specs[i].cov_target is None or target_specs[i].regress_target_score is None: raise ValueError("did not input target and score covariance info") opt_sample, opt_logW = self.objectives[i].sampler.sample(ndraw, burnin) self.opt_sampling_info.append((self.objectives[i].sampler, opt_sample, opt_logW, - opt_sampling_info[i][0], - opt_sampling_info[i][1])) + target_specs[i].cov_target, + target_specs[i].regress_target_score)) pivots = self.coefficient_pvalues(observed_target, parameter=parameter, @@ -568,7 +551,7 @@ def coefficient_pvalues(self, return np.array(pvals) def confidence_intervals(self, - observed_target, + target_specs, sample_args=(), level=0.9): @@ -948,9 +931,7 @@ def sample(self, ndraw, burnin): return _sample, np.zeros(_sample.shape[0]) def selective_MLE(self, - observed_target, - cov_target, - regress_target_score, + target_spec, # initial (observed) value of optimization variables -- # used as a feasible point. # precise value used only for independent estimator @@ -976,13 +957,7 @@ def selective_MLE(self, Arguments passed to solver. """ - # self.M1 = self.M1 * dispersion - # self.M2 = self.M2 * (dispersion**2) - # self.M3 = self.M3 * (dispersion**2) - - return selective_MLE(observed_target, - cov_target, - regress_target_score, + return selective_MLE(target_spec, observed_soln, self.mean, self.covariance, @@ -1335,9 +1310,7 @@ def naive_pvalues(diag_cov, observed, parameter): pvalues[j] = 2 * min(pval, 1 - pval) return pvalues -def selective_MLE(observed_target, - cov_target, - regress_target_score, +def selective_MLE(target_spec, observed_soln, # initial (observed) value of # optimization variables -- used as a # feasible point. 
precise value used @@ -1387,6 +1360,10 @@ def selective_MLE(observed_target, Use python or C solver. """ + (observed_target, + cov_target, + regress_target_score) = target_spec[:3] + if np.asarray(observed_target).shape in [(), (0,)]: raise ValueError('no target specified') diff --git a/selectinf/randomized/screening.py b/selectinf/randomized/screening.py index 3c5df5cd6..0b61626b0 100644 --- a/selectinf/randomized/screening.py +++ b/selectinf/randomized/screening.py @@ -7,12 +7,13 @@ from .query import gaussian_query from .randomization import randomization +from ..base import TargetSpec class screening(gaussian_query): def __init__(self, observed_data, - covariance, + covariance, # unscaled randomizer, perturb=None): @@ -35,33 +36,38 @@ def multivariate_targets(self, """ Entries of the mean of \Sigma[E,E]^{-1}Z_E """ - score_linear = self.covariance[:, features].copy() / dispersion - Q = score_linear[features] - cov_target = np.linalg.inv(Q) + Q = self.covariance[features][:,features] + Qinv = np.linalg.inv(Q) + cov_target = np.linalg.inv(Q) * dispersion observed_target = -np.linalg.inv(Q).dot(self.observed_score_state[features]) - crosscov_target_score = -score_linear.dot(cov_target) + regress_target_score = -Qinv.dot(np.identity(self.covariance.shape[0])[features]) alternatives = ['twosided'] * features.sum() - return (observed_target, - cov_target * dispersion, - crosscov_target_score.T * dispersion, - dispersion, - alternatives) + return TargetSpec(observed_target, + cov_target, + regress_target_score, + alternatives, + dispersion) def full_targets(self, features, dispersion=1): """ - Entries of the mean of \Sigma[E,E]^{-1}Z_E + Entries of the mean of (\Sigma^{-1}Z)[E] """ - score_linear = self.covariance[:, features].copy() / dispersion - Q = self.covariance / dispersion - cov_target = (np.linalg.inv(Q)[features])[:, features] + + Q = self.covariance + Qinv = np.linalg.inv(Q) + cov_target = Qinv[features][:, features] * dispersion observed_target = -np.linalg.inv(Q).dot(self.observed_score_state)[features] - crosscov_target_score = -np.identity(Q.shape[0])[:, features] + regress_target_score = -Qinv[:, features] alternatives = ['twosided'] * features.sum() - return observed_target, cov_target * dispersion, crosscov_target_score.T * dispersion, dispersion, alternatives + return TargetSpec(observed_target, + cov_target, + regress_target_score.T, + alternatives, + dispersion) def marginal_targets(self, features, @@ -69,14 +75,17 @@ def marginal_targets(self, """ Entries of the mean of Z_E """ - score_linear = self.covariance[:, features] - Q = score_linear[features] - cov_target = Q + Q = self.covariance[features][:,features] + cov_target = Q * dispersion observed_target = -self.observed_score_state[features] - crosscov_target_score = -score_linear + regress_target_score = -np.identity(self.covariance.shape[0])[:,features] alternatives = ['twosided'] * features.sum() - return observed_target, cov_target, crosscov_target_score.T, dispersion, alternatives + return TargetSpec(observed_target, + cov_target, + regress_target_score.T, + alternatives, + dispersion) class marginal_screening(screening): diff --git a/selectinf/randomized/tests/test_BH.py b/selectinf/randomized/tests/test_BH.py index 34c26ac5f..59927c56a 100644 --- a/selectinf/randomized/tests/test_BH.py +++ b/selectinf/randomized/tests/test_BH.py @@ -61,16 +61,13 @@ def test_independent_estimator(n=100, n1=50, q=0.2, signal=3, p=100): cov_target = np.identity(selected.sum()) / n cross_cov = -np.identity(p)[selected] / n - 
(observed_target1, - cov_target1, - cross_cov1, - _) = BH_select.marginal_targets(selected) + target_spec = BH_select.marginal_targets(selected) - assert(np.linalg.norm(observed_target - observed_target1) / + assert(np.linalg.norm(observed_target - target_spec.observed_target) / np.linalg.norm(observed_target) < 1.e-7) - assert(np.linalg.norm(cov_target - cov_target1) / + assert(np.linalg.norm(cov_target - target_spec.cov_target) / np.linalg.norm(cov_target) < 1.e-7) - assert(np.linalg.norm(cross_cov - cross_cov1) / np.linalg.norm(cross_cov) + assert(np.linalg.norm(regress_target_score - target_spec.regress_target_score) / np.linalg.norm(regress_target_score) < 1.e-7) result = BH_select.selective_MLE(observed_target, cov_target, cross_cov)[0] @@ -121,15 +118,9 @@ def test_BH(n=500, if nonzero is not None: if marginal: - (observed_target, - cov_target, - crosscov_target_score, - alternatives) = BH_select.marginal_targets(nonzero) + target_spec = BH_select.marginal_targets(nonzero) else: - (observed_target, - cov_target, - crosscov_target_score, - alternatives) = BH_select.full_targets(nonzero, dispersion=sigma**2) + target_spec = BH_select.full_targets(nonzero, dispersion=sigma**2) if marginal: beta_target = true_mean[nonzero] @@ -137,20 +128,14 @@ def test_BH(n=500, beta_target = beta[nonzero] if use_MLE: - print('huh') - result = BH_select.selective_MLE(observed_target, - cov_target, - crosscov_target_score, + result = BH_select.selective_MLE(target_spec, level=level)[0] estimate = result['MLE'] pivots = ndist.cdf((estimate - beta_target) / result['SE']) pivots = 2 * np.minimum(pivots, 1 - pivots) # run summary else: - result = BH_select.summary(observed_target, - cov_target, - crosscov_target_score, - alternatives, + result = BH_select.summary(target_spec, compute_intervals=True, level=level, ndraw=20000, @@ -174,33 +159,5 @@ def test_both(): test_BH(marginal=True) test_BH(marginal=False) -def main(nsim=500, use_MLE=True, marginal=False): - - import matplotlib.pyplot as plt - import statsmodels.api as sm - U = np.linspace(0, 1, 101) - P0, PA, cover, length_int = [], [], [], [] - Ps = [] - for i in range(nsim): - p0, pA, cover_, intervals, pivots = test_BH(use_MLE=use_MLE, - marginal=marginal) - Ps.extend(pivots) - cover.extend(cover_) - P0.extend(p0) - PA.extend(pA) - print(np.mean(cover),'coverage so far') - - period = 10 - if use_MLE: - period = 50 - if i % period == 0 and i > 0: - plt.clf() - if len(P0) > 0: - plt.plot(U, sm.distributions.ECDF(P0)(U), 'b', label='null') - plt.plot(U, sm.distributions.ECDF(PA)(U), 'r', label='alt') - plt.plot(U, sm.distributions.ECDF(Ps)(U), 'tab:orange', label='pivot') - plt.plot([0, 1], [0, 1], 'k--') - plt.legend() - plt.savefig('BH_pvals.pdf') diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index 1b08b2235..7dc873368 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -1,7 +1,8 @@ import numpy as np from ...tests.instance import gaussian_instance -from ..lasso import lasso, selected_targets +from ..lasso import lasso +from ...base import selected_targets from ..approx_reference import approximate_grid_inference def test_inf(n=500, @@ -12,7 +13,7 @@ def test_inf(n=500, rho=0.4, randomizer_scale=1., equicorrelated=False, - useIP=False, + useIP=True, CI=False): inst, const = gaussian_instance, lasso.gaussian @@ -53,19 +54,14 @@ def test_inf(n=500, if nonzero.sum() > 0: beta_target = np.linalg.pinv(X[:, 
nonzero]).dot(X.dot(beta)) - (observed_target, - cov_target, - regress_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=dispersion) + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=dispersion) + print(target_spec) + approximate_grid_inf = approximate_grid_inference(conv, - observed_target, - cov_target, - regress_target_score, + target_spec, useIP=useIP) if CI is False: diff --git a/selectinf/randomized/tests/test_drop_losers.py b/selectinf/randomized/tests/test_drop_losers.py index 45bd3595d..5f212b740 100644 --- a/selectinf/randomized/tests/test_drop_losers.py +++ b/selectinf/randomized/tests/test_drop_losers.py @@ -109,7 +109,7 @@ def test_compare_topK(p=20, randomizer = randomization.gaussian(np.diag(np.array(full_std)**2 / np.array(n_1)) - covariance) - randomized_topK = topK(full_means, + randomized_topK = topK(np.asarray(full_means), covariance, randomizer, K, @@ -117,11 +117,9 @@ def test_compare_topK(p=20, randomized_topK.fit(perturb=perturb) - (observed_target, - target_cov, - target_score_cov, - _) = randomized_topK.marginal_targets(randomized_topK.selection_variable['variables']) - + target_spec = randomized_topK.marginal_targets(randomized_topK.selection_variable['variables']) + print('var', randomized_topK.selection_variable['variables']) + # try with a degenerate covariance now means2 = df2.groupby('arm').mean()['data'].iloc[range(p)] @@ -135,52 +133,33 @@ def test_compare_topK(p=20, np.array(n_1)) - covariance2) - degenerate_topK = topK(means2, + degenerate_topK = topK(np.asarray(means2), covariance2, degenerate_randomizer, K, perturb=perturb2) np.random.seed(0) - summary1 = randomized_topK.summary(observed_target, - target_cov, - target_score_cov, - alternatives=['twosided']*K, - ndraw=10000, - burnin=2000, - compute_intervals=True) + summary1 = randomized_topK.selective_MLE(target_spec)[0] np.random.seed(0) - summary2 = dtl.summary(ndraw=10000, - burnin=2000) + summary2 = dtl.MLE_inference()[0] + + np.testing.assert_allclose(summary1['MLE'], summary2['MLE'], rtol=1.e-3) np.testing.assert_allclose(summary1['pvalue'], summary2['pvalue'], rtol=1.e-3) - np.testing.assert_allclose(summary1['target'], summary2['target'], rtol=1.e-3) - np.testing.assert_allclose(summary1['lower_confidence'], summary2['lower_confidence'], rtol=1.e-3) - np.testing.assert_allclose(summary1['upper_confidence'], summary2['upper_confidence'], rtol=1.e-3) + #np.testing.assert_allclose(summary1['lower_confidence'], summary2['lower_confidence'], rtol=1.e-3) + #np.testing.assert_allclose(summary1['upper_confidence'], summary2['upper_confidence'], rtol=1.e-3) np.random.seed(0) degenerate_topK.fit(perturb=perturb2) - summary3 = degenerate_topK.summary(observed_target, - target_cov, - target_score_cov, - alternatives=['twosided']*K, + summary3 = degenerate_topK.summary(target_spec, ndraw=10000, burnin=2000, compute_intervals=True) np.testing.assert_allclose(summary1['pvalue'], summary3['pvalue'], rtol=1.e-3) np.testing.assert_allclose(summary1['target'], summary3['target'], rtol=1.e-3) - np.testing.assert_allclose(summary1['lower_confidence'], summary3['lower_confidence'], rtol=1.e-3) - np.testing.assert_allclose(summary1['upper_confidence'], summary3['upper_confidence'], rtol=1.e-3) - + #np.testing.assert_allclose(summary1['lower_confidence'], summary3['lower_confidence'], rtol=1.e-3) + #np.testing.assert_allclose(summary1['upper_confidence'], summary3['upper_confidence'], rtol=1.e-3) -def main(nsim=100, 
use_MLE=True): - - P0, cover = [], [] - - for i in range(nsim): - p0, cover_ = test_drop_losers(use_MLE=use_MLE) - cover.extend(cover_) - P0.extend(p0) - print('coverage', np.mean(cover)) diff --git a/selectinf/randomized/tests/test_exact_reference.py b/selectinf/randomized/tests/test_exact_reference.py index 6e7e73f6d..534e4beaf 100644 --- a/selectinf/randomized/tests/test_exact_reference.py +++ b/selectinf/randomized/tests/test_exact_reference.py @@ -54,18 +54,12 @@ def test_inf(n=500, if nonzero.sum() > 0: beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) - (observed_target, - cov_target, - regress_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv.observed_soln, - dispersion=dispersion) + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=dispersion) exact_grid_inf = exact_grid_inference(conv, - observed_target, - cov_target, - regress_target_score) + target_spec) if CI is False: pivot = exact_grid_inf._pivots(beta_target) diff --git a/selectinf/randomized/tests/test_hiv_data.py b/selectinf/randomized/tests/test_hiv_data.py new file mode 100644 index 000000000..4c3a741e8 --- /dev/null +++ b/selectinf/randomized/tests/test_hiv_data.py @@ -0,0 +1,117 @@ +import numpy as np +import pandas as pd +from scipy.stats import norm as ndist + +from ..lasso import split_lasso +from ..posterior_inference import (langevin_sampler, + gibbs_sampler) + +from ...base import selected_targets +from ...tests.instance import HIV_NRTI +from ...tests.flags import SET_SEED, SMALL_SAMPLES +from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue + +@set_seed_iftrue(SET_SEED) +@set_sampling_params_iftrue(SMALL_SAMPLES, nsample=50, nburnin=10) +def test_hiv_data(nsample=10000, + nburnin=500, + level=0.90, + split_proportion=0.50, + seedn=1): + np.random.seed(seedn) + + alpha = (1 - level) / 2 + Z_quantile = ndist.ppf(1 - alpha) + + X, Y, _ = HIV_NRTI(standardize=True) + Y *= 15 + n, p = X.shape + X /= np.sqrt(n) + + ols_fit = np.linalg.pinv(X).dot(Y) + _sigma = np.linalg.norm(Y - X.dot(ols_fit)) / np.sqrt(n - p - 1) + + const = split_lasso.gaussian + + dispersion = _sigma ** 2 + + W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * _sigma + + conv = const(X, + Y, + W, + proportion=split_proportion) + + signs = conv.fit() + nonzero = signs != 0 + + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=dispersion) + + mle, inverse_info = conv.selective_MLE(target_spec, + level=level, + solve_args={'tol': 1.e-12})[:2] + + approx_inf = conv.approximate_grid_inference(target_spec, + useIP=True) + + posterior_inf = conv.posterior(target_spec, + dispersion=dispersion) + + samples_langevin = langevin_sampler(posterior_inf, + nsample=nsample, + nburnin=nburnin, + step=1.) 
+ + lower_langevin = np.percentile(samples_langevin, int(alpha * 100), axis=0) + upper_langevin = np.percentile(samples_langevin, int((1 - alpha) * 100), axis=0) + + samples_gibbs, scale_gibbs = gibbs_sampler(posterior_inf, + nsample=nsample, + nburnin=nburnin) + + lower_gibbs = np.percentile(samples_gibbs, int(alpha * 100), axis=0) + upper_gibbs = np.percentile(samples_gibbs, int((1 - alpha) * 100), axis=0) + + naive_est = np.linalg.pinv(X[:, nonzero]).dot(Y) + naive_cov = dispersion * np.linalg.inv(X[:, nonzero].T.dot(X[:, nonzero])) + naive_intervals = np.vstack([naive_est - Z_quantile * np.sqrt(np.diag(naive_cov)), + naive_est + Z_quantile * np.sqrt(np.diag(naive_cov))]).T + + X_split = X[~conv._selection_idx, :] + Y_split = Y[~conv._selection_idx] + split_est = np.linalg.pinv(X_split[:, nonzero]).dot(Y_split) + split_cov = dispersion * np.linalg.inv(X_split[:, nonzero].T.dot(X_split[:, nonzero])) + split_intervals = np.vstack([split_est - Z_quantile * np.sqrt(np.diag(split_cov)), + split_est + Z_quantile * np.sqrt(np.diag(split_cov))]).T + + print("lengths: adjusted intervals Langevin, Gibbs, MLE1, MLE2, approx ", + np.mean(upper_langevin - lower_langevin), + np.mean(upper_gibbs - lower_gibbs), + np.mean((2 * Z_quantile) * np.sqrt(np.diag(posterior_inf.inverse_info))), + np.mean(mle['upper_confidence'] - mle['lower_confidence']), + np.mean(approx_inf['upper_confidence'] - approx_inf['lower_confidence']) + ) + + print("lengths: naive intervals ", np.mean(naive_intervals[:, 1] - naive_intervals[:, 0])) + + print("lengths: split intervals ", np.mean(split_intervals[:, 1] - split_intervals[:, 0])) + + scale_interval = np.percentile(scale_gibbs, [alpha * 100, (1 - alpha) * 100]) + output = pd.DataFrame({'Langevin_lower_credible': lower_langevin, + 'Langevin_upper_credible': upper_langevin, + 'Gibbs_lower_credible': lower_gibbs, + 'Gibbs_upper_credible': upper_gibbs, + 'MLE_lower_confidence': mle['lower_confidence'], + 'MLE_upper_confidence': mle['upper_confidence'], + 'approx_lower_confidence': approx_inf['lower_confidence'], + 'approx_upper_confidence': approx_inf['upper_confidence'], + 'Split_lower_confidence': split_intervals[:, 0], + 'Split_upper_confidence': split_intervals[:, 1], + 'Naive_lower_confidence': naive_intervals[:, 0], + 'Naive_upper_confidence': naive_intervals[:, 1] + }) + + return output, scale_interval, _sigma + diff --git a/selectinf/randomized/tests/test_lasso.py b/selectinf/randomized/tests/test_lasso.py index 1ba443c29..07d1e9989 100644 --- a/selectinf/randomized/tests/test_lasso.py +++ b/selectinf/randomized/tests/test_lasso.py @@ -60,32 +60,17 @@ def test_highdim_lasso(n=500, nonzero = signs != 0 if target == 'full': - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = full_targets(conv.loglike, - conv.observed_soln) + target_spec = full_targets(conv.loglike, + conv.observed_soln) elif target == 'selected': - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv.observed_soln) + target_spec = selected_targets(conv.loglike, + conv.observed_soln) elif target == 'debiased': - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = debiased_targets(conv.loglike, - conv.observed_soln, - penalty=conv.penalty) - - result = conv.summary(observed_target, - cov_target, - cov_target_score, - alternatives, + target_spec = debiased_targets(conv.loglike, + conv.observed_soln, + penalty=conv.penalty) + + result = conv.summary(target_spec, ndraw=ndraw, 
burnin=burnin, compute_intervals=True) @@ -150,32 +135,17 @@ def test_AR_randomization(n=300, nonzero = signs != 0 if target == 'full': - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = full_targets(conv.loglike, - conv.observed_soln) + target_spec = full_targets(conv.loglike, + conv.observed_soln) elif target == 'selected': - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv.observed_soln) + target_spec = selected_targets(conv.loglike, + conv.observed_soln) elif target == 'debiased': - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = debiased_targets(conv.loglike, - conv.observed_soln, - penalty=conv.penalty) - - result = conv.summary(observed_target, - cov_target, - cov_target_score, - alternatives, + target_spec = debiased_targets(conv.loglike, + conv.observed_soln, + penalty=conv.penalty) + + result = conv.summary(target_spec, ndraw=ndraw, burnin=burnin, compute_intervals=True) @@ -275,24 +245,13 @@ def test_sqrt_highdim_lasso(n=500, np.testing.assert_allclose(soln, soln3) if full: - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = full_targets(conv.loglike, - conv.observed_soln) + target_spec = full_targets(conv.loglike, + conv.observed_soln) else: - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv.observed_soln) + target_spec = selected_targets(conv.loglike, + conv.observed_soln) - result = conv.summary(observed_target, - cov_target, - cov_target_score, - alternatives, + result = conv.summary(target_spec, ndraw=ndraw, burnin=burnin, compute_intervals=False) @@ -427,23 +386,12 @@ def test_logistic_lasso(n=500, # sanity check if full: - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = full_targets(conv.loglike, - conv.observed_soln) + target_spec = full_targets(conv.loglike, + conv.observed_soln) else: - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, + target_spec = selected_targets(conv.loglike, conv.observed_soln) - result = conv.summary(observed_target, - cov_target, - cov_target_score, - alternatives, + result = conv.summary(target_spec, ndraw=ndraw, burnin=burnin, compute_intervals=False) diff --git a/selectinf/randomized/tests/test_marginal_screening.py b/selectinf/randomized/tests/test_marginal_screening.py index e8dac39aa..50c769fb6 100644 --- a/selectinf/randomized/tests/test_marginal_screening.py +++ b/selectinf/randomized/tests/test_marginal_screening.py @@ -46,28 +46,15 @@ def test_marginal(n=500, if marginal: - (observed_target, - cov_target, - crosscov_target_score, - dispersion, - alternatives) = marginal_select.marginal_targets(nonzero) + target_spec = marginal_select.marginal_targets(nonzero) else: - (observed_target, - cov_target, - crosscov_target_score, - dispersion, - alternatives) = marginal_select.multivariate_targets(nonzero, dispersion=sigma**2) + target_spec = marginal_select.multivariate_targets(nonzero, dispersion=sigma**2) if use_MLE: - result = marginal_select.selective_MLE(observed_target, - cov_target, - crosscov_target_score)[0] + result = marginal_select.selective_MLE(target_spec)[0] # run summary else: - result = marginal_select.summary(observed_target, - cov_target, - crosscov_target_score, - alternatives, + result = marginal_select.summary(target_spec, compute_intervals=True) intervals = 
np.asarray(result[['lower_confidence', 'upper_confidence']]) @@ -136,28 +123,19 @@ def test_simple(n=100, if nonzero.sum() > 0: - (observed_target, - cov_target, - crosscov_target_score, - dispersion, - alternatives) = marginal_select.marginal_targets(nonzero) + target_spec = marginal_select.marginal_targets(nonzero) if use_MLE: - result = marginal_select.selective_MLE(observed_target, - cov_target, - crosscov_target_score) + result = marginal_select.selective_MLE(target_spec) # run summary else: - result = marginal_select.summary(observed_target, - cov_target, - crosscov_target_score, - alternatives, + result = marginal_select.summary(target_spec, compute_intervals=True) pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) print(pval) - beta_target = cov_target.dot(true_mean[nonzero]) + beta_target = target_spec.cov_target.dot(true_mean[nonzero]) print("beta_target and intervals", beta_target, intervals) coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) print("coverage for selected target", coverage.sum()/float(nonzero.sum())) @@ -166,25 +144,3 @@ def test_simple(n=100, def test_both(): test_marginal(marginal=True) test_marginal(marginal=False) - -def main(nsim=1000, test_fn=test_marginal, use_MLE=False): - - import matplotlib.pyplot as plt - import statsmodels.api as sm - U = np.linspace(0, 1, 101) - P0, PA, cover, length_int = [], [], [], [] - for i in range(nsim): - p0, pA, cover_, intervals = test_fn(use_MLE=use_MLE) - - cover.extend(cover_) - P0.extend(p0) - PA.extend(pA) - print(np.mean(cover),'coverage so far') - - if i % 50 == 0 and i > 0: - plt.clf() - plt.plot(U, sm.distributions.ECDF(P0)(U), 'b', label='null') - plt.plot(U, sm.distributions.ECDF(PA)(U), 'r', label='alt') - plt.plot([0, 1], [0, 1], 'k--') - plt.savefig('marginal_screening_pvals.pdf') - diff --git a/selectinf/randomized/tests/test_multiple_queries.py b/selectinf/randomized/tests/test_multiple_queries.py index 267b7e53b..a56a8a440 100644 --- a/selectinf/randomized/tests/test_multiple_queries.py +++ b/selectinf/randomized/tests/test_multiple_queries.py @@ -62,25 +62,15 @@ def test_multiple_queries(n=500, if nonzero.sum() == 0: return [], [] - (observed_target1, - cov_target1, - cov_target_score1, - dispersion1, - alternatives1) = conv1.multivariate_targets(nonzero, sigma**2) - - (observed_target2, - cov_target2, - cov_target_score2, - dispersion2, - alternatives2) = selected_targets(conv2.loglike, - conv2.observed_soln, - features=nonzero) + target_spec1 = conv1.multivariate_targets(nonzero, sigma**2) + + target_spec2 = selected_targets(conv2.loglike, + conv2.observed_soln, + features=nonzero) mq = multiple_queries([conv1, conv2]) - results = mq.summary(observed_target1, - [(cov_target1, cov_target_score1), - (cov_target2, cov_target_score2)], + results = mq.summary([target_spec1, target_spec2], compute_intervals=True) pval = np.asarray(results['pvalue']) return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0] diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index 66780a5b4..b6c1c8ddb 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -53,19 +53,12 @@ def test_Langevin(n=500, beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) - (observed_target, - cov_target, - regress_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv.observed_soln, - dispersion=dispersion) - - posterior_inf = 
conv.posterior(observed_target, - cov_target, - regress_target_score, + target_spec = selected_targets(conv.loglike, + conv.observed_soln, dispersion=dispersion) + posterior_inf = conv.posterior(target_spec) + samples = langevin_sampler(posterior_inf, nsample=nsample, nburnin=nburnin) @@ -106,6 +99,7 @@ def test_coverage(nsim=100, @set_seed_iftrue(SET_SEED) @set_sampling_params_iftrue(SMALL_SAMPLES, nsample=50, nburnin=10) def test_instance(nsample=100, nburnin=50): + np.random.seed(10) n, p, s = 500, 100, 5 X = np.random.standard_normal((n, p)) beta = np.zeros(p) @@ -122,19 +116,13 @@ def test_instance(nsample=100, nburnin=50): M[-3:] = 1 dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) - (observed_target, - cov_target, - regress_target_score, - dispersion, - alternatives)= selected_targets(L.loglike, - L.observed_soln, - features=M, - dispersion=dispersion) - - posterior_inf = L.posterior(observed_target, - cov_target, - regress_target_score, - dispersion=dispersion) + target_spec = selected_targets(L.loglike, + L.observed_soln, + features=M, + dispersion=dispersion) + print(target_spec.dispersion, dispersion) + + posterior_inf = L.posterior(target_spec) samples = langevin_sampler(posterior_inf, nsample=nsample, @@ -178,18 +166,14 @@ def test_flexible_prior1(nsample=100, M[-3:] = 1 dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) - (observed_target, - cov_target, - regress_target_score, - dispersion, - alternatives) = selected_targets(L.loglike, - L.observed_soln, - features=M, - dispersion=dispersion) + target_spec = selected_targets(L.loglike, + L.observed_soln, + features=M, + dispersion=dispersion) # default prior - Di = 1. / (200 * np.diag(cov_target)) + Di = 1. / (200 * np.diag(target_spec.cov_target)) def prior(target_parameter): grad_prior = -target_parameter * Di @@ -200,10 +184,7 @@ def prior(target_parameter): np.random.set_state(seed_state) Z1 = np.random.standard_normal() - posterior_inf1 = L.posterior(observed_target, - cov_target, - regress_target_score, - dispersion=dispersion, + posterior_inf1 = L.posterior(target_spec, prior=prior) W1 = np.random.standard_normal() @@ -213,10 +194,7 @@ def prior(target_parameter): np.random.set_state(seed_state) Z2 = np.random.standard_normal() - posterior_inf2 = L.posterior(observed_target, - cov_target, - regress_target_score, - dispersion=dispersion) + posterior_inf2 = L.posterior(target_spec) W2 = np.random.standard_normal() samples2 = langevin_sampler(posterior_inf2, @@ -248,14 +226,10 @@ def test_flexible_prior2(nsample=1000, nburnin=50): M[-3:] = 1 dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) - (observed_target, - cov_target, - regress_target_score, - dispersion, - alternatives) = selected_targets(L.loglike, - L.observed_soln, - features=M, - dispersion=dispersion) + target_spec = selected_targets(L.loglike, + L.observed_soln, + features=M, + dispersion=dispersion) prior_var = 0.05 ** 2 @@ -264,11 +238,8 @@ def prior(target_parameter): log_prior = -np.linalg.norm(target_parameter) ** 2 / (2. 
* prior_var) return log_prior, grad_prior - posterior_inf = L.posterior(observed_target, - cov_target, - regress_target_score, - dispersion=dispersion, - prior=prior) + posterior_inf = L.posterior(target_spec, + prior=prior) adaptive_proposal = np.linalg.inv(np.linalg.inv(posterior_inf.inverse_info) + np.identity(posterior_inf.inverse_info.shape[0]) / 0.05 ** 2) @@ -279,118 +250,4 @@ def prior(target_parameter): return samples -@set_seed_iftrue(SET_SEED) -@set_sampling_params_iftrue(SMALL_SAMPLES, nsample=50, nburnin=10) -def test_hiv_data(nsample=10000, - nburnin=500, - level=0.90, - split_proportion=0.50, - seedn=1): - np.random.seed(seedn) - - alpha = (1 - level) / 2 - Z_quantile = ndist.ppf(1 - alpha) - - X, Y, _ = HIV_NRTI(standardize=True) - Y *= 15 - n, p = X.shape - X /= np.sqrt(n) - - ols_fit = np.linalg.pinv(X).dot(Y) - _sigma = np.linalg.norm(Y - X.dot(ols_fit)) / np.sqrt(n - p - 1) - - const = split_lasso.gaussian - - dispersion = _sigma ** 2 - - W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * _sigma - - conv = const(X, - Y, - W, - proportion=split_proportion) - - signs = conv.fit() - nonzero = signs != 0 - - (observed_target, - cov_target, - regress_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv.observed_soln, - dispersion=dispersion) - - mle, inverse_info = conv.selective_MLE(observed_target, - cov_target, - regress_target_score, - level=level, - solve_args={'tol': 1.e-12})[:2] - - approx_inf = conv.approximate_grid_inference(observed_target, - cov_target, - regress_target_score, - useIP=False) - - posterior_inf = conv.posterior(observed_target, - cov_target, - regress_target_score, - dispersion=dispersion) - - samples_langevin = langevin_sampler(posterior_inf, - nsample=nsample, - nburnin=nburnin, - step=1.) 
- - lower_langevin = np.percentile(samples_langevin, int(alpha * 100), axis=0) - upper_langevin = np.percentile(samples_langevin, int((1 - alpha) * 100), axis=0) - - samples_gibbs, scale_gibbs = gibbs_sampler(posterior_inf, - nsample=nsample, - nburnin=nburnin) - - lower_gibbs = np.percentile(samples_gibbs, int(alpha * 100), axis=0) - upper_gibbs = np.percentile(samples_gibbs, int((1 - alpha) * 100), axis=0) - - naive_est = np.linalg.pinv(X[:, nonzero]).dot(Y) - naive_cov = dispersion * np.linalg.inv(X[:, nonzero].T.dot(X[:, nonzero])) - naive_intervals = np.vstack([naive_est - Z_quantile * np.sqrt(np.diag(naive_cov)), - naive_est + Z_quantile * np.sqrt(np.diag(naive_cov))]).T - - X_split = X[~conv._selection_idx, :] - Y_split = Y[~conv._selection_idx] - split_est = np.linalg.pinv(X_split[:, nonzero]).dot(Y_split) - split_cov = dispersion * np.linalg.inv(X_split[:, nonzero].T.dot(X_split[:, nonzero])) - split_intervals = np.vstack([split_est - Z_quantile * np.sqrt(np.diag(split_cov)), - split_est + Z_quantile * np.sqrt(np.diag(split_cov))]).T - - print("lengths: adjusted intervals Langevin, Gibbs, MLE1, MLE2, approx ", - np.mean(upper_langevin - lower_langevin), - np.mean(upper_gibbs - lower_gibbs), - np.mean((2 * Z_quantile) * np.sqrt(np.diag(posterior_inf.inverse_info))), - np.mean(mle['upper_confidence'] - mle['lower_confidence']), - np.mean(approx_inf['upper_confidence'] - approx_inf['lower_confidence']) - ) - - print("lengths: naive intervals ", np.mean(naive_intervals[:, 1] - naive_intervals[:, 0])) - - print("lengths: split intervals ", np.mean(split_intervals[:, 1] - split_intervals[:, 0])) - - scale_interval = np.percentile(scale_gibbs, [alpha * 100, (1 - alpha) * 100]) - output = pd.DataFrame({'Langevin_lower_credible': lower_langevin, - 'Langevin_upper_credible': upper_langevin, - 'Gibbs_lower_credible': lower_gibbs, - 'Gibbs_upper_credible': upper_gibbs, - 'MLE_lower_confidence': mle['lower_confidence'], - 'MLE_upper_confidence': mle['upper_confidence'], - 'approx_lower_confidence': approx_inf['lower_confidence'], - 'approx_upper_confidence': approx_inf['upper_confidence'], - 'Split_lower_confidence': split_intervals[:, 0], - 'Split_upper_confidence': split_intervals[:, 1], - 'Naive_lower_confidence': naive_intervals[:, 0], - 'Naive_upper_confidence': naive_intervals[:, 1] - }) - - return output, scale_interval, _sigma - diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 818cdc012..552d2b9ce 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -65,29 +65,18 @@ def test_full_targets(n=200, dispersion = None if n > p: - (observed_target, - cov_target, - regress_target_score, - dispersion, - alternatives) = full_targets(conv.loglike, - conv.observed_soln, - nonzero, - dispersion=dispersion) + target_spec = full_targets(conv.loglike, + conv.observed_soln, + nonzero, + dispersion=dispersion) else: - (observed_target, - cov_target, - regress_target_score, - dispersion, - alternatives) = debiased_targets(conv.loglike, - conv.observed_soln, - nonzero, - penalty=conv.penalty, - dispersion=dispersion) - - result = conv.selective_MLE(observed_target, - cov_target, - regress_target_score, - dispersion)[0] + target_spec = debiased_targets(conv.loglike, + conv.observed_soln, + nonzero, + penalty=conv.penalty, + dispersion=dispersion) + + result = conv.selective_MLE(target_spec)[0] pval = result['pvalue'] estimate = result['MLE'] @@ -147,17 
+136,11 @@ def test_selected_targets(n=2000, if full_dispersion: dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) - (observed_target, - cov_target, - regress_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv.observed_soln, - dispersion=dispersion) - - result = conv.selective_MLE(observed_target, - cov_target, - regress_target_score, + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=dispersion) + + result = conv.selective_MLE(target_spec, dispersion)[0] pval = result['pvalue'] @@ -188,20 +171,14 @@ def test_instance(): print("check ", M) dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) - (observed_target, - cov_target, - regress_target_score, - dispersion, - alternatives) = selected_targets(L.loglike, - L.observed_soln, - features=M, - dispersion=dispersion) + target_spec = selected_targets(L.loglike, + L.observed_soln, + features=M, + dispersion=dispersion) - print("check shapes", observed_target.shape, E.sum()) + print("check shapes", target_spec.observed_target.shape, E.sum()) - result = L.selective_MLE(observed_target, - cov_target, - regress_target_score, + result = L.selective_MLE(target_spec, dispersion)[0] intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) @@ -256,17 +233,11 @@ def test_selected_targets_disperse(n=500, if full_dispersion: dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) - (observed_target, - cov_target, - regress_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv.observed_soln, - dispersion=dispersion) - - result = conv.selective_MLE(observed_target, - cov_target, - regress_target_score, + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=dispersion) + + result = conv.selective_MLE(target_spec, dispersion)[0] pval = result['pvalue'] @@ -317,17 +288,11 @@ def test_logistic(n=2000, if nonzero.sum() > 0: - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv.observed_soln, - dispersion=1) - - result = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=1) + + result = conv.selective_MLE(target_spec)[0] estimate = result['MLE'] pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', @@ -373,17 +338,11 @@ def test_logistic_split(n=2000, if nonzero.sum() > 0: - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv.observed_soln, - dispersion=1) - - result = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=1) + + result = conv.selective_MLE(target_spec)[0] estimate = result['MLE'] pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', @@ -429,17 +388,11 @@ def test_poisson(n=2000, if nonzero.sum() > 0: - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv.observed_soln, - dispersion=1) - - result = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=1) + + result = conv.selective_MLE(target_spec)[0] estimate = result['MLE'] pval = result['pvalue'] intervals = 
np.asarray(result[['lower_confidence', @@ -485,17 +438,11 @@ def test_poisson_split(n=2000, if nonzero.sum() > 0: - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv.observed_soln, - dispersion=1) - - result = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=1) + + result = conv.selective_MLE(target_spec)[0] estimate = result['MLE'] pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', @@ -544,17 +491,11 @@ def test_cox(n=2000, cox_full = rr.glm.cox(X, T, S) full_hess = cox_full.hessian(conv.observed_soln) - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv.observed_soln, - dispersion=1) - - result = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=1) + + result = conv.selective_MLE(target_spec)[0] estimate = result['MLE'] pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', @@ -603,17 +544,11 @@ def test_cox_split(n=2000, cox_full = rr.glm.cox(X, T, S) full_hess = cox_full.hessian(conv.observed_soln) - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv.observed_soln, - dispersion=1) - - result = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=1) + + result = conv.selective_MLE(target_spec)[0] estimate = result['MLE'] pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', @@ -669,22 +604,16 @@ def test_scale_invariant_split(n=200, print('feature_weights', conv.feature_weights[0] / scale) dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv.observed_soln, - dispersion=dispersion) - - print('dispersion', dispersion/scale**2) - print('target', observed_target[0]/scale) - print('cov_target', cov_target[0,0]/scale**2) - print('cov_target_score', cov_target_score[0,0]/scale**2) + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=dispersion) + + print('dispersion', target_spec.dispersion/scale**2) + print('target', target_spec.observed_target[0]/scale) + print('cov_target', target_spec.cov_target[0,0]/scale**2) + print('regress_target_score', target_spec.regress_target_score[0,0]/scale**2) - result = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] + result = conv.selective_MLE(target_spec)[0] print(result['MLE'] / scale) results.append(result) @@ -751,22 +680,16 @@ def test_scale_invariant(n=200, print('perturb', conv._initial_omega[0] / scale) dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv.observed_soln, - dispersion=dispersion) - - print('dispersion', dispersion/scale**2) - print('target', observed_target[0]/scale) - print('cov_target', cov_target[0,0]/scale**2) - print('cov_target_score', cov_target_score[0,0]/scale**2) + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=dispersion) + + print('dispersion', 
target_spec.dispersion/scale**2) + print('target', target_spec.observed_target[0]/scale) + print('cov_target', target_spec.cov_target[0,0]/scale**2) + print('regress_target_score', target_spec.regress_target_score[0,0]/scale**2) - result = conv.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] + result = conv.selective_MLE(target_spec)[0] print(result['MLE'] / scale) results.append(result) diff --git a/selectinf/randomized/tests/test_selective_MLE_onedim.py b/selectinf/randomized/tests/test_selective_MLE_onedim.py index 33599a725..dd7ded2ff 100644 --- a/selectinf/randomized/tests/test_selective_MLE_onedim.py +++ b/selectinf/randomized/tests/test_selective_MLE_onedim.py @@ -4,7 +4,8 @@ from scipy.stats import norm as ndist import nose.tools as nt -from ..lasso import lasso, full_targets +from ..lasso import lasso +from ...base import full_targets, TargetSpec from ...tests.instance import gaussian_instance def test_onedim_lasso(n=50000, W=1.5, signal=2., sigma=2, randomizer_scale=1): @@ -28,18 +29,11 @@ def test_onedim_lasso(n=50000, W=1.5, signal=2., sigma=2, randomizer_scale=1): # this is current code where we estimate sigma - (observed_target, - cov_target, - regress_target_score, - dispersion, - alternatives) = full_targets(conv.loglike, - conv._W, - nonzero) + target_spec = full_targets(conv.loglike, + conv.observed_soln, + features=nonzero) - result = conv.selective_MLE(observed_target, - cov_target, - regress_target_score, - np.ones((1,)) * signs[0]) + result = conv.selective_MLE(target_spec) estimate_cur = float(result[0]['MLE']) Z_cur = float(result[0]['Zvalue']) @@ -50,11 +44,13 @@ def test_onedim_lasso(n=50000, W=1.5, signal=2., sigma=2, randomizer_scale=1): target_Z = X.T.dot(Y) / np.sqrt((X**2).sum(0)) - result2 = conv.sampler.selective_MLE(target_Z, - sigma**2 * np.ones((1,1)), - -np.ones((1,1)) / np.sqrt((X**2).sum(0)), + target = TargetSpec(target_Z, + sigma**2 * np.ones((1,1)), + -np.ones((1,1)) / np.sqrt((X**2).sum(0)), + ['greater'], + sigma**2) + result2 = conv.sampler.selective_MLE(target, np.ones((1,)) * signs[0], - dispersion=sigma**2, solve_args={'tol':1.e-12}) estimate, I, Z, pv = (float(result2[0]['MLE']), result2[1], @@ -75,7 +71,7 @@ def test_onedim_lasso(n=50000, W=1.5, signal=2., sigma=2, randomizer_scale=1): pivot = ndist.cdf((estimate_cur - signal) / np.sqrt(I_cur[0,0])) - debug = Falsee + debug = False if debug: print(estimate, approx_MLE, 'selective MLE') print(beta[nonzero], 'truth') diff --git a/selectinf/randomized/tests/test_slope.py b/selectinf/randomized/tests/test_slope.py index 65cc553c7..1545d0f47 100644 --- a/selectinf/randomized/tests/test_slope.py +++ b/selectinf/randomized/tests/test_slope.py @@ -152,21 +152,13 @@ def test_randomized_slope(n=2000, if nonzero.sum() > 0: if target == 'full': - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = full_targets(conv.loglike, - conv.observed_soln, - dispersion=sigma_) + target_spec = full_targets(conv.loglike, + conv.observed_soln, + dispersion=sigma_) elif target == 'selected': - (observed_target, - cov_target, - cov_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv.observed_soln, - dispersion=sigma_) + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=sigma_) if target == "selected": beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) @@ -174,14 +166,9 @@ def test_randomized_slope(n=2000, beta_target = beta[nonzero] if use_MLE: - result = conv.selective_MLE(observed_target, - 
cov_target, - cov_target_score)[0] + result = conv.selective_MLE(target_spec)[0] else: - result = conv.summary(observed_target, - cov_target, - cov_target_score, - alternatives, + result = conv.summary(target_spec, compute_intervals=True, ndraw=150000) pval = np.asarray(result['pvalue']) diff --git a/selectinf/randomized/tests/test_split_lasso.py b/selectinf/randomized/tests/test_split_lasso.py index f994c05cc..0e0bd855e 100644 --- a/selectinf/randomized/tests/test_split_lasso.py +++ b/selectinf/randomized/tests/test_split_lasso.py @@ -7,10 +7,10 @@ import regreg.api as rr -from ..lasso import (split_lasso, - selected_targets, +from ..lasso import split_lasso +from ...base import (selected_targets, full_targets, - debiased_targets) + debiased_targets) from ...tests.instance import gaussian_instance from ...tests.flags import SET_SEED from ...tests.decorators import set_sampling_params_iftrue, set_seed_iftrue @@ -66,44 +66,26 @@ def test_split_lasso(n=100, if nonzero.sum() > 0: if target == 'full': - (observed_target, - cov_target, - cov_target_score, - alternatives) = full_targets(conv.loglike, - conv._W, - nonzero, - dispersion=sigma**2) + target_spec = full_targets(conv.loglike, + conv.observed_soln, + dispersion=sigma**2) elif target == 'selected': - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero) #, - #dispersion=sigma**2) + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=sigma**2) elif target == 'debiased': - (observed_target, - cov_target, - cov_target_score, - alternatives) = debiased_targets(conv.loglike, - conv._W, - nonzero, - penalty=conv.penalty, - dispersion=sigma**2) - - result = conv.summary(observed_target, - cov_target, - cov_target_score, - alternatives, + target_spec = debiased_targets(conv.loglike, + conv.observed_soln, + penalty=conv.penalty, + dispersion=sigma**2) + + result = conv.summary(target_spec, ndraw=ndraw, burnin=burnin, compute_intervals=False) - MLE_result, observed_info_mean, _ = conv.selective_MLE( - observed_target, - cov_target, - cov_target_score) + MLE_result, observed_info_mean, _ = conv.selective_MLE(target_spec) final_estimator = np.asarray(MLE_result['MLE']) pval = np.asarray(result['pvalue']) @@ -134,32 +116,3 @@ def test_all_targets(n=100, p=20, signal_fac=1.5, s=5, sigma=3, rho=0.4): rho=rho, target=target) -def main(nsim=500, n=100, p=200, target='selected', sigma=3, s=3): - - import matplotlib.pyplot as plt - P0, PA = [], [] - from statsmodels.distributions import ECDF - - for i in range(nsim): - p0, pA = test_split_lasso(n=n, p=p, target=target, sigma=sigma, s=s) - print(len(p0), len(pA)) - if not (len(pA) < s and target=='selected'): - P0.extend(p0) - PA.extend(pA) - - P0_clean = np.array(P0) - - P0_clean = P0_clean[P0_clean > 1.e-5] # - print(np.mean(P0_clean), np.std(P0_clean), np.mean(np.array(PA) < 0.05), np.sum(np.array(PA) < 0.05) / (i+1), np.mean(np.array(P0) < 0.05), np.mean(P0_clean < 0.05), np.mean(np.array(P0) < 1e-5), 'null pvalue + power + failure') - - if i % 3 == 0 and i > 0: - U = np.linspace(0, 1, 101) - plt.clf() - if len(P0_clean) > 0: - plt.plot(U, ECDF(P0_clean)(U)) - if len(PA) > 0: - plt.plot(U, ECDF(PA)(U), 'r') - plt.plot([0, 1], [0, 1], 'k--') - plt.savefig("plot.pdf") - plt.show() - diff --git a/selectinf/randomized/tests/test_standalone_lasso_mle.py b/selectinf/randomized/tests/test_standalone_lasso_mle.py index 4151fa8a4..5482460da 100644 --- a/selectinf/randomized/tests/test_standalone_lasso_mle.py +++ 
b/selectinf/randomized/tests/test_standalone_lasso_mle.py @@ -5,9 +5,10 @@ import regreg.api as rr -from selectinf.randomized.lasso import split_lasso, selected_targets -from selectinf.randomized.query import selective_MLE -from selectinf.randomized.approx_reference import approximate_grid_inference +from ..lasso import split_lasso +from ...base import selected_targets, TargetSpec +from ..query import selective_MLE +from ..approx_reference import approximate_grid_inference def test_standalone_inference(n=2000, p=100, @@ -46,20 +47,14 @@ def test_standalone_inference(n=2000, full_hess = cox_full.hessian(padded_soln) selected_hess = full_hess[nonzero][:,nonzero] - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(cox_lasso.loglike, - None, - nonzero, - hessian=full_hess, - dispersion=1) + target_spec = selected_targets(cox_lasso.loglike, + cox_lasso.observed_soln, + hessian=full_hess, + dispersion=1) if nonzero.sum(): if approx: - approx_result = cox_lasso.approximate_grid_inference(observed_target, - cov_target, - cov_target_score) + approx_result = cox_lasso.approximate_grid_inference(target_spec) approx_pval = approx_result['pvalue'] testval = approximate_normalizer_inference(proportion, @@ -75,9 +70,7 @@ def test_standalone_inference(n=2000, approx_pval = np.empty(nonzero.sum())*np.nan if MLE: - MLE_result = cox_lasso.selective_MLE(observed_target, - cov_target, - cov_target_score)[0] + MLE_result = cox_lasso.selective_MLE(target_spec)[0] MLE_pval = MLE_result['pvalue'] else: MLE_pval = np.empty(nonzero.sum())*np.nan @@ -125,9 +118,12 @@ def approximate_mle_inference(training_proportion, target_score_cov = -np.identity(nselect) observed_target = selected_beta_refit - result = selective_MLE(observed_target, - target_cov, - target_score_cov, + target_spec = selected_targets(cox_lasso.loglike, + cox_lasso.observed_soln, + hessian=full_hess, + dispersion=1) + + result = selective_MLE(target_spec, training_betahat * selected_signs, cond_mean, cond_cov, @@ -168,9 +164,12 @@ def approximate_normalizer_inference(training_proportion, target_score_cov = -np.identity(nselect) observed_target = selected_beta_refit - inverse_info = selective_MLE(observed_target, - target_cov, - target_score_cov, + target = TargetSpec(observed_target, + target_cov, + target_score_cov, + None) + + inverse_info = selective_MLE(target_spec, training_betahat * selected_signs, cond_mean, cond_cov, @@ -180,9 +179,7 @@ def approximate_normalizer_inference(training_proportion, level=level, useC=True)[1] - G = approximate_grid_inference(observed_target, - target_cov, - target_score_cov, + G = approximate_grid_inference(target_spec, inverse_info, training_betahat * selected_signs, cond_mean, diff --git a/selectinf/randomized/tests/test_topK.py b/selectinf/randomized/tests/test_topK.py index 45dbb54b9..2c1def227 100644 --- a/selectinf/randomized/tests/test_topK.py +++ b/selectinf/randomized/tests/test_topK.py @@ -46,28 +46,15 @@ def test_topK(n=500, if nonzero.sum() > 0: if marginal: - (observed_target, - cov_target, - crosscov_target_score, - dipsersion, - alternatives) = topK_select.marginal_targets(nonzero) + target_spec = topK_select.marginal_targets(nonzero) else: - (observed_target, - cov_target, - crosscov_target_score, - dispersion, - alternatives) = topK_select.multivariate_targets(nonzero, dispersion=sigma**2) + target_spec = topK_select.multivariate_targets(nonzero, dispersion=sigma**2) if use_MLE: - result = topK_select.selective_MLE(observed_target, - cov_target, - 
crosscov_target_score)[0] + result = topK_select.selective_MLE(target_spec)[0] # run summary else: - result = topK_select.summary(observed_target, - cov_target, - crosscov_target_score, - alternatives, + result = topK_select.summary(target_spec, compute_intervals=True) lower = np.asarray(result['lower_confidence']) upper = np.asarray(result['upper_confidence']) @@ -128,22 +115,12 @@ def test_bias_topK(n=500, if marginal: beta_target = true_mean[nonzero] - (observed_target, - cov_target, - crosscov_target_score, - dispersion, - alternatives) = topK_select.marginal_targets(nonzero) + target_spec = topK_select.marginal_targets(nonzero) else: beta_target = beta[nonzero] - (observed_target, - cov_target, - crosscov_target_score, - dispersion, - alternatives) = topK_select.multivariate_targets(nonzero, dispersion=sigma**2) + target_spec = topK_select.multivariate_targets(nonzero, dispersion=sigma**2) - result = topK_select.selective_MLE(observed_target, - cov_target, - crosscov_target_score)[0] + result = topK_select.selective_MLE(target_spec)[0] bias_mle = np.asarray(result['MLE'])-beta_target bias_indest = np.asarray(result['unbiased'])-beta_target @@ -151,45 +128,3 @@ def test_bias_topK(n=500, return bias_mle, bias_indest - -# def main(nsim=5000, use_MLE=False): -# -# import matplotlib.pyplot as plt -# import statsmodels.api as sm -# U = np.linspace(0, 1, 101) -# -# P0, PA, cover, length_int = [], [], [], [] -# for i in range(nsim): -# p0, pA, cover_, intervals = test_topK(use_MLE=use_MLE) -# -# cover.extend(cover_) -# P0.extend(p0) -# PA.extend(pA) -# print(np.mean(cover),'coverage so far') -# -# period = 10 -# if use_MLE: -# period = 50 -# if i % period == 0 and i > 0: -# plt.clf() -# plt.plot(U, sm.distributions.ECDF(P0)(U), 'b', label='null') -# plt.plot(U, sm.distributions.ECDF(PA)(U), 'r', label='alt') -# plt.plot([0, 1], [0, 1], 'k--') -# plt.legend() -# plt.savefig('topK_pvals.pdf') - - -def main(nsim=500): - _bias_mle = [] - _bias_indest = [] - - for i in range(nsim): - bias_mle, bias_indest = test_bias_topK() - _bias_mle.extend(bias_mle) - _bias_indest.extend(bias_indest) - - print(np.mean(_bias_mle), np.mean(_bias_indest), 'bias so far: mle and independent estimate ') - - -if __name__ == "__main__": - main(nsim=500) diff --git a/selectinf/randomized/tests/test_unbiased_estimates.py b/selectinf/randomized/tests/test_unbiased_estimates.py index eb8beac0d..a7a91cb41 100644 --- a/selectinf/randomized/tests/test_unbiased_estimates.py +++ b/selectinf/randomized/tests/test_unbiased_estimates.py @@ -1,6 +1,7 @@ import numpy as np -from ..lasso import lasso, selected_targets +from ..lasso import lasso +from ...base import selected_targets from ...tests.instance import gaussian_instance def UMVU(query, @@ -31,8 +32,8 @@ def UMVU(query, _prec = np.linalg.inv(implied_cov[:n][:, :n]) linear_coef = (np.linalg.pinv(X[:, feat]).dot(_prec)) - offset = -np.linalg.pinv(X[:, feat]).dot(X.dot(query.initial_subgrad) - - _prec.dot(implied_cov[:n][:, n:]).dot(query.opt_linear.T.dot(query.initial_subgrad))) * (randomizer_prec) + offset = -np.linalg.pinv(X[:, feat]).dot(X.dot(query.observed_subgrad) + - _prec.dot(implied_cov[:n][:, n:]).dot(query.opt_linear.T.dot(query.observed_subgrad))) * (randomizer_prec) linear_coef *= dispersion offset *= dispersion @@ -47,24 +48,20 @@ def EST(query, feat, dispersion): - (observed_target, - cov_target, - cov_target_score, - alternatives) = selected_targets(query.loglike, - query._W, - feat, - dispersion=dispersion) + target_spec = selected_targets(query.loglike, + 
query.observed_soln, + dispersion=dispersion) _, randomizer_prec = query.randomizer.cov_prec cond_cov = query.cond_cov logdens_linear = query.sampler.logdens_transform[0] cond_mean = query.cond_mean - prec_target = np.linalg.inv(cov_target) + prec_target = np.linalg.inv(target_spec.cov_target) prec_opt = np.linalg.inv(cond_cov) - target_linear = cov_target_score.T.dot(prec_target) - target_offset = (-X.T.dot(Y) + query.initial_subgrad) - target_linear.dot(observed_target) + target_linear = target_spec.regress_target_score.T.dot(prec_target) #XXX problem here just switched cov_target_score to regress_target_score + target_offset = (-X.T.dot(Y) + query.observed_subgrad) - target_linear.dot(target_spec.observed_target) target_lin = - logdens_linear.dot(target_linear) target_off = cond_mean - target_lin.dot(observed_target) @@ -141,10 +138,3 @@ def test_UMVU(n=500, print("check ", np.allclose(est-umvu, np.zeros(est.shape[0]), atol=1e-03), est-umvu) return umvu, est - -def main(): - - test_UMVU(n=100, p=400, s=5) - -if __name__ == "__main__": - main() From 3320bd8a5ccfb613890659a709c0ba5694898e72 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Tue, 31 Aug 2021 00:16:52 -0400 Subject: [PATCH 141/187] test scale for posterior log likelihood --- selectinf/randomized/approx_reference.py | 8 +- selectinf/randomized/query.py | 104 +++++++-------- .../randomized/tests/test_approx_reference.py | 62 ++++++++- selectinf/randomized/tests/test_posterior.py | 120 ++++++++++++++++++ 4 files changed, 236 insertions(+), 58 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index 7d10c4ef1..31c3b88e3 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -172,7 +172,7 @@ def _construct_families(self): self._construct_density() self._families = [] - + _log_ref = np.zeros((self.ntarget, 1000)) for m in range(self.ntarget): observed_target_uni = (self.observed_target[m]).reshape((1,)) @@ -189,6 +189,7 @@ def _construct_families(self): logW = (approx_log_ref - 0.5 * (self.stat_grid[m] - self.observed_target[m]) ** 2 / var_target) logW -= logW.max() + _log_ref[m,:] = logW self._families.append(discrete_family(self.stat_grid[m], np.exp(logW))) else: @@ -204,10 +205,11 @@ def _construct_families(self): 0.5 * (grid - self.observed_target[m]) ** 2 / var_target) logW -= logW.max() + _log_ref[m, :] = logW self._families.append(discrete_family(grid, np.exp(logW))) - + self._log_ref = _log_ref # construction of families follows `selectinf.learning.core` # logG = - 0.5 * grid**2 / var_target @@ -253,7 +255,7 @@ def _approx_pivots(self, pivot.append(_cdf) else: raise ValueError('alternative should be in ["twosided", "less", "greater"]') - return pivot + return pivot, self._log_ref def _approx_intervals(self, level=0.9): diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index b2cd82373..a7d1908f9 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -106,59 +106,59 @@ def fit(self, perturb=None): # Private methods - def _setup_sampler(self, - linear_part, - offset, - opt_linear, - observed_subgrad, - dispersion=1): - - A, b = linear_part, offset - if not np.all(A.dot(self.observed_opt_state) - b <= 0): - raise ValueError('constraints not satisfied') - - (cond_mean, - cond_cov, - cond_precision, - regress_opt, - M1, - M2, - M3) = self._setup_implied_gaussian(opt_linear, - observed_subgrad, - dispersion=dispersion) + def _setup_sampler(self, + linear_part, + 
offset, + opt_linear, + observed_subgrad, + dispersion=1): + + A, b = linear_part, offset + if not np.all(A.dot(self.observed_opt_state) - b <= 0): + raise ValueError('constraints not satisfied') + + (cond_mean, + cond_cov, + cond_precision, + regress_opt, + M1, + M2, + M3) = self._setup_implied_gaussian(opt_linear, + observed_subgrad, + dispersion=dispersion) + + def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad + if score.ndim == 1: + mean_term = regress_opt.dot(score.T + u).T + else: + mean_term = regress_opt.dot(score.T + u[:, None]).T + arg = opt - mean_term + return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1) - def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad - if score.ndim == 1: - mean_term = regress_opt.dot(score.T + u).T - else: - mean_term = regress_opt.dot(score.T + u[:, None]).T - arg = opt - mean_term - return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1) - - log_density = functools.partial(log_density, - regress_opt, - observed_subgrad, - cond_precision) - - self.cond_mean, self.cond_cov = cond_mean, cond_cov - - affine_con = constraints(A, - b, - mean=cond_mean, - covariance=cond_cov) - - self.sampler = affine_gaussian_sampler(affine_con, - self.observed_opt_state, - self.observed_score_state, - log_density, - regress_opt, # not needed? - observed_subgrad, - opt_linear, # L - M1, - M2, - M3, - selection_info=self.selection_variable, - useC=self.useC) + log_density = functools.partial(log_density, + regress_opt, + observed_subgrad, + cond_precision) + + self.cond_mean, self.cond_cov = cond_mean, cond_cov + + affine_con = constraints(A, + b, + mean=cond_mean, + covariance=cond_cov) + + self.sampler = affine_gaussian_sampler(affine_con, + self.observed_opt_state, + self.observed_score_state, + log_density, + regress_opt, # not needed? 
+ observed_subgrad, + opt_linear, # L + M1, + M2, + M3, + selection_info=self.selection_variable, + useC=self.useC) def _setup_implied_gaussian(self, opt_linear, diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index 7dc873368..d0351e017 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -5,7 +5,8 @@ from ...base import selected_targets from ..approx_reference import approximate_grid_inference -def test_inf(n=500, +def test_inf(seedn, + n=500, p=100, signal_fac=1., s=5, @@ -16,6 +17,7 @@ def test_inf(n=500, useIP=True, CI=False): + np.random.seed(seedn) inst, const = gaussian_instance, lasso.gaussian signal = np.sqrt(signal_fac * 2 * np.log(p)) @@ -65,9 +67,9 @@ def test_inf(n=500, useIP=useIP) if CI is False: - pivot = approximate_grid_inf._approx_pivots(beta_target) + pivot, log_ref = approximate_grid_inf._approx_pivots(beta_target) - return pivot + return pivot, log_ref else: lci, uci = approximate_grid_inf._approx_intervals(level=0.90) beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) @@ -77,3 +79,57 @@ def test_inf(n=500, return np.mean(coverage), np.mean(length) +def main(nsim=300, CI = False): + + import matplotlib as mpl + mpl.use('tkagg') + import matplotlib.pyplot as plt + from statsmodels.distributions.empirical_distribution import ECDF + + if CI is False: + _pivot = [] + for i in range(nsim): + _pivot.extend(test_inf(n=100, + p=400, + signal_fac=0.5, + s=0, + sigma=2., + rho=0.30, + randomizer_scale=1., + equicorrelated=True, + useIP=False, + CI=False)) + + print("iteration completed ", i) + + plt.clf() + ecdf_MLE = ECDF(np.asarray(_pivot)) + grid = np.linspace(0, 1, 101) + plt.plot(grid, ecdf_MLE(grid), c='blue', marker='^') + plt.plot(grid, grid, 'k--') + plt.show() + + if CI is True: + coverage_ = 0. + length_ = 0. 
+ for n in range(nsim): + cov, len = test_inf(n=100, + p=400, + signal_fac=0.5, + s=5, + sigma=2., + rho=0.30, + randomizer_scale=1., + equicorrelated=True, + useIP=True, + CI=True) + + coverage_ += cov + length_ += len + print("coverage so far ", coverage_ / (n + 1.)) + print("lengths so far ", length_ / (n + 1.)) + print("iteration completed ", n + 1) + +if __name__ == "__main__": + main(nsim=1, CI = False) + diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index b6c1c8ddb..62e7b783f 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -250,4 +250,124 @@ def prior(target_parameter): return samples +def test_hiv_data(nsample=10000, + nburnin=500, + level=0.90, + split_proportion=0.50, + seedn=1): + np.random.seed(seedn) + + alpha = (1 - level) / 2 + Z_quantile = ndist.ppf(1 - alpha) + + X, Y, _ = HIV_NRTI(standardize=True) + Y *= 15 + n, p = X.shape + X /= np.sqrt(n) + + ols_fit = np.linalg.pinv(X).dot(Y) + _sigma = np.linalg.norm(Y - X.dot(ols_fit)) / np.sqrt(n - p - 1) + + const = split_lasso.gaussian + + dispersion = _sigma ** 2 + + W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * _sigma + + conv = const(X, + Y, + W, + proportion=split_proportion) + + signs = conv.fit() + nonzero = signs != 0 + + (observed_target, + cov_target, + regress_target_score, + dispersion, + alternatives) = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + + mle, inverse_info = conv.selective_MLE(observed_target, + cov_target, + regress_target_score, + dispersion, + level=level, + solve_args={'tol': 1.e-12})[:2] + + approx_inf = conv.approximate_grid_inference(observed_target, + cov_target, + regress_target_score, + dispersion) + + posterior_inf = conv.posterior(observed_target, + cov_target, + regress_target_score, + dispersion=dispersion) + + samples_langevin = langevin_sampler(posterior_inf, + nsample=nsample, + nburnin=nburnin, + step=1.) 
+ + lower_langevin = np.percentile(samples_langevin, int(alpha * 100), axis=0) + upper_langevin = np.percentile(samples_langevin, int((1 - alpha) * 100), axis=0) + + samples_gibbs, scale_gibbs = gibbs_sampler(posterior_inf, + nsample=nsample, + nburnin=nburnin) + + lower_gibbs = np.percentile(samples_gibbs, int(alpha * 100), axis=0) + upper_gibbs = np.percentile(samples_gibbs, int((1 - alpha) * 100), axis=0) + + naive_est = np.linalg.pinv(X[:, nonzero]).dot(Y) + naive_cov = dispersion * np.linalg.inv(X[:, nonzero].T.dot(X[:, nonzero])) + naive_intervals = np.vstack([naive_est - Z_quantile * np.sqrt(np.diag(naive_cov)), + naive_est + Z_quantile * np.sqrt(np.diag(naive_cov))]).T + + X_split = X[~conv._selection_idx, :] + Y_split = Y[~conv._selection_idx] + split_est = np.linalg.pinv(X_split[:, nonzero]).dot(Y_split) + split_cov = dispersion * np.linalg.inv(X_split[:, nonzero].T.dot(X_split[:, nonzero])) + split_intervals = np.vstack([split_est - Z_quantile * np.sqrt(np.diag(split_cov)), + split_est + Z_quantile * np.sqrt(np.diag(split_cov))]).T + + print("lengths: adjusted intervals Langevin, Gibbs, MLE1, MLE2, approx ", + np.mean(upper_langevin - lower_langevin), + np.mean(upper_gibbs - lower_gibbs), + np.mean((2 * Z_quantile) * np.sqrt(np.diag(posterior_inf.inverse_info))), + np.mean(mle['upper_confidence'] - mle['lower_confidence']), + np.mean(approx_inf['upper_confidence'] - approx_inf['lower_confidence']) + ) + + print("lengths: naive intervals ", np.mean(naive_intervals[:, 1] - naive_intervals[:, 0])) + + print("lengths: split intervals ", np.mean(split_intervals[:, 1] - split_intervals[:, 0])) + + scale_interval = np.percentile(scale_gibbs, [alpha * 100, (1 - alpha) * 100]) + output = pd.DataFrame({'Langevin_lower_credible': lower_langevin, + 'Langevin_upper_credible': upper_langevin, + 'Gibbs_lower_credible': lower_gibbs, + 'Gibbs_upper_credible': upper_gibbs, + 'MLE_lower_confidence': mle['lower_confidence'], + 'MLE_upper_confidence': mle['upper_confidence'], + 'approx_lower_confidence': approx_inf['lower_confidence'], + 'approx_upper_confidence': approx_inf['upper_confidence'], + 'Split_lower_confidence': split_intervals[:, 0], + 'Split_upper_confidence': split_intervals[:, 1], + 'Naive_lower_confidence': naive_intervals[:, 0], + 'Naive_upper_confidence': naive_intervals[:, 1] + }) + + return output, scale_interval, _sigma + + +if __name__ == "__main__": + #test_hiv_data(split_proportion=0.50) + test_coverage(nsim=1) + + From 5fb9c81def15ba009920b4b33dc7c3eb9f456e8d Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Wed, 8 Sep 2021 21:42:23 -0400 Subject: [PATCH 142/187] commit changes before switch --- selectinf/randomized/posterior_inference.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 4284f5211..a3bdbd39e 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -1,12 +1,19 @@ from __future__ import division, print_function import numpy as np +import typing + from scipy.stats import norm as ndist, invgamma from scipy.linalg import fractional_matrix_power from ..algorithms.barrier_affine import solve_barrier_affine_py +class PosteriorAtt(typing.NamedTuple): + + logPosterior: float + grad_logPosterior: np.ndarray + class posterior(object): """ Parameters @@ -124,8 +131,11 @@ def log_posterior(self, log_prior, grad_prior = self.prior(target_parameter) - return (self.dispersion * (log_lik - 
self.log_ref) / sigmasq + log_prior, - self.dispersion * grad_lik / sigmasq + grad_prior) + log_posterior = self.dispersion * (log_lik - self.log_ref) / sigmasq + log_prior + grad_log_posterior = self.dispersion * grad_lik / sigmasq + grad_prior + + return PosteriorAtt(log_posterior, + grad_log_posterior) ### Private method @@ -228,7 +238,7 @@ def gibbs_sampler(selective_posterior, scale_update_sq = invgamma.rvs(a=(0.1 + selective_posterior.ntarget + selective_posterior.ntarget / 2), - scale=0.1 - ((scale_update ** 2) * sampler.posterior_[0]), + scale=0.1 - ((scale_update ** 2) * sampler.posterior_.logPosterior), size=1) scale_samples[i] = np.sqrt(scale_update_sq) sampler.scaling = np.sqrt(scale_update_sq) @@ -269,7 +279,7 @@ def __next__(self): while True: self.posterior_ = self.gradient_map(self.state, self.scaling) _proposal = self.proposal_sqrt.dot(self._noise.rvs(self._shape)) - candidate = (self.state + self.stepsize * self.proposal_scale.dot(self.posterior_[1]) + candidate = (self.state + self.stepsize * self.proposal_scale.dot(self.posterior_.grad_logPosterior) + np.sqrt(2.) * _proposal * self._sqrt_step) if not np.all(np.isfinite(self.gradient_map(candidate, self.scaling)[1])): From 68b1aa42340273330da776c4de1efe59d9020ab2 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Tue, 21 Sep 2021 12:37:04 -0400 Subject: [PATCH 143/187] fix alignment of _setup_sampler --- selectinf/randomized/query.py | 104 +++++++++++++++++----------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index a7d1908f9..b2cd82373 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -106,59 +106,59 @@ def fit(self, perturb=None): # Private methods - def _setup_sampler(self, - linear_part, - offset, - opt_linear, - observed_subgrad, - dispersion=1): - - A, b = linear_part, offset - if not np.all(A.dot(self.observed_opt_state) - b <= 0): - raise ValueError('constraints not satisfied') - - (cond_mean, - cond_cov, - cond_precision, - regress_opt, - M1, - M2, - M3) = self._setup_implied_gaussian(opt_linear, - observed_subgrad, - dispersion=dispersion) - - def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad - if score.ndim == 1: - mean_term = regress_opt.dot(score.T + u).T - else: - mean_term = regress_opt.dot(score.T + u[:, None]).T - arg = opt - mean_term - return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1) - - log_density = functools.partial(log_density, - regress_opt, + def _setup_sampler(self, + linear_part, + offset, + opt_linear, + observed_subgrad, + dispersion=1): + + A, b = linear_part, offset + if not np.all(A.dot(self.observed_opt_state) - b <= 0): + raise ValueError('constraints not satisfied') + + (cond_mean, + cond_cov, + cond_precision, + regress_opt, + M1, + M2, + M3) = self._setup_implied_gaussian(opt_linear, observed_subgrad, - cond_precision) - - self.cond_mean, self.cond_cov = cond_mean, cond_cov - - affine_con = constraints(A, - b, - mean=cond_mean, - covariance=cond_cov) - - self.sampler = affine_gaussian_sampler(affine_con, - self.observed_opt_state, - self.observed_score_state, - log_density, - regress_opt, # not needed? 
- observed_subgrad, - opt_linear, # L - M1, - M2, - M3, - selection_info=self.selection_variable, - useC=self.useC) + dispersion=dispersion) + + def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad + if score.ndim == 1: + mean_term = regress_opt.dot(score.T + u).T + else: + mean_term = regress_opt.dot(score.T + u[:, None]).T + arg = opt - mean_term + return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1) + + log_density = functools.partial(log_density, + regress_opt, + observed_subgrad, + cond_precision) + + self.cond_mean, self.cond_cov = cond_mean, cond_cov + + affine_con = constraints(A, + b, + mean=cond_mean, + covariance=cond_cov) + + self.sampler = affine_gaussian_sampler(affine_con, + self.observed_opt_state, + self.observed_score_state, + log_density, + regress_opt, # not needed? + observed_subgrad, + opt_linear, # L + M1, + M2, + M3, + selection_info=self.selection_variable, + useC=self.useC) def _setup_implied_gaussian(self, opt_linear, From 2ac3f270f82a949388025eeda90f2bf7b1f40e24 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Sun, 3 Oct 2021 17:33:43 -0400 Subject: [PATCH 144/187] added setup_inference post fit: can pass dispersion argument --- selectinf/randomized/lasso.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index 6beb26dc0..ad4c1ac57 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -220,22 +220,30 @@ def signed_basis_vector(p, j, s): A_scaling = -np.identity(num_opt_var) b_scaling = np.zeros(num_opt_var) - self._setup_sampler_data = (A_scaling[:active.sum()], - b_scaling[:active.sum()], - opt_linear, - self.observed_subgrad) - #### to be fixed -- set the cov_score here without dispersion self._unscaled_cov_score = _hessian - ##### - - if num_opt_var > 0: - self._setup_sampler(*self._setup_sampler_data) + self.num_opt_var = num_opt_var + + self.A_scaling = A_scaling + self.b_scaling = b_scaling + self.active = active return active_signs + def setup_inference(self, + dispersion): + + self._setup_sampler_data = (self.A_scaling[:self.active.sum()], + self.b_scaling[:self.active.sum()], + self.opt_linear, + self.observed_subgrad, + dispersion) + + if self.num_opt_var > 0: + self._setup_sampler(*self._setup_sampler_data) + def _solve_randomized_problem(self, perturb=None, solve_args={'tol': 1.e-12, 'min_its': 50}): From 990152c04f446785344d88d90a39e18bc0b52ce1 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Sun, 3 Oct 2021 23:27:14 -0400 Subject: [PATCH 145/187] update tests-- selected targets (mle) --- selectinf/randomized/query.py | 15 +++++++-------- .../randomized/tests/test_selective_MLE_high.py | 6 ++++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index b2cd82373..c75bee97e 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -314,8 +314,8 @@ def selective_MLE(self, return self.sampler.selective_MLE(target_spec, self.observed_opt_state, - level=level, - solve_args=solve_args) + solve_args=solve_args, + level=level) def posterior(self, target_spec, @@ -1363,7 +1363,7 @@ def selective_MLE(target_spec, (observed_target, cov_target, regress_target_score) = target_spec[:3] - + if np.asarray(observed_target).shape in [(), (0,)]: raise ValueError('no target specified') @@ -1415,13 +1415,12 @@ def selective_MLE(target_spec, pvalues = 2 * np.minimum(pvalues, 1 - pvalues) - alpha = 1 - level + 
alpha = 1. - level + quantile = ndist.ppf(1 - alpha / 2.) - intervals = np.vstack([final_estimator - - quantile * np.sqrt(np.diag(observed_info_mean)), - final_estimator + - quantile * np.sqrt(np.diag(observed_info_mean))]).T + intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), + final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T log_ref = val + conjugate_arg.T.dot(cond_cov).dot(conjugate_arg) / 2. diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 552d2b9ce..389951145 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -131,17 +131,19 @@ def test_selected_targets(n=2000, nonzero = signs != 0 print("dimensions", n, p, nonzero.sum()) + if nonzero.sum() > 0: dispersion = None if full_dispersion: dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + conv.setup_inference(dispersion=dispersion) + target_spec = selected_targets(conv.loglike, conv.observed_soln, dispersion=dispersion) - result = conv.selective_MLE(target_spec, - dispersion)[0] + result = conv.selective_MLE(target_spec)[0] pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) From add188869574816fd663a26889d85ee9ddf03b48 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 4 Oct 2021 09:28:11 -0400 Subject: [PATCH 146/187] removing dispersion from list returned by target forming functions --- selectinf/base.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/selectinf/base.py b/selectinf/base.py index b6fbc182a..3c8100cf5 100644 --- a/selectinf/base.py +++ b/selectinf/base.py @@ -53,7 +53,7 @@ class TargetSpec(typing.NamedTuple): cov_target : np.ndarray regress_target_score : np.ndarray alternatives : list - dispersion : float = 1 + #dispersion : float = 1 def selected_targets(loglike, solution, @@ -99,8 +99,7 @@ def selected_targets(loglike, return TargetSpec(observed_target, cov_target * dispersion, regress_target_score, - alternatives, - dispersion) + alternatives) def full_targets(loglike, solution, @@ -143,8 +142,7 @@ def full_targets(loglike, return TargetSpec(observed_target, cov_target * dispersion, regress_target_score, - alternatives, - dispersion) + alternatives) def debiased_targets(loglike, solution, @@ -213,8 +211,7 @@ def debiased_targets(loglike, return TargetSpec(observed_target, cov_target * dispersion, Qinv_hat, - alternatives, - dispersion) + alternatives) def form_targets(target, loglike, From afa029e373a4c907ea3a59bbd6e22f9460cf5b4e Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 4 Oct 2021 09:32:31 -0400 Subject: [PATCH 147/187] give dispersion as an argument for posterior class: needs it for sampling when sigma is unknown --- selectinf/randomized/posterior_inference.py | 5 ++--- selectinf/randomized/query.py | 2 ++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index a3bdbd39e..194b6c6b4 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -37,6 +37,7 @@ class posterior(object): def __init__(self, query, target_spec, + dispersion, prior, solve_args={'tol': 1.e-12}): @@ -44,9 +45,7 @@ def __init__(self, (observed_target, cov_target, - regress_target_score, - _, - dispersion) = target_spec + regress_target_score) = 
target_spec[:3] linear_part = query.sampler.affine_con.linear_part offset = query.sampler.affine_con.offset diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index c75bee97e..0c943604d 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -319,6 +319,7 @@ def selective_MLE(self, def posterior(self, target_spec, + dispersion=1, prior=None, solve_args={'tol': 1.e-12}): """ @@ -350,6 +351,7 @@ def prior(target_parameter): return posterior(self, target_spec, + dispersion, prior, solve_args=solve_args) From e37d0c2a28b5dcc0e1712c868bffd144d4e9f45b Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 4 Oct 2021 10:04:09 -0400 Subject: [PATCH 148/187] add setup_inference to split_lasso, lasso --- selectinf/randomized/lasso.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index ad4c1ac57..e1774f2c0 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -238,11 +238,11 @@ def setup_inference(self, self._setup_sampler_data = (self.A_scaling[:self.active.sum()], self.b_scaling[:self.active.sum()], self.opt_linear, - self.observed_subgrad, - dispersion) + self.observed_subgrad) if self.num_opt_var > 0: - self._setup_sampler(*self._setup_sampler_data) + self._setup_sampler(*self._setup_sampler_data, + dispersion=dispersion) def _solve_randomized_problem(self, perturb=None, @@ -731,19 +731,31 @@ def fit(self, n, p = X.shape df_fit = len(self.selection_variable['variables']) - dispersion = 2 * (self.loglike.smooth_objective(self._beta_full, + dispersion = 2 * (self.loglike.smooth_objective(self._beta_full, 'func') / - (n - df_fit)) + (n - df_fit)) - # run setup again after - # estimating dispersion - - if df_fit > 0: - self._setup_sampler(*self._setup_sampler_data, - dispersion=dispersion) + self.df_fit = df_fit + self.dispersion = dispersion + # run setup again after + # estimating dispersion return signs + + def setup_inference(self, + dispersion=None): + + if self.df_fit>0: + + if dispersion is None: + self._setup_sampler(*self._setup_sampler_data, + dispersion=self.dispersion) + + else: + self._setup_sampler(*self._setup_sampler_data, + dispersion=dispersion) + def _setup_implied_gaussian(self, opt_linear, observed_subgrad, From 66e4e4075b6582e1fb51410a3c21710bd4255f7a Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 4 Oct 2021 11:01:34 -0400 Subject: [PATCH 149/187] updated tests: for mle --- selectinf/bayesian/utils.py | 73 +++++++++++ selectinf/randomized/lasso.py | 2 +- .../tests/test_selective_MLE_high.py | 41 ++++-- .../bayesian/selection_aware_posterior.py | 124 ++++++++++++++++++ 4 files changed, 230 insertions(+), 10 deletions(-) create mode 100644 selectinf/bayesian/utils.py create mode 100644 selection/bayesian/selection_aware_posterior.py diff --git a/selectinf/bayesian/utils.py b/selectinf/bayesian/utils.py new file mode 100644 index 000000000..90e51ae8a --- /dev/null +++ b/selectinf/bayesian/utils.py @@ -0,0 +1,73 @@ +import numpy as np +from scipy.linalg import fractional_matrix_power +from scipy.stats import norm as ndist + +class langevin(object): + + def __init__(self, + initial_condition, + gradient_map, + stepsize, + proposal_scale): + + (self.state, + self.gradient_map, + self.stepsize) = (np.copy(initial_condition), + gradient_map, + stepsize) + self._shape = self.state.shape[0] + self._sqrt_step = np.sqrt(self.stepsize) + self._noise = ndist(loc=0,scale=1) + 
self.sample = np.copy(initial_condition) + + self.proposal_scale = proposal_scale + self.proposal_sqrt = fractional_matrix_power(self.proposal_scale, 0.5) + + def __iter__(self): + return self + + def next(self): + return self.__next__() + + def __next__(self): + while True: + + gradient_posterior, draw, _ = self.gradient_map(self.state) + + candidate = (self.state + self.stepsize * self.proposal_scale.dot(gradient_posterior) + + np.sqrt(2.) * (self.proposal_sqrt.dot(self._noise.rvs(self._shape))) * self._sqrt_step) + + if not np.all(np.isfinite(self.gradient_map(candidate)[0])): + self.stepsize *= 0.5 + self._sqrt_step = np.sqrt(self.stepsize) + else: + self.state[:] = candidate + self.sample[:] = draw + #print(" next sample ", self.state[:], self.sample[:]) + break + + return self.sample + +def langevin_sampler(posterior, + nsample=2000, + nburnin=100, + step_frac=0.3, + start=None): + + if start is None: + start = posterior.initialize_sampler(posterior.initial_estimate) + + state = np.append(start, np.ones(posterior.target_size)) + stepsize = 1. / (step_frac * (2 * posterior.target_size)) + proposal_scale = np.identity(int(2 * posterior.target_size)) + sampler = langevin(state, posterior.gradient_log_likelihood, stepsize, proposal_scale) + + samples = np.zeros((nsample, 2 * posterior.target_size)) + + for i, sample in enumerate(sampler): + samples[i, :] = sampler.sample.copy() + print(" next sample ", i, samples[i, :]) + if i == nsample - 1: + break + + return samples[nburnin:, :] \ No newline at end of file diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index e1774f2c0..6757ffa0e 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -751,7 +751,7 @@ def setup_inference(self, if dispersion is None: self._setup_sampler(*self._setup_sampler_data, dispersion=self.dispersion) - + else: self._setup_sampler(*self._setup_sampler_data, dispersion=dispersion) diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 389951145..a55a86686 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -62,7 +62,7 @@ def test_full_targets(n=200, if full_dispersion: dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) else: - dispersion = None + dispersion = np.linalg.norm(Y - X[:,nonzero].dot(np.linalg.pinv(X[:,nonzero]).dot(Y))) ** 2 / (n - nonzero.sum()) if n > p: target_spec = full_targets(conv.loglike, @@ -76,6 +76,8 @@ def test_full_targets(n=200, penalty=conv.penalty, dispersion=dispersion) + conv.setup_inference(dispersion=dispersion) + result = conv.selective_MLE(target_spec)[0] pval = result['pvalue'] @@ -133,9 +135,11 @@ def test_selected_targets(n=2000, if nonzero.sum() > 0: - dispersion = None + if full_dispersion: dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + else: + dispersion = np.linalg.norm(Y - X[:,nonzero].dot(np.linalg.pinv(X[:,nonzero]).dot(Y))) ** 2 / (n - nonzero.sum()) conv.setup_inference(dispersion=dispersion) @@ -173,6 +177,8 @@ def test_instance(): print("check ", M) dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) + L.setup_inference(dispersion=dispersion) + target_spec = selected_targets(L.loglike, L.observed_soln, features=M, @@ -180,8 +186,7 @@ def test_instance(): print("check shapes", target_spec.observed_target.shape, E.sum()) - result = L.selective_MLE(target_spec, 
- dispersion)[0] + result = L.selective_MLE(target_spec)[0] intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) @@ -231,16 +236,18 @@ def test_selected_targets_disperse(n=500, print("dimensions", n, p, nonzero.sum()) if nonzero.sum() > 0: - dispersion = None if full_dispersion: dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + else: + dispersion = np.linalg.norm(Y - X[:,nonzero].dot(np.linalg.pinv(X[:,nonzero]).dot(Y))) ** 2 / (n - nonzero.sum()) + + conv.setup_inference(dispersion=dispersion) target_spec = selected_targets(conv.loglike, conv.observed_soln, dispersion=dispersion) - result = conv.selective_MLE(target_spec, - dispersion)[0] + result = conv.selective_MLE(target_spec)[0] pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) @@ -290,6 +297,8 @@ def test_logistic(n=2000, if nonzero.sum() > 0: + conv.setup_inference(dispersion=1) + target_spec = selected_targets(conv.loglike, conv.observed_soln, dispersion=1) @@ -340,6 +349,8 @@ def test_logistic_split(n=2000, if nonzero.sum() > 0: + conv.setup_inference(dispersion=1) + target_spec = selected_targets(conv.loglike, conv.observed_soln, dispersion=1) @@ -389,6 +400,7 @@ def test_poisson(n=2000, print("dimensions", n, p, nonzero.sum()) if nonzero.sum() > 0: + conv.setup_inference(dispersion=1) target_spec = selected_targets(conv.loglike, conv.observed_soln, @@ -440,6 +452,8 @@ def test_poisson_split(n=2000, if nonzero.sum() > 0: + conv.setup_inference(dispersion=1) + target_spec = selected_targets(conv.loglike, conv.observed_soln, dispersion=1) @@ -493,6 +507,8 @@ def test_cox(n=2000, cox_full = rr.glm.cox(X, T, S) full_hess = cox_full.hessian(conv.observed_soln) + conv.setup_inference(dispersion=1) + target_spec = selected_targets(conv.loglike, conv.observed_soln, dispersion=1) @@ -546,6 +562,8 @@ def test_cox_split(n=2000, cox_full = rr.glm.cox(X, T, S) full_hess = cox_full.hessian(conv.observed_soln) + conv.setup_inference(dispersion=1) + target_spec = selected_targets(conv.loglike, conv.observed_soln, dispersion=1) @@ -606,15 +624,18 @@ def test_scale_invariant_split(n=200, print('feature_weights', conv.feature_weights[0] / scale) dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + conv.setup_inference(dispersion=dispersion) + target_spec = selected_targets(conv.loglike, conv.observed_soln, dispersion=dispersion) - print('dispersion', target_spec.dispersion/scale**2) + #print('dispersion', target_spec.dispersion/scale**2) print('target', target_spec.observed_target[0]/scale) print('cov_target', target_spec.cov_target[0,0]/scale**2) print('regress_target_score', target_spec.regress_target_score[0,0]/scale**2) - + + result = conv.selective_MLE(target_spec)[0] print(result['MLE'] / scale) @@ -682,6 +703,8 @@ def test_scale_invariant(n=200, print('perturb', conv._initial_omega[0] / scale) dispersion = np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p) + conv.setup_inference(dispersion=dispersion) + target_spec = selected_targets(conv.loglike, conv.observed_soln, dispersion=dispersion) diff --git a/selection/bayesian/selection_aware_posterior.py b/selection/bayesian/selection_aware_posterior.py new file mode 100644 index 000000000..8e00d3220 --- /dev/null +++ b/selection/bayesian/selection_aware_posterior.py @@ -0,0 +1,124 @@ +import numpy as np, sys +from selection.randomized.selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C +from scipy.stats import norm as ndist + 
+class posterior_inference(): + + + def __init__(self, + observed_target, + cov_target, + cov_target_score, + feasible_point, + cond_mean, + cond_cov, + logdens_linear, + linear_part, + offset, + ini_estimate): + + self.observed_target = observed_target + self.cov_target = cov_target + self.cov_target_score = cov_target_score + + self.feasible_point = feasible_point + self.cond_mean = cond_mean + self.cond_cov = cond_cov + self.target_size = cond_cov.shape[0] + self.logdens_linear = logdens_linear + self.linear_part = linear_part + self.offset = offset + self.ini_estimate = ini_estimate + + def prior(self, target_parameter, var_parameter, lam): + + std_parameter = np.sqrt(var_parameter) + grad_prior_par = -np.true_divide(target_parameter, var_parameter) + grad_prior_std = np.true_divide(target_parameter**2. , 2.*(var_parameter**2))- (lam/2.)-1./(2.*var_parameter) + log_prior = -(np.linalg.norm(target_parameter)**2.) / (2.*var_parameter) - (lam * (np.linalg.norm(std_parameter)**2)/2.)-np.log(std_parameter) + return grad_prior_par, grad_prior_std, log_prior + + def det_initial_point(self, initial_soln, solve_args={'tol':1.e-12}): + + if np.asarray(self.observed_target).shape in [(), (0,)]: + raise ValueError('no target specified') + + observed_target = np.atleast_1d(self.observed_target) + prec_target = np.linalg.inv(self.cov_target) + + target_lin = - self.logdens_linear.dot(self.cov_target_score.T.dot(prec_target)) + target_offset = self.cond_mean - target_lin.dot(observed_target) + + prec_opt = np.linalg.inv(self.cond_cov) + mean_opt = target_lin.dot(initial_soln) + target_offset + conjugate_arg = prec_opt.dot(mean_opt) + + solver = solve_barrier_affine_py + + val, soln, hess = solver(conjugate_arg, + prec_opt, + self.feasible_point, + self.linear_part, + self.offset, + **solve_args) + + initial_point = initial_soln + self.cov_target.dot(target_lin.T.dot(prec_opt.dot(mean_opt - soln))) + return initial_point + + def gradient_log_likelihood(self, parameters, solve_args={'tol':1.e-15}): + + npar = self.target_size + target_parameter = parameters[:npar] + var_parameter = parameters[npar:] + if np.asarray(self.observed_target).shape in [(), (0,)]: + raise ValueError('no target specified') + + observed_target = np.atleast_1d(self.observed_target) + prec_target = np.linalg.inv(self.cov_target) + + target_lin = - self.logdens_linear.dot(self.cov_target_score.T.dot(prec_target)) + target_offset = self.cond_mean - target_lin.dot(observed_target) + + prec_opt = np.linalg.inv(self.cond_cov) + mean_opt = target_lin.dot(target_parameter) + target_offset + conjugate_arg = prec_opt.dot(mean_opt) + + solver = solve_barrier_affine_C + + val, soln, hess = solver(conjugate_arg, + prec_opt, + self.feasible_point, + self.linear_part, + self.offset, + **solve_args) + + reparam = target_parameter + self.cov_target.dot(target_lin.T.dot(prec_opt.dot(mean_opt - soln))) + neg_normalizer = (target_parameter - reparam).T.dot(prec_target).dot(target_parameter - reparam)/2. \ + + val + mean_opt.T.dot(prec_opt).dot(mean_opt) / 2. + + grad_barrier = np.diag(2. / ((1. + soln) ** 3.) - 2. / (soln ** 3.)) + + L = target_lin.T.dot(prec_opt) + N = L.dot(hess) + jacobian = (np.identity(observed_target.shape[0]) + self.cov_target.dot(L).dot(target_lin)) - \ + self.cov_target.dot(N).dot(L.T) + + log_lik = -((observed_target - reparam).T.dot(prec_target).dot(observed_target - reparam)) / 2. 
+ neg_normalizer \ + + np.log(np.linalg.det(jacobian)) + + grad_lik = jacobian.T.dot(prec_target).dot(observed_target) + grad_neg_normalizer = -jacobian.T.dot(prec_target).dot(target_parameter) + + opt_num = self.cond_cov.shape[0] + grad_jacobian = np.zeros(opt_num) + A = np.linalg.inv(jacobian).dot(self.cov_target).dot(N) + for j in range(opt_num): + M = grad_barrier.dot(np.diag(N.T[:, j])) + grad_jacobian[j] = np.trace(A.dot(M).dot(N.T)) + + prior_info = self.hierarchical_prior(reparam, var_parameter, lam=0.01) + return np.append(grad_lik + grad_neg_normalizer + grad_jacobian + jacobian.T.dot(prior_info[0]), prior_info[1]),\ + np.append(reparam, var_parameter), log_lik + prior_info[2] + + + From 4c665702de3a29c346ff6aa63e42496acefc520f Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 4 Oct 2021 16:07:50 -0400 Subject: [PATCH 150/187] updated tests for posterior inference --- selectinf/randomized/query.py | 2 +- selectinf/randomized/tests/test_posterior.py | 73 +++++++++++--------- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 0c943604d..8573cda2b 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -358,7 +358,7 @@ def prior(target_parameter): def approximate_grid_inference(self, target_spec, solve_args={'tol': 1.e-12}, - useIP=False): + useIP=True): """ Parameters diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index 62e7b783f..1fc38bd32 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -51,24 +51,29 @@ def test_Langevin(n=500, signs = conv.fit() nonzero = signs != 0 - beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + if nonzero.sum()>0: - target_spec = selected_targets(conv.loglike, - conv.observed_soln, - dispersion=dispersion) + conv.setup_inference(dispersion=dispersion) - posterior_inf = conv.posterior(target_spec) + beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) - samples = langevin_sampler(posterior_inf, - nsample=nsample, - nburnin=nburnin) + target_spec = selected_targets(conv.loglike, + conv.observed_soln, + dispersion=dispersion) - lci = np.percentile(samples, 5, axis=0) - uci = np.percentile(samples, 95, axis=0) - coverage = (lci < beta_target) * (uci > beta_target) - length = uci - lci + posterior_inf = conv.posterior(target_spec, + dispersion=dispersion) - return np.mean(coverage), np.mean(length) + samples = langevin_sampler(posterior_inf, + nsample=nsample, + nburnin=nburnin) + + lci = np.percentile(samples, 5, axis=0) + uci = np.percentile(samples, 95, axis=0) + coverage = (lci < beta_target) * (uci > beta_target) + length = uci - lci + + return np.mean(coverage), np.mean(length) @set_seed_iftrue(SET_SEED) @@ -116,13 +121,17 @@ def test_instance(nsample=100, nburnin=50): M[-3:] = 1 dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) + L.setup_inference(dispersion=dispersion) + target_spec = selected_targets(L.loglike, L.observed_soln, features=M, dispersion=dispersion) + print(target_spec.dispersion, dispersion) - posterior_inf = L.posterior(target_spec) + posterior_inf = L.posterior(target_spec, + dispersion=dispersion) samples = langevin_sampler(posterior_inf, nsample=nsample, @@ -166,6 +175,8 @@ def test_flexible_prior1(nsample=100, M[-3:] = 1 dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) + 
L.setup_inference(dispersion=dispersion) + target_spec = selected_targets(L.loglike, L.observed_soln, features=M, @@ -185,6 +196,7 @@ def prior(target_parameter): Z1 = np.random.standard_normal() posterior_inf1 = L.posterior(target_spec, + dispersion=dispersion, prior=prior) W1 = np.random.standard_normal() @@ -226,6 +238,8 @@ def test_flexible_prior2(nsample=1000, nburnin=50): M[-3:] = 1 dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) + L.setup_inference(dispersion=dispersion) + target_spec = selected_targets(L.loglike, L.observed_soln, features=M, @@ -239,6 +253,7 @@ def prior(target_parameter): return log_prior, grad_prior posterior_inf = L.posterior(target_spec, + dispersion=dispersion, prior=prior) adaptive_proposal = np.linalg.inv(np.linalg.inv(posterior_inf.inverse_info) + @@ -282,30 +297,20 @@ def test_hiv_data(nsample=10000, signs = conv.fit() nonzero = signs != 0 - (observed_target, - cov_target, - regress_target_score, - dispersion, - alternatives) = selected_targets(conv.loglike, - conv._W, - nonzero, - dispersion=dispersion) - - mle, inverse_info = conv.selective_MLE(observed_target, - cov_target, - regress_target_score, - dispersion, + conv.setup_inference(dispersion=dispersion) + + target_spec = selected_targets(conv.loglike, + conv._W, + nonzero, + dispersion=dispersion) + + mle, inverse_info = conv.selective_MLE(target_spec, level=level, solve_args={'tol': 1.e-12})[:2] - approx_inf = conv.approximate_grid_inference(observed_target, - cov_target, - regress_target_score, - dispersion) + approx_inf = conv.approximate_grid_inference(target_spec) - posterior_inf = conv.posterior(observed_target, - cov_target, - regress_target_score, + posterior_inf = conv.posterior(target_spec, dispersion=dispersion) samples_langevin = langevin_sampler(posterior_inf, From a828983296a4dde82b64a4f6e055d38aaf871740 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 4 Oct 2021 16:10:38 -0400 Subject: [PATCH 151/187] updated tests for approx and exact reference --- selectinf/randomized/tests/test_approx_reference.py | 4 +++- selectinf/randomized/tests/test_exact_reference.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index d0351e017..906b43fc9 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -56,6 +56,8 @@ def test_inf(seedn, if nonzero.sum() > 0: beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + conv.setup_inference(dispersion=dispersion) + target_spec = selected_targets(conv.loglike, conv.observed_soln, dispersion=dispersion) @@ -97,7 +99,7 @@ def main(nsim=300, CI = False): rho=0.30, randomizer_scale=1., equicorrelated=True, - useIP=False, + useIP=True, CI=False)) print("iteration completed ", i) diff --git a/selectinf/randomized/tests/test_exact_reference.py b/selectinf/randomized/tests/test_exact_reference.py index 534e4beaf..39931e75f 100644 --- a/selectinf/randomized/tests/test_exact_reference.py +++ b/selectinf/randomized/tests/test_exact_reference.py @@ -54,6 +54,8 @@ def test_inf(n=500, if nonzero.sum() > 0: beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + conv.setup_inference(dispersion=dispersion) + target_spec = selected_targets(conv.loglike, conv.observed_soln, dispersion=dispersion) From 77b06507d7c25af5c9f91577ba50261206c5c1f8 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 4 Oct 
2021 21:58:51 -0400 Subject: [PATCH 152/187] all tests pass --- selectinf/randomized/lasso.py | 19 ++++++++----------- .../randomized/tests/test_approx_reference.py | 4 +--- .../randomized/tests/test_exact_reference.py | 2 +- selectinf/randomized/tests/test_posterior.py | 6 ++---- .../tests/test_selective_MLE_high.py | 2 +- 5 files changed, 13 insertions(+), 20 deletions(-) diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index 6757ffa0e..1cca12f32 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -226,20 +226,16 @@ def signed_basis_vector(p, j, s): self.num_opt_var = num_opt_var - self.A_scaling = A_scaling - self.b_scaling = b_scaling - self.active = active + self._setup_sampler_data = (A_scaling[:active.sum()], + b_scaling[:active.sum()], + opt_linear, + self.observed_subgrad) return active_signs def setup_inference(self, dispersion): - self._setup_sampler_data = (self.A_scaling[:self.active.sum()], - self.b_scaling[:self.active.sum()], - self.opt_linear, - self.observed_subgrad) - if self.num_opt_var > 0: self._setup_sampler(*self._setup_sampler_data, dispersion=dispersion) @@ -724,29 +720,30 @@ def fit(self, # we need to estimate a dispersion parameter # we then setup up the sampler again + df_fit = len(self.selection_variable['variables']) if self.estimate_dispersion: X, y = self.loglike.data n, p = X.shape - df_fit = len(self.selection_variable['variables']) dispersion = 2 * (self.loglike.smooth_objective(self._beta_full, 'func') / (n - df_fit)) - self.df_fit = df_fit self.dispersion = dispersion # run setup again after # estimating dispersion + self.df_fit = df_fit + return signs def setup_inference(self, dispersion=None): - if self.df_fit>0: + if self.df_fit > 0: if dispersion is None: self._setup_sampler(*self._setup_sampler_data, diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index 906b43fc9..a4b6ec87b 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -5,8 +5,7 @@ from ...base import selected_targets from ..approx_reference import approximate_grid_inference -def test_inf(seedn, - n=500, +def test_inf(n=500, p=100, signal_fac=1., s=5, @@ -17,7 +16,6 @@ def test_inf(seedn, useIP=True, CI=False): - np.random.seed(seedn) inst, const = gaussian_instance, lasso.gaussian signal = np.sqrt(signal_fac * 2 * np.log(p)) diff --git a/selectinf/randomized/tests/test_exact_reference.py b/selectinf/randomized/tests/test_exact_reference.py index 39931e75f..ad1dee613 100644 --- a/selectinf/randomized/tests/test_exact_reference.py +++ b/selectinf/randomized/tests/test_exact_reference.py @@ -55,7 +55,7 @@ def test_inf(n=500, beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) conv.setup_inference(dispersion=dispersion) - + target_spec = selected_targets(conv.loglike, conv.observed_soln, dispersion=dispersion) diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index 1fc38bd32..3d972a585 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -127,8 +127,6 @@ def test_instance(nsample=100, nburnin=50): L.observed_soln, features=M, dispersion=dispersion) - - print(target_spec.dispersion, dispersion) posterior_inf = L.posterior(target_spec, dispersion=dispersion) @@ -297,10 +295,10 @@ def test_hiv_data(nsample=10000, signs = conv.fit() nonzero = signs != 0 - 
conv.setup_inference(dispersion=dispersion) + conv.setup_inference() target_spec = selected_targets(conv.loglike, - conv._W, + conv.observed_soln, nonzero, dispersion=dispersion) diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index a55a86686..947e75bcd 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -709,7 +709,7 @@ def test_scale_invariant(n=200, conv.observed_soln, dispersion=dispersion) - print('dispersion', target_spec.dispersion/scale**2) + #print('dispersion', target_spec.dispersion/scale**2) print('target', target_spec.observed_target[0]/scale) print('cov_target', target_spec.cov_target[0,0]/scale**2) print('regress_target_score', target_spec.regress_target_score[0,0]/scale**2) From 7fd440186b0dcab2aa147c2a65613228f161d4ce Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Sun, 31 Oct 2021 20:28:36 -0400 Subject: [PATCH 153/187] deleted unused methods for sampling from query --- selectinf/randomized/query.py | 851 +--------------------------------- 1 file changed, 22 insertions(+), 829 deletions(-) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 8573cda2b..f9237e562 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -1,21 +1,9 @@ -import functools -from itertools import product - import numpy as np import pandas as pd from scipy.stats import norm as ndist -from scipy.optimize import bisect - -from regreg.affine import power_L -import regreg.api as rr -from ..distributions.api import discrete_family -from ..constraints.affine import (sample_from_constraints, - constraints) +from ..constraints.affine import constraints from ..algorithms.barrier_affine import solve_barrier_affine_py -from ..base import (selected_targets, - full_targets, - debiased_targets) from .posterior_inference import posterior from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C @@ -87,7 +75,6 @@ def solve(self): class gaussian_query(query): - useC = True """ A class with Gaussian perturbation to the objective -- @@ -96,8 +83,6 @@ class gaussian_query(query): def fit(self, perturb=None): - p = self.nfeature - # take a new perturbation if supplied if perturb is not None: self._initial_omega = perturb @@ -114,6 +99,7 @@ def _setup_sampler(self, dispersion=1): A, b = linear_part, offset + if not np.all(A.dot(self.observed_opt_state) - b <= 0): raise ValueError('constraints not satisfied') @@ -127,19 +113,6 @@ def _setup_sampler(self, observed_subgrad, dispersion=dispersion) - def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad - if score.ndim == 1: - mean_term = regress_opt.dot(score.T + u).T - else: - mean_term = regress_opt.dot(score.T + u[:, None]).T - arg = opt - mean_term - return - 0.5 * np.sum(arg * cond_prec.dot(arg.T).T, 1) - - log_density = functools.partial(log_density, - regress_opt, - observed_subgrad, - cond_precision) - self.cond_mean, self.cond_cov = cond_mean, cond_cov affine_con = constraints(A, @@ -147,18 +120,9 @@ def log_density(regress_opt, u, cond_prec, opt, score): # u == subgrad mean=cond_mean, covariance=cond_cov) - self.sampler = affine_gaussian_sampler(affine_con, - self.observed_opt_state, - self.observed_score_state, - log_density, - regress_opt, # not needed? 
- observed_subgrad, - opt_linear, # L - M1, - M2, - M3, - selection_info=self.selection_variable, - useC=self.useC) + self.affine_con = affine_con + self.opt_linear = opt_linear + self.observed_subgrad = observed_subgrad def _setup_implied_gaussian(self, opt_linear, @@ -201,121 +165,26 @@ def _setup_implied_gaussian(self, M2, M3) - def summary(self, - target_spec, - opt_sample=None, - target_sample=None, - parameter=None, - level=0.9, - ndraw=10000, - burnin=2000, - compute_intervals=False): - """ - Produce p-values and confidence intervals for targets - of model including selected features - Parameters - ---------- - observed_target : ndarray - Observed estimate of target. - cov_target : ndarray - Estimated covaraince of target. - regress_target_score : ndarray - Estimated regression coefficient of target on score. - alternatives : [str], optional - Sequence of strings describing the alternatives, - should be values of ['twosided', 'less', 'greater'] - parameter : np.array - Hypothesized value for parameter -- defaults to 0. - level : float - Confidence level. - ndraw : int (optional) - Defaults to 1000. - burnin : int (optional) - Defaults to 1000. - compute_intervals : bool - Compute confidence intervals? - """ - - if parameter is None: - parameter = np.zeros_like(target_spec.observed_target) - - if opt_sample is None: - opt_sample, logW = self.sampler.sample(ndraw, burnin) - else: - if len(opt_sample) == 1: # only a sample, so weights are 1s - opt_sample = opt_sample[0] - logW = np.zeros(ndraw) - else: - opt_sample, logW = opt_sample - ndraw = opt_sample.shape[0] - - pivots = self.sampler.coefficient_pvalues(target_spec.observed_target, - target_spec.cov_target, - target_spec.regress_target_score, - parameter=parameter, - sample=(opt_sample, logW), - normal_sample=target_sample, - alternatives=target_spec.alternatives) - - if not np.all(parameter == 0): - pvalues = self.sampler.coefficient_pvalues(target_spec.observed_target, - target_spec.cov_target, - target_spec.regress_target_score, - parameter=np.zeros_like(parameter), - sample=(opt_sample, logW), - normal_sample=target_sample, - alternatives=target_spec.alternatives) - else: - pvalues = pivots - - result = pd.DataFrame({'target': target_spec.observed_target, - 'pvalue': pvalues}) - - if compute_intervals: - MLE = self.selective_MLE(target_spec)[0] - MLE_intervals = np.asarray(MLE[['lower_confidence', 'upper_confidence']]) - - intervals = self.sampler.confidence_intervals( - target_spec.observed_target, - target_spec.cov_target, - target_spec.regress_target_score, - sample=(opt_sample, logW), - normal_sample=target_sample, - initial_guess=MLE_intervals, - level=level) - - result.insert(2, 'lower_confidence', intervals[:, 0]) - result.insert(3, 'upper_confidence', intervals[:, 1]) - - if not np.all(parameter == 0): - result.insert(4, 'pivot', pivots) - result.insert(5, 'parameter', parameter) - - return result - def selective_MLE(self, target_spec, - level=0.9, + level=0.90, solve_args={'tol': 1.e-12}): - """ - Parameters - ---------- - observed_target : ndarray - Observed estimate of target. - cov_target : ndarray - Estimated covaraince of target. - regress_target_score : ndarray - Estimated covariance of target and score of randomized query. - level : float, optional - Confidence level. - solve_args : dict, optional - Arguments passed to solver. 
- """ - return self.sampler.selective_MLE(target_spec, - self.observed_opt_state, - solve_args=solve_args, - level=level) + return selective_MLE(target_spec, + self.observed_opt_state, + self.affine_con.mean, + self.affine_con.covariance, + self.affine_con.linear_part, + self.affine_con.offset, + self.opt_linear, + self.M1, + self.M2, + self.M3, + self.observed_score_state + self.observed_subgrad, + solve_args=solve_args, + level=level, + useC=False) + def posterior(self, target_spec, @@ -380,6 +249,7 @@ def approximate_grid_inference(self, target_spec, solve_args=solve_args, useIP=useIP) + return G.summary(alternatives=target_spec.alternatives) @@ -597,683 +467,6 @@ def confidence_intervals(self, return np.array(limits) -class optimization_sampler(object): - - def __init__(self): - raise NotImplementedError("abstract method") - - def sample(self): - raise NotImplementedError("abstract method") - - def log_cond_density(self, - opt_sample, - target_sample, - transform=None): - """ - Density of opt_sample | target_sample - """ - raise NotImplementedError("abstract method") - - def hypothesis_test(self, - test_stat, - observed_value, - cov_target, - score_cov, - sample_args=(), - sample=None, - parameter=0, - alternative='twosided'): - - ''' - Sample `target` from selective density - using sampler with - gradient map `self.gradient` and - projection map `self.projection`. - Parameters - ---------- - test_stat : callable - Test statistic to evaluate on sample from - selective distribution. - observed_value : float - Observed value of test statistic. - Used in p-value calculation. - sample_args : sequence - Arguments to `self.sample` if sample is None. - sample : np.array (optional) - If not None, assumed to be a sample of shape (-1,) + `self.shape` - representing a sample of the target from parameters. - Allows reuse of the same sample for construction of confidence - intervals, hypothesis tests, etc. If not None, - `ndraw, burnin, stepsize` are ignored. - parameter : np.float (optional) - alternative : ['greater', 'less', 'twosided'] - What alternative to use. - Returns - ------- - pvalue : float - ''' - - if alternative not in ['greater', 'less', 'twosided']: - raise ValueError("alternative should be one of ['greater', 'less', 'twosided']") - - if sample is None: - sample, logW = self.sample(*sample_args) - sample = np.atleast_2d(sample) - - if parameter is None: - parameter = self.reference - - sample_test_stat = np.squeeze(np.array([test_stat(x) for x in sample])) - - target_inv_cov = np.linalg.inv(cov_target) - delta = target_inv_cov.dot(parameter - self.reference) - W = np.exp(sample.dot(delta) + logW) - - family = discrete_family(sample_test_stat, W) - pval = family.cdf(0, observed_value) - - if alternative == 'greater': - return 1 - pval - elif alternative == 'less': - return pval - else: - return 2 * min(pval, 1 - pval) - - def confidence_intervals(self, - observed_target, - cov_target, - score_cov, - sample_args=(), - sample=None, - normal_sample=None, - level=0.9, - initial_guess=None): - ''' - Parameters - ---------- - - observed : np.float - A vector of parameters with shape `self.shape`, - representing coordinates of the target. - sample_args : sequence - Arguments to `self.sample` if sample is None. - sample : np.array (optional) - If not None, assumed to be a sample of shape (-1,) + `self.shape` - representing a sample of the target from parameters `self.reference`. - Allows reuse of the same sample for construction of confidence - intervals, hypothesis tests, etc. 
- level : float (optional) - Specify the - confidence level. - initial_guess : np.float - Initial guesses at upper and lower limits, optional. - Notes - ----- - Construct selective confidence intervals - for each parameter of the target. - Returns - ------- - intervals : [(float, float)] - List of confidence intervals. - ''' - - if sample is None: - sample, logW = self.sample(*sample_args) - sample = np.vstack([sample] * 5) # why times 5? - logW = np.hstack([logW] * 5) - else: - sample, logW = sample - - ndraw = sample.shape[0] - - _intervals = optimization_intervals([(self, - sample, - logW, - cov_target, - score_cov)], - observed_target, - ndraw, - normal_sample=normal_sample) - - limits = [] - - for i in range(observed_target.shape[0]): - keep = np.zeros_like(observed_target) - keep[i] = 1. - if initial_guess is None: - l, u = _intervals.confidence_interval(keep, level=level) - else: - l, u = _intervals.confidence_interval(keep, level=level, - guess=initial_guess[i]) - limits.append((l, u)) - - return np.array(limits) - - def coefficient_pvalues(self, - observed_target, - cov_target, - score_cov, - parameter=None, - sample_args=(), - sample=None, - normal_sample=None, - alternatives=None): - ''' - Construct selective p-values - for each parameter of the target. - Parameters - ---------- - observed : np.float - A vector of parameters with shape `self.shape`, - representing coordinates of the target. - parameter : np.float (optional) - A vector of parameters with shape `self.shape` - at which to evaluate p-values. Defaults - to `np.zeros(self.shape)`. - sample_args : sequence - Arguments to `self.sample` if sample is None. - sample : np.array (optional) - If not None, assumed to be a sample of shape (-1,) + `self.shape` - representing a sample of the target from parameters `self.reference`. - Allows reuse of the same sample for construction of confidence - intervals, hypothesis tests, etc. - alternatives : list of ['greater', 'less', 'twosided'] - What alternative to use. - Returns - ------- - pvalues : np.float - ''' - - if alternatives is None: - alternatives = ['twosided'] * observed_target.shape[0] - - if sample is None: - sample, logW = self.sample(*sample_args) - else: - sample, logW = sample - ndraw = sample.shape[0] - - if parameter is None: - parameter = np.zeros(observed_target.shape[0]) - - _intervals = optimization_intervals([(self, - sample, - logW, - cov_target, - score_cov)], - observed_target, - ndraw, - normal_sample=normal_sample) - pvals = [] - - for i in range(observed_target.shape[0]): - keep = np.zeros_like(observed_target) - keep[i] = 1. - pvals.append(_intervals.pivot(keep, - candidate=parameter[i], - alternative=alternatives[i])) - - return np.array(pvals) - - def _reconstruct_score_from_target(self, - target_sample, - transform=None): - if transform is not None: - direction, nuisance = transform - score_sample = (np.multiply.outer(target_sample, - direction) + - nuisance[None, :]) - else: - score_sample = target_sample - return score_sample - - -class affine_gaussian_sampler(optimization_sampler): - ''' - Sample from an affine truncated Gaussian - ''' - - def __init__(self, - affine_con, - initial_point, - observed_score_state, - log_cond_density, - regress_opt, - observed_subgrad, - opt_linear, - M1, - M2, - M3, - selection_info=None, - useC=False): - - ''' - Parameters - ---------- - affine_con : `selection.constraints.affine.constraints` - Affine constraints - initial_point : ndarray - Feasible point for affine constraints. 
- observed_score_state : ndarray - Observed score of convex loss (slightly modified). - Essentially (asymptotically) equivalent - to $\nabla \ell(\beta^*) + - Q(\beta^*)\beta^*$ where $\beta^*$ is population - minimizer. For linear regression, it is always - $-X^Ty$. - log_cond_density : callable - Density of optimization variables given score - regress_opt: ndarray - Regression coefficient of opt on to score - observed_subgrad : ndarray - selection_info : optional - Function of optimization variables that - will be conditioned on. - useC : bool, optional - Use python or C solver. - - ''' - - self.affine_con = affine_con - - self.covariance = self.affine_con.covariance - self.mean = self.affine_con.mean - - self.initial_point = initial_point - self.observed_score_state = observed_score_state - self.selection_info = selection_info - self._log_cond_density = log_cond_density - self.regress_opt = regress_opt - self.observed_subgrad = observed_subgrad - self.useC = useC - self.opt_linear = opt_linear - self.M1, self.M2, self.M3 = M1, M2, M3 - - def log_cond_density(self, - opt_sample, - target_sample, - transform=None): - - if transform is not None: - direction, nuisance = transform - return self._log_density_ray(0, # candidate - # has been added to - # target - direction, - nuisance, - target_sample, - opt_sample) - else: - # target must be in score coordinates - score_sample = target_sample - - # probably should switch - # order of signature - return self._log_cond_density(opt_sample, - score_sample) - - def sample(self, ndraw, burnin): - ''' - Sample `target` from selective density - using projected Langevin sampler with - gradient map `self.gradient` and - projection map `self.projection`. - Parameters - ---------- - ndraw : int - How long a chain to return? - burnin : int - How many samples to discard? - ''' - - _sample = sample_from_constraints(self.affine_con, - self.initial_point, - ndraw=ndraw, - burnin=burnin) - return _sample, np.zeros(_sample.shape[0]) - - def selective_MLE(self, - target_spec, - # initial (observed) value of optimization variables -- - # used as a feasible point. - # precise value used only for independent estimator - observed_soln, - solve_args={'tol': 1.e-12}, - level=0.9): - """ - Selective MLE based on approximation of - CGF. - Parameters - ---------- - observed_target : ndarray - Observed estimate of target. - cov_target : ndarray - Estimated covaraince of target. - regress_target_score : ndarray - Estimated covariance of target and score of randomized query. - observed_soln : ndarray - Feasible point for optimization problem. - level : float, optional - Confidence level. - solve_args : dict, optional - Arguments passed to solver. - """ - - return selective_MLE(target_spec, - observed_soln, - self.mean, - self.covariance, - self.affine_con.linear_part, - self.affine_con.offset, - self.opt_linear, - self.M1, - self.M2, - self.M3, - self.observed_score_state + self.observed_subgrad, - solve_args=solve_args, - level=level, - useC=self.useC) - - def _log_density_ray(self, - candidate, - direction, - nuisance, - gaussian_sample, - opt_sample): - - # implicitly caching (opt_sample, gaussian_sample) ? - - if (not hasattr(self, "_direction") or not - np.all(self._direction == direction)): - - regress_opt, subgrad = self.regress_opt, self.observed_subgrad - - if opt_sample.shape[1] == 1: - - prec = 1. 
/ self.covariance[0, 0] - quadratic_term = regress_opt.dot(direction) ** 2 * prec - arg = (opt_sample[:, 0] - - regress_opt.dot(nuisance + subgrad) - - regress_opt.dot(direction) * gaussian_sample) - linear_term = -regress_opt.dot(direction) * prec * arg - constant_term = arg ** 2 * prec - - self._cache = {'linear_term': linear_term, - 'quadratic_term': quadratic_term, - 'constant_term': constant_term} - else: - self._direction = direction.copy() - - # density is a Gaussian evaluated at - # O_i - A(N + (Z_i + theta) * gamma + u) - - # u is observed_subgrad - # A is regress_opt - # Z_i is gaussian_sample[i] (real-valued) - # gamma is direction - # O_i is opt_sample[i] - - # let arg1 = O_i - # let arg2 = A(N+u + Z_i \cdot gamma) - # then it is of the form (arg1 - arg2 - theta * A gamma) - - regress_opt, subgrad = self.regress_opt, self.observed_subgrad - cov = self.covariance - prec = np.linalg.inv(cov) - linear_part = -regress_opt.dot(direction) # -A gamma - - if 1 in opt_sample.shape: - pass # stop3 what's this for? - cov = self.covariance - - quadratic_term = linear_part.T.dot(prec).dot(linear_part) - - arg1 = opt_sample.T - arg2 = -regress_opt.dot(np.multiply.outer(direction, gaussian_sample) + - (nuisance + subgrad)[:, None]) - arg = arg1 + arg2 - linear_term = -regress_opt.T.dot(prec).dot(arg) - constant_term = np.sum(prec.dot(arg) * arg, 0) - - self._cache = {'linear_term': linear_term, - 'quadratic_term': quadratic_term, - 'constant_term': constant_term} - (linear_term, - quadratic_term, - constant_term) = (self._cache['linear_term'], - self._cache['quadratic_term'], - self._cache['constant_term']) - return (-0.5 * candidate ** 2 * quadratic_term - - candidate * linear_term - 0.5 * constant_term) - - -class optimization_intervals(object): - - def __init__(self, - opt_sampling_info, # a sequence of - # (opt_sampler, - # opt_sample, - # opt_logweights, - # cov_target, - # score_cov) objects - # in theory all cov_target - # should be about the same... 
- observed, - nsample, # how large a normal sample - cov_target=None, - normal_sample=None): - - # not all opt_samples will be of the same size as nsample - # let's repeat them as necessary - - tiled_sampling_info = [] - for (opt_sampler, - opt_sample, - opt_logW, - t_cov, - t_score_cov) in opt_sampling_info: - if opt_sample is not None: - if opt_sample.shape[0] < nsample: - if opt_sample.ndim == 1: - tiled_opt_sample = np.tile(opt_sample, - int(np.ceil(nsample / - opt_sample.shape[0])))[:nsample] - tiled_opt_logW = np.tile(opt_logW, - int(np.ceil(nsample / - opt_logW.shape[0])))[:nsample] - else: - tiled_opt_sample = np.tile(opt_sample, - (int(np.ceil(nsample / - opt_sample.shape[0])), 1))[:nsample] - tiled_opt_logW = np.tile(opt_logW, - (int(np.ceil(nsample / - opt_logW.shape[0])), 1))[:nsample] - else: - tiled_opt_sample = opt_sample[:nsample] - tiled_opt_logW = opt_logW[:nsample] - else: - tiled_sample = None - tiled_sampling_info.append((opt_sampler, - tiled_opt_sample, - tiled_opt_logW, - t_cov, - t_score_cov)) - - self.opt_sampling_info = tiled_sampling_info - self._logden = 0 - for opt_sampler, opt_sample, opt_logW, _, _ in opt_sampling_info: - - self._logden += opt_sampler.log_cond_density( - opt_sample, - opt_sampler.observed_score_state, - transform=None) - self._logden -= opt_logW - if opt_sample.shape[0] < nsample: - self._logden = np.tile(self._logden, - int(np.ceil(nsample / - opt_sample.shape[0])))[:nsample] - - # this is our observed unpenalized estimator - self.observed = observed.copy() - - # average covariances in case they might be different - - if cov_target is None: - self.cov_target = 0 - for _, _, _, cov_target, _ in opt_sampling_info: - self.cov_target += cov_target - self.cov_target /= len(opt_sampling_info) - - if normal_sample is None: - self._normal_sample = np.random.multivariate_normal( - mean=np.zeros(self.cov_target.shape[0]), - cov=self.cov_target, - size=(nsample,)) - else: - self._normal_sample = normal_sample - - def pivot(self, - linear_func, - candidate, - alternative='twosided'): - ''' - alternative : ['greater', 'less', 'twosided'] - What alternative to use. 
- Returns - ------- - pvalue : np.float - ''' - - if alternative not in ['greater', 'less', 'twosided']: - raise ValueError("alternative should be one of ['greater', 'less', 'twosided']") - - observed_stat = self.observed.dot(linear_func) - sample_stat = self._normal_sample.dot(linear_func) - - cov_target = linear_func.dot(self.cov_target.dot(linear_func)) - - nuisance = [] - translate_dirs = [] - - for (opt_sampler, - opt_sample, - _, - _, - regress_target_score) in self.opt_sampling_info: - cur_score_cov = linear_func.dot(regress_target_score) - - # cur_nuisance is in the view's score coordinates - cur_nuisance = opt_sampler.observed_score_state - cur_score_cov * observed_stat / cov_target - nuisance.append(cur_nuisance) - translate_dirs.append(cur_score_cov / cov_target) - - weights = self._weights(sample_stat, # normal sample - candidate, # candidate value - nuisance, # nuisance sufficient stats for each view - translate_dirs) # points will be moved like sample * regress_target_score - - pivot = np.mean((sample_stat + candidate <= observed_stat) * weights) / np.mean(weights) - - if alternative == 'twosided': - return 2 * min(pivot, 1 - pivot) - elif alternative == 'less': - return pivot - else: - return 1 - pivot - - def confidence_interval(self, - linear_func, - level=0.90, - how_many_sd=20, - guess=None): - - sample_stat = self._normal_sample.dot(linear_func) - observed_stat = self.observed.dot(linear_func) - - def _rootU(gamma): - return self.pivot(linear_func, - observed_stat + gamma, - alternative='less') - (1 - level) / 2. - - def _rootL(gamma): - return self.pivot(linear_func, - observed_stat + gamma, - alternative='less') - (1 + level) / 2. - - if guess is None: - grid_min, grid_max = -how_many_sd * np.std(sample_stat), how_many_sd * np.std(sample_stat) - upper = bisect(_rootU, grid_min, grid_max) - lower = bisect(_rootL, grid_min, grid_max) - - else: - delta = 0.5 * (guess[1] - guess[0]) - - # find interval bracketing upper solution - count = 0 - while True: - Lu, Uu = guess[1] - delta, guess[1] + delta - valU = _rootU(Uu) - valL = _rootU(Lu) - if valU * valL < 0: - break - delta *= 2 - count += 1 - upper = bisect(_rootU, Lu, Uu) - - # find interval bracketing lower solution - count = 0 - while True: - Ll, Ul = guess[0] - delta, guess[0] + delta - valU = _rootL(Ul) - valL = _rootL(Ll) - if valU * valL < 0: - break - delta *= 2 - count += 1 - lower = bisect(_rootL, Ll, Ul) - return lower + observed_stat, upper + observed_stat - - # Private methods - - def _weights(self, - stat_sample, - candidate, - nuisance, - translate_dirs): - - # Here we should loop through the views - # and move the score of each view - # for each projected (through linear_func) normal sample - # using the linear decomposition - - # We need access to the map that takes observed_score for each view - # and constructs the full randomization -- this is the reconstruction map - # for each view - - # The data state for each view will be set to be N_i + A_i \hat{\theta}_i - # where N_i is the nuisance sufficient stat for the i-th view's - # data with respect to \hat{\theta} and N_i will not change because - # it depends on the observed \hat{\theta} and observed score of i-th view - - # In this function, \hat{\theta}_i will change with the Monte Carlo sample - - score_sample = [] - _lognum = 0 - for i, opt_info in enumerate(self.opt_sampling_info): - opt_sampler, opt_sample = opt_info[:2] - - _lognum += opt_sampler.log_cond_density(opt_sample, - stat_sample + candidate, - transform= - (translate_dirs[i], - 
nuisance[i])) - - _logratio = _lognum - self._logden - _logratio -= _logratio.max() - - return np.exp(_logratio) - - def naive_confidence_intervals(diag_cov, observed, level=0.9): """ Compute naive Gaussian based confidence From d02f25bc518b62930ad4930706dda595ee3f356e Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Sun, 31 Oct 2021 20:47:31 -0400 Subject: [PATCH 154/187] some more clean up --- selectinf/randomized/exact_reference.py | 4 +- selectinf/randomized/posterior_inference.py | 4 +- selectinf/randomized/query.py | 34 ++- selectinf/randomized/tests/test_posterior.py | 206 +++++++++--------- .../tests/test_selective_MLE_high.py | 2 - 5 files changed, 138 insertions(+), 112 deletions(-) diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index 13fdbd4a6..bcc78f3b7 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -27,6 +27,8 @@ def __init__(self, Estimated covaraince of target. cov_target_score : ndarray Estimated covariance of target and score of randomized query. + level : float, optional + Confidence level. solve_args : dict, optional Arguments passed to solver. """ @@ -83,7 +85,7 @@ def __init__(self, def summary(self, alternatives=None, parameter=None, - level=0.9): + level=0.90): """ Produce p-values and confidence intervals for targets of model including selected features diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 194b6c6b4..2467a35e4 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -82,7 +82,7 @@ def __init__(self, self.offset = offset self.initial_estimate = np.asarray(result['MLE']) - self.dispersion = dispersion # why is this needed? + self.dispersion = dispersion self.log_ref = log_ref self._set_marginal_parameters() @@ -159,12 +159,14 @@ def _set_marginal_parameters(self): bias_target = self.cov_target.dot(T1.T.dot(-T4.dot(self.observed_target) + self.M1.dot(self.opt_linear.dot(self.cond_mean))) - _P) ###set parameters for the marginal distribution of optimization variables + _Q = np.linalg.inv(prec_target_nosel + T3) self.prec_marginal = self.cond_precision - T5.T.dot(_Q).dot(T5) self.linear_coef = self.cond_cov.dot(T5.T) self.offset_coef = self.cond_mean - self.linear_coef.dot(self.observed_target) ###set parameters for the marginal distribution of target + r = np.linalg.inv(prec_target_nosel).dot(self.prec_target.dot(bias_target)) S = np.linalg.inv(prec_target_nosel).dot(self.prec_target) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index f9237e562..a423a0fb8 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -8,6 +8,7 @@ from .posterior_inference import posterior from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C from .approx_reference import approximate_grid_inference +from .exact_reference import exact_grid_inference class query(object): r""" @@ -226,8 +227,8 @@ def prior(target_parameter): def approximate_grid_inference(self, target_spec, - solve_args={'tol': 1.e-12}, - useIP=True): + useIP=True, + solve_args={'tol': 1.e-12}): """ Parameters @@ -252,6 +253,32 @@ def approximate_grid_inference(self, return G.summary(alternatives=target_spec.alternatives) + def exact_grid_inference(self, + target_spec, + solve_args={'tol': 1.e-12}): + + """ + Parameters + ---------- + observed_target : ndarray + Observed estimate of target. 
+ cov_target : ndarray + Estimated covaraince of target. + regress_target_score : ndarray + Estimated covariance of target and score of randomized query. + alternatives : [str], optional + Sequence of strings describing the alternatives, + should be values of ['twosided', 'less', 'greater'] + solve_args : dict, optional + Arguments passed to solver. + """ + + G = exact_grid_inference(self, + target_spec, + solve_args=solve_args) + + return G.summary(alternatives=target_spec.alternatives) + class multiple_queries(object): ''' @@ -540,9 +567,6 @@ def selective_MLE(target_spec, Conditional mean of optimization variables given target. cond_cov : ndarray Conditional covariance of optimization variables given target. - regress_opt : ndarray - Describes how conditional mean of optimization - variables varies with target. linear_part : ndarray Linear part of affine constraints: $\{o:Ao \leq b\}$ offset : ndarray diff --git a/selectinf/randomized/tests/test_posterior.py b/selectinf/randomized/tests/test_posterior.py index 3d972a585..2757c06da 100644 --- a/selectinf/randomized/tests/test_posterior.py +++ b/selectinf/randomized/tests/test_posterior.py @@ -263,109 +263,109 @@ def prior(target_parameter): return samples -def test_hiv_data(nsample=10000, - nburnin=500, - level=0.90, - split_proportion=0.50, - seedn=1): - np.random.seed(seedn) - - alpha = (1 - level) / 2 - Z_quantile = ndist.ppf(1 - alpha) - - X, Y, _ = HIV_NRTI(standardize=True) - Y *= 15 - n, p = X.shape - X /= np.sqrt(n) - - ols_fit = np.linalg.pinv(X).dot(Y) - _sigma = np.linalg.norm(Y - X.dot(ols_fit)) / np.sqrt(n - p - 1) - - const = split_lasso.gaussian - - dispersion = _sigma ** 2 - - W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * _sigma - - conv = const(X, - Y, - W, - proportion=split_proportion) - - signs = conv.fit() - nonzero = signs != 0 - - conv.setup_inference() - - target_spec = selected_targets(conv.loglike, - conv.observed_soln, - nonzero, - dispersion=dispersion) - - mle, inverse_info = conv.selective_MLE(target_spec, - level=level, - solve_args={'tol': 1.e-12})[:2] - - approx_inf = conv.approximate_grid_inference(target_spec) - - posterior_inf = conv.posterior(target_spec, - dispersion=dispersion) - - samples_langevin = langevin_sampler(posterior_inf, - nsample=nsample, - nburnin=nburnin, - step=1.) 
- - lower_langevin = np.percentile(samples_langevin, int(alpha * 100), axis=0) - upper_langevin = np.percentile(samples_langevin, int((1 - alpha) * 100), axis=0) - - samples_gibbs, scale_gibbs = gibbs_sampler(posterior_inf, - nsample=nsample, - nburnin=nburnin) - - lower_gibbs = np.percentile(samples_gibbs, int(alpha * 100), axis=0) - upper_gibbs = np.percentile(samples_gibbs, int((1 - alpha) * 100), axis=0) - - naive_est = np.linalg.pinv(X[:, nonzero]).dot(Y) - naive_cov = dispersion * np.linalg.inv(X[:, nonzero].T.dot(X[:, nonzero])) - naive_intervals = np.vstack([naive_est - Z_quantile * np.sqrt(np.diag(naive_cov)), - naive_est + Z_quantile * np.sqrt(np.diag(naive_cov))]).T - - X_split = X[~conv._selection_idx, :] - Y_split = Y[~conv._selection_idx] - split_est = np.linalg.pinv(X_split[:, nonzero]).dot(Y_split) - split_cov = dispersion * np.linalg.inv(X_split[:, nonzero].T.dot(X_split[:, nonzero])) - split_intervals = np.vstack([split_est - Z_quantile * np.sqrt(np.diag(split_cov)), - split_est + Z_quantile * np.sqrt(np.diag(split_cov))]).T - - print("lengths: adjusted intervals Langevin, Gibbs, MLE1, MLE2, approx ", - np.mean(upper_langevin - lower_langevin), - np.mean(upper_gibbs - lower_gibbs), - np.mean((2 * Z_quantile) * np.sqrt(np.diag(posterior_inf.inverse_info))), - np.mean(mle['upper_confidence'] - mle['lower_confidence']), - np.mean(approx_inf['upper_confidence'] - approx_inf['lower_confidence']) - ) - - print("lengths: naive intervals ", np.mean(naive_intervals[:, 1] - naive_intervals[:, 0])) - - print("lengths: split intervals ", np.mean(split_intervals[:, 1] - split_intervals[:, 0])) - - scale_interval = np.percentile(scale_gibbs, [alpha * 100, (1 - alpha) * 100]) - output = pd.DataFrame({'Langevin_lower_credible': lower_langevin, - 'Langevin_upper_credible': upper_langevin, - 'Gibbs_lower_credible': lower_gibbs, - 'Gibbs_upper_credible': upper_gibbs, - 'MLE_lower_confidence': mle['lower_confidence'], - 'MLE_upper_confidence': mle['upper_confidence'], - 'approx_lower_confidence': approx_inf['lower_confidence'], - 'approx_upper_confidence': approx_inf['upper_confidence'], - 'Split_lower_confidence': split_intervals[:, 0], - 'Split_upper_confidence': split_intervals[:, 1], - 'Naive_lower_confidence': naive_intervals[:, 0], - 'Naive_upper_confidence': naive_intervals[:, 1] - }) - - return output, scale_interval, _sigma +# def test_hiv_data(nsample=10000, +# nburnin=500, +# level=0.90, +# split_proportion=0.50, +# seedn=1): +# np.random.seed(seedn) +# +# alpha = (1 - level) / 2 +# Z_quantile = ndist.ppf(1 - alpha) +# +# X, Y, _ = HIV_NRTI(standardize=True) +# Y *= 15 +# n, p = X.shape +# X /= np.sqrt(n) +# +# ols_fit = np.linalg.pinv(X).dot(Y) +# _sigma = np.linalg.norm(Y - X.dot(ols_fit)) / np.sqrt(n - p - 1) +# +# const = split_lasso.gaussian +# +# dispersion = _sigma ** 2 +# +# W = 1 * np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * _sigma +# +# conv = const(X, +# Y, +# W, +# proportion=split_proportion) +# +# signs = conv.fit() +# nonzero = signs != 0 +# +# conv.setup_inference() +# +# target_spec = selected_targets(conv.loglike, +# conv.observed_soln, +# nonzero, +# dispersion=dispersion) +# +# mle, inverse_info = conv.selective_MLE(target_spec, +# level=level, +# solve_args={'tol': 1.e-12})[:2] +# +# approx_inf = conv.approximate_grid_inference(target_spec) +# +# posterior_inf = conv.posterior(target_spec, +# dispersion=dispersion) +# +# samples_langevin = langevin_sampler(posterior_inf, +# nsample=nsample, +# nburnin=nburnin, +# step=1.) 
+# +# lower_langevin = np.percentile(samples_langevin, int(alpha * 100), axis=0) +# upper_langevin = np.percentile(samples_langevin, int((1 - alpha) * 100), axis=0) +# +# samples_gibbs, scale_gibbs = gibbs_sampler(posterior_inf, +# nsample=nsample, +# nburnin=nburnin) +# +# lower_gibbs = np.percentile(samples_gibbs, int(alpha * 100), axis=0) +# upper_gibbs = np.percentile(samples_gibbs, int((1 - alpha) * 100), axis=0) +# +# naive_est = np.linalg.pinv(X[:, nonzero]).dot(Y) +# naive_cov = dispersion * np.linalg.inv(X[:, nonzero].T.dot(X[:, nonzero])) +# naive_intervals = np.vstack([naive_est - Z_quantile * np.sqrt(np.diag(naive_cov)), +# naive_est + Z_quantile * np.sqrt(np.diag(naive_cov))]).T +# +# X_split = X[~conv._selection_idx, :] +# Y_split = Y[~conv._selection_idx] +# split_est = np.linalg.pinv(X_split[:, nonzero]).dot(Y_split) +# split_cov = dispersion * np.linalg.inv(X_split[:, nonzero].T.dot(X_split[:, nonzero])) +# split_intervals = np.vstack([split_est - Z_quantile * np.sqrt(np.diag(split_cov)), +# split_est + Z_quantile * np.sqrt(np.diag(split_cov))]).T +# +# print("lengths: adjusted intervals Langevin, Gibbs, MLE1, MLE2, approx ", +# np.mean(upper_langevin - lower_langevin), +# np.mean(upper_gibbs - lower_gibbs), +# np.mean((2 * Z_quantile) * np.sqrt(np.diag(posterior_inf.inverse_info))), +# np.mean(mle['upper_confidence'] - mle['lower_confidence']), +# np.mean(approx_inf['upper_confidence'] - approx_inf['lower_confidence']) +# ) +# +# print("lengths: naive intervals ", np.mean(naive_intervals[:, 1] - naive_intervals[:, 0])) +# +# print("lengths: split intervals ", np.mean(split_intervals[:, 1] - split_intervals[:, 0])) +# +# scale_interval = np.percentile(scale_gibbs, [alpha * 100, (1 - alpha) * 100]) +# output = pd.DataFrame({'Langevin_lower_credible': lower_langevin, +# 'Langevin_upper_credible': upper_langevin, +# 'Gibbs_lower_credible': lower_gibbs, +# 'Gibbs_upper_credible': upper_gibbs, +# 'MLE_lower_confidence': mle['lower_confidence'], +# 'MLE_upper_confidence': mle['upper_confidence'], +# 'approx_lower_confidence': approx_inf['lower_confidence'], +# 'approx_upper_confidence': approx_inf['upper_confidence'], +# 'Split_lower_confidence': split_intervals[:, 0], +# 'Split_upper_confidence': split_intervals[:, 1], +# 'Naive_lower_confidence': naive_intervals[:, 0], +# 'Naive_upper_confidence': naive_intervals[:, 1] +# }) +# +# return output, scale_interval, _sigma if __name__ == "__main__": diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 947e75bcd..444748b8d 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -174,7 +174,6 @@ def test_instance(): M = E.copy() M[-3:] = 1 - print("check ", M) dispersion = np.linalg.norm(Y - X[:, M].dot(np.linalg.pinv(X[:, M]).dot(Y))) ** 2 / (n - M.sum()) L.setup_inference(dispersion=dispersion) @@ -196,7 +195,6 @@ def test_instance(): return coverage - def test_selected_targets_disperse(n=500, p=100, s=5, From 15aaa6e69c9235934b91733a85a6a1541a8a8a6c Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Wed, 3 Nov 2021 17:28:13 -0400 Subject: [PATCH 155/187] added class for MLE based inference --- selectinf/randomized/selective_MLE.py | 118 ++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 selectinf/randomized/selective_MLE.py diff --git a/selectinf/randomized/selective_MLE.py b/selectinf/randomized/selective_MLE.py new file mode 100644 index 
000000000..2b8f6f9e9 --- /dev/null +++ b/selectinf/randomized/selective_MLE.py @@ -0,0 +1,118 @@ +from __future__ import division, print_function + +import numpy as np, pandas as pd +from scipy.stats import norm as ndist +from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C +from ..algorithms.barrier_affine import solve_barrier_affine_py + +class selective_MLE(object): + + def __init__(self, + query, + target_spec, + solve_args={'tol': 1.e-12}): + + self.solve_args = solve_args + + (observed_target, + cov_target, + regress_target_score) = target_spec[:3] + + self.observed_target = observed_target + self.cov_target = cov_target + self.prec_target = np.linalg.inv(cov_target) + self.regress_target_score = regress_target_score + + self.cond_mean = query.cond_mean + self.cond_cov = query.cond_cov + self.prec_opt = np.linalg.inv(self.cond_cov) + self.opt_linear = query.opt_linear + + self.linear_part = query.sampler.affine_con.linear_part + self.offset = query.sampler.affine_con.offset + + self.M1 = query.M1 + self.M2 = query.M2 + self.M3 = query.M3 + self.observed_soln = query.observed_opt_state + + self.observed_score = query.observed_score_state + query.observed_subgrad + + self._setup_estimating_eqn() + + def mle_inference(self, useC= False, level=0.90): + + conjugate_arg = self.prec_opt.dot(self.cond_mean) + if useC: + solver = solve_barrier_affine_C + else: + solver = solve_barrier_affine_py + + val, soln, hess = solver(conjugate_arg, + self.prec_opt, + self.observed_soln, + self.linear_part, + self.offset, + **self.solve_args) + + final_estimator = self.cov_target.dot(self.prec_target_nosel).dot(self.observed_target) \ + + self.regress_target_score.dot(self.M1.dot(self.opt_linear)).dot(self.cond_mean - soln) \ + - self.bias_target + + observed_info_natural = self.prec_target_nosel + self.T3 - self.T5.dot(self.hess.dot(self.T5.T)) + + unbiased_estimator = self.cov_target.dot(self.prec_target_nosel).dot(self.observed_target) - self.bias_target + + observed_info_mean = self.cov_target.dot(observed_info_natural.dot(self.cov_target)) + + Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) + + pvalues = ndist.cdf(Z_scores) + + pvalues = 2 * np.minimum(pvalues, 1 - pvalues) + + alpha = 1. - level + + quantile = ndist.ppf(1 - alpha / 2.) + + intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), + final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T + + log_ref = val + conjugate_arg.T.dot(self.cond_cov).dot(conjugate_arg) / 2. 
+ + result = pd.DataFrame({'MLE': final_estimator, + 'SE': np.sqrt(np.diag(observed_info_mean)), + 'Zvalue': Z_scores, + 'pvalue': pvalues, + 'lower_confidence': intervals[:, 0], + 'upper_confidence': intervals[:, 1], + 'unbiased': unbiased_estimator}) + + return result, observed_info_mean, log_ref + + def _setup_estimating_eqn(self): + + T1 = self.regress_target_score.T.dot(self.prec_target) + T2 = T1.T.dot(self.M2.dot(T1)) + T3 = T1.T.dot(self.M3.dot(T1)) + T4 = self.M1.dot(self.opt_linear).dot(self.cond_cov).dot(self.opt_linear.T.dot(self.M1.T.dot(T1))) + T5 = T1.T.dot(self.M1.dot(self.opt_linear)) + + self.prec_target_nosel = self.prec_target + T2 - T3 + + _P = -(T1.T.dot(self.M1.dot(self.observed_score)) + T2.dot(self.observed_target)) + + self.bias_target = self.cov_target.dot(T1.T.dot(-T4.dot(self.observed_target) + + self.M1.dot(self.opt_linear.dot(self.cond_mean))) - _P) + + self.T3 = T3 + self.T5 = T5 + + + + + + + + + From 56b0902d630f207ac4b002a2b173f53de77600de Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Wed, 3 Nov 2021 17:28:44 -0400 Subject: [PATCH 156/187] other changes --- selectinf/randomized/approx_reference.py | 4 +- selectinf/randomized/exact_reference.py | 5 +- selectinf/randomized/posterior_inference.py | 4 +- selectinf/randomized/query.py | 253 +------------------- selectinf/randomized/tests/test_naive.py | 43 +++- 5 files changed, 47 insertions(+), 262 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index 31c3b88e3..38483f4c6 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -39,8 +39,8 @@ def __init__(self, self.solve_args = solve_args - linear_part = query.sampler.affine_con.linear_part - offset = query.sampler.affine_con.offset + linear_part = query.affine_con.linear_part + offset = query.affine_con.offset opt_linear = query.opt_linear diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index bcc78f3b7..ce799a47c 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -1,7 +1,6 @@ from __future__ import division, print_function import numpy as np, pandas as pd -from scipy.interpolate import interp1d from scipy.stats import norm as ndist from ..distributions.discrete_family import discrete_family @@ -39,8 +38,8 @@ def __init__(self, self.solve_args = solve_args - linear_part = query.sampler.affine_con.linear_part - offset = query.sampler.affine_con.offset + linear_part = query.affine_con.linear_part + offset = query.affine_con.offset opt_linear = query.opt_linear diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 2467a35e4..7fa5b377b 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -47,8 +47,8 @@ def __init__(self, cov_target, regress_target_score) = target_spec[:3] - linear_part = query.sampler.affine_con.linear_part - offset = query.sampler.affine_con.offset + linear_part = query.affine_con.linear_part + offset = query.affine_con.offset opt_linear = query.opt_linear diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index a423a0fb8..9a562c8e4 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -6,7 +6,6 @@ from ..algorithms.barrier_affine import solve_barrier_affine_py from .posterior_inference import posterior -from .selective_MLE_utils import solve_barrier_affine as 
solve_barrier_affine_C from .approx_reference import approximate_grid_inference from .exact_reference import exact_grid_inference @@ -280,257 +279,7 @@ def exact_grid_inference(self, return G.summary(alternatives=target_spec.alternatives) -class multiple_queries(object): - ''' - Combine several queries of a given data - through randomized algorithms. - ''' - - def __init__(self, objectives): - ''' - Parameters - ---------- - objectives : sequence - A sequences of randomized objective functions. - Notes - ----- - Each element of `objectives` must - have a `setup_sampler` method that returns - a description of the distribution of the - data implicated in the objective function, - typically through the score or gradient - of the objective function. - These descriptions are passed to a function - `form_covariances` to linearly decompose - each score in terms of a target - and an asymptotically independent piece. - Returns - ------- - None - ''' - - self.objectives = objectives - - def fit(self): - for objective in self.objectives: - if not objective._setup: - objective.fit() - - def summary(self, - target_specs, - # a sequence of target_specs - # objects in theory all cov_target - # should be about the same. as should the observed_target - alternatives=None, - parameter=None, - level=0.9, - ndraw=5000, - burnin=2000, - compute_intervals=False): - - """ - Produce p-values and confidence intervals for targets - of model including selected features - Parameters - ---------- - observed_target : ndarray - Observed estimate of target. - alternatives : [str], optional - Sequence of strings describing the alternatives, - should be values of ['twosided', 'less', 'greater'] - parameter : np.array - Hypothesized value for parameter -- defaults to 0. - level : float - Confidence level. - ndraw : int (optional) - Defaults to 1000. - burnin : int (optional) - Defaults to 1000. - compute_intervals : bool - Compute confidence intervals? 
- """ - - observed_target = target_specs[0].observed_target - alternatives = target_specs[0].alternatives - - if parameter is None: - parameter = np.zeros_like(observed_target) - - if alternatives is None: - alternatives = ['twosided'] * observed_target.shape[0] - - if len(self.objectives) != len(target_specs): - raise ValueError("number of objectives and sampling cov infos do not match") - - self.opt_sampling_info = [] - for i in range(len(self.objectives)): - if target_specs[i].cov_target is None or target_specs[i].regress_target_score is None: - raise ValueError("did not input target and score covariance info") - opt_sample, opt_logW = self.objectives[i].sampler.sample(ndraw, burnin) - self.opt_sampling_info.append((self.objectives[i].sampler, - opt_sample, - opt_logW, - target_specs[i].cov_target, - target_specs[i].regress_target_score)) - - pivots = self.coefficient_pvalues(observed_target, - parameter=parameter, - alternatives=alternatives) - - if not np.all(parameter == 0): - pvalues = self.coefficient_pvalues(observed_target, - parameter=np.zeros_like(observed_target), - alternatives=alternatives) - else: - pvalues = pivots - - intervals = None - if compute_intervals: - intervals = self.confidence_intervals(observed_target, - level) - - result = pd.DataFrame({'target': observed_target, - 'pvalue': pvalues, - 'lower_confidence': intervals[:, 0], - 'upper_confidence': intervals[:, 1]}) - - if not np.all(parameter == 0): - result.insert(4, 'pivot', pivots) - result.insert(5, 'parameter', parameter) - - return result - - def coefficient_pvalues(self, - observed_target, - parameter=None, - sample_args=(), - alternatives=None): - - ''' - Construct selective p-values - for each parameter of the target. - Parameters - ---------- - observed_target : ndarray - Observed estimate of target. - parameter : ndarray (optional) - A vector of parameters with shape `self.shape` - at which to evaluate p-values. Defaults - to `np.zeros(self.shape)`. - sample_args : sequence - Arguments to `self.sample` if sample is not found - for a given objective. - alternatives : [str], optional - Sequence of strings describing the alternatives, - should be values of ['twosided', 'less', 'greater'] - Returns - ------- - pvalues : ndarray - ''' - - for i in range(len(self.objectives)): - if self.opt_sampling_info[i][1] is None: - _sample, _logW = self.objectives[i].sampler.sample(*sample_args) - self.opt_sampling_info[i][1] = _sample - self.opt_sampling_info[i][2] = _logW - - ndraw = self.opt_sampling_info[0][1].shape[0] # nsample for normal samples taken from the 1st objective - - _intervals = optimization_intervals(self.opt_sampling_info, - observed_target, - ndraw) - - pvals = [] - - for i in range(observed_target.shape[0]): - keep = np.zeros_like(observed_target) - keep[i] = 1. - pvals.append(_intervals.pivot(keep, candidate=parameter[i], alternative=alternatives[i])) - - return np.array(pvals) - - def confidence_intervals(self, - target_specs, - sample_args=(), - level=0.9): - - ''' - Construct selective confidence intervals - for each parameter of the target. - Parameters - ---------- - observed_target : ndarray - Observed estimate of target. - sample_args : sequence - Arguments to `self.sample` if sample is not found - for a given objective. - level : float - Confidence level. - Returns - ------- - limits : ndarray - Confidence intervals for each target. 
- ''' - - for i in range(len(self.objectives)): - if self.opt_sampling_info[i][1] is None: - _sample, _logW = self.objectives[i].sampler.sample(*sample_args) - self.opt_sampling_info[i][1] = _sample - self.opt_sampling_info[i][2] = _logW - - ndraw = self.opt_sampling_info[0][1].shape[0] # nsample for normal samples taken from the 1st objective - - _intervals = optimization_intervals(self.opt_sampling_info, - observed_target, - ndraw) - - limits = [] - - for i in range(observed_target.shape[0]): - keep = np.zeros_like(observed_target) - keep[i] = 1. - limits.append(_intervals.confidence_interval(keep, level=level)) - - return np.array(limits) - - -def naive_confidence_intervals(diag_cov, observed, level=0.9): - """ - Compute naive Gaussian based confidence - intervals for target. - Parameters - ---------- - diag_cov : diagonal of a covariance matrix - observed : np.float - A vector of observed data of shape `target.shape` - alpha : float (optional) - 1 - confidence level. - Returns - ------- - intervals : np.float - Gaussian based confidence intervals. - """ - alpha = 1 - level - diag_cov = np.asarray(diag_cov) - p = diag_cov.shape[0] - quantile = - ndist.ppf(alpha / 2) - LU = np.zeros((2, p)) - for j in range(p): - sigma = np.sqrt(diag_cov[j]) - LU[0, j] = observed[j] - sigma * quantile - LU[1, j] = observed[j] + sigma * quantile - return LU.T - - -def naive_pvalues(diag_cov, observed, parameter): - diag_cov = np.asarray(diag_cov) - p = diag_cov.shape[0] - pvalues = np.zeros(p) - for j in range(p): - sigma = np.sqrt(diag_cov[j]) - pval = ndist.cdf((observed[j] - parameter[j]) / sigma) - pvalues[j] = 2 * min(pval, 1 - pval) - return pvalues +from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C def selective_MLE(target_spec, observed_soln, # initial (observed) value of diff --git a/selectinf/randomized/tests/test_naive.py b/selectinf/randomized/tests/test_naive.py index 584535dc7..56f7f5515 100644 --- a/selectinf/randomized/tests/test_naive.py +++ b/selectinf/randomized/tests/test_naive.py @@ -1,15 +1,52 @@ import numpy as np import regreg.api as rr -import pandas as pd from scipy.stats import norm as ndist -from scipy.optimize import bisect + from ...tests.instance import gaussian_instance from ...algorithms.lasso import lasso from ...tests.flags import SMALL_SAMPLES, SET_SEED from ...tests.decorators import wait_for_return_value, set_seed_iftrue, set_sampling_params_iftrue from ..cv_view import CV_view, have_glmnet -from ..query import (naive_pvalues, naive_confidence_intervals) + + +def naive_confidence_intervals(diag_cov, observed, level=0.9): + """ + Compute naive Gaussian based confidence + intervals for target. + Parameters + ---------- + diag_cov : diagonal of a covariance matrix + observed : np.float + A vector of observed data of shape `target.shape` + alpha : float (optional) + 1 - confidence level. + Returns + ------- + intervals : np.float + Gaussian based confidence intervals. 
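As a quick standalone illustration of what this helper returns (toy numbers, not part of the patch): each coordinate's interval is just the observed value plus or minus the normal quantile times its standard error.

    import numpy as np
    from scipy.stats import norm as ndist

    observed = np.array([1.0, -0.5, 2.0])    # toy observed targets
    diag_cov = np.array([0.25, 1.0, 4.0])    # toy variances
    level = 0.9
    quantile = -ndist.ppf((1 - level) / 2)   # about 1.645 at level 0.9
    lower = observed - quantile * np.sqrt(diag_cov)
    upper = observed + quantile * np.sqrt(diag_cov)
    # naive_confidence_intervals(diag_cov, observed, level) stacks these
    # as rows (lower[j], upper[j]), one row per coordinate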
+ """ + alpha = 1 - level + diag_cov = np.asarray(diag_cov) + p = diag_cov.shape[0] + quantile = - ndist.ppf(alpha / 2) + LU = np.zeros((2, p)) + for j in range(p): + sigma = np.sqrt(diag_cov[j]) + LU[0, j] = observed[j] - sigma * quantile + LU[1, j] = observed[j] + sigma * quantile + return LU.T + + +def naive_pvalues(diag_cov, observed, parameter): + diag_cov = np.asarray(diag_cov) + p = diag_cov.shape[0] + pvalues = np.zeros(p) + for j in range(p): + sigma = np.sqrt(diag_cov[j]) + pval = ndist.cdf((observed[j] - parameter[j]) / sigma) + pvalues[j] = 2 * min(pval, 1 - pval) + return pvalues def compute_projection_parameters(n, p, s, signal, rho, sigma, active): multiple = 10**2 From 91353fc2ce3bf9637adc975263ea0a0ddcef4f87 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Thu, 4 Nov 2021 16:12:16 -0400 Subject: [PATCH 157/187] some more clean up for query --- selectinf/randomized/query.py | 287 +++++++++++++------------- selectinf/randomized/selective_MLE.py | 2 +- 2 files changed, 147 insertions(+), 142 deletions(-) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 9a562c8e4..3f0610abb 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -1,13 +1,10 @@ import numpy as np -import pandas as pd -from scipy.stats import norm as ndist from ..constraints.affine import constraints -from ..algorithms.barrier_affine import solve_barrier_affine_py - from .posterior_inference import posterior from .approx_reference import approximate_grid_inference from .exact_reference import exact_grid_inference +from .selective_MLE import mle_inference class query(object): r""" @@ -170,21 +167,31 @@ def selective_MLE(self, level=0.90, solve_args={'tol': 1.e-12}): - return selective_MLE(target_spec, - self.observed_opt_state, - self.affine_con.mean, - self.affine_con.covariance, - self.affine_con.linear_part, - self.affine_con.offset, - self.opt_linear, - self.M1, - self.M2, - self.M3, - self.observed_score_state + self.observed_subgrad, - solve_args=solve_args, - level=level, - useC=False) - + G = mle_inference(self, + target_spec, + solve_args=solve_args) + + return G.mle_inference(level=level) + + # def selective_MLE(self, + # target_spec, + # level=0.90, + # solve_args={'tol': 1.e-12}): + # + # return selective_MLE(target_spec, + # self.observed_opt_state, + # self.affine_con.mean, + # self.affine_con.covariance, + # self.affine_con.linear_part, + # self.affine_con.offset, + # self.opt_linear, + # self.M1, + # self.M2, + # self.M3, + # self.observed_score_state + self.observed_subgrad, + # solve_args=solve_args, + # level=level, + # useC=False) def posterior(self, target_spec, @@ -279,126 +286,124 @@ def exact_grid_inference(self, return G.summary(alternatives=target_spec.alternatives) -from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C - -def selective_MLE(target_spec, - observed_soln, # initial (observed) value of - # optimization variables -- used as a - # feasible point. precise value used - # only for independent estimator - cond_mean, - cond_cov, - linear_part, - offset, - opt_linear, - M1, - M2, - M3, - observed_score, - solve_args={'tol': 1.e-12}, - level=0.9, - useC=False): - - """ - Selective MLE based on approximation of - CGF. - Parameters - ---------- - observed_target : ndarray - Observed estimate of target. - cov_target : ndarray - Estimated covaraince of target. - regress_target_score : ndarray - Estimated regression coefficient of target on score. 
- observed_soln : ndarray - Feasible point for optimization problem. - cond_mean : ndarray - Conditional mean of optimization variables given target. - cond_cov : ndarray - Conditional covariance of optimization variables given target. - linear_part : ndarray - Linear part of affine constraints: $\{o:Ao \leq b\}$ - offset : ndarray - Offset part of affine constraints: $\{o:Ao \leq b\}$ - solve_args : dict, optional - Arguments passed to solver. - level : float, optional - Confidence level. - useC : bool, optional - Use python or C solver. - """ - - (observed_target, - cov_target, - regress_target_score) = target_spec[:3] - - if np.asarray(observed_target).shape in [(), (0,)]: - raise ValueError('no target specified') - - observed_target = np.atleast_1d(observed_target) - prec_target = np.linalg.inv(cov_target) - - prec_opt = np.linalg.inv(cond_cov) - - # this is specific to target - - T1 = regress_target_score.T.dot(prec_target) - T2 = T1.T.dot(M2.dot(T1)) - T3 = T1.T.dot(M3.dot(T1)) - T4 = M1.dot(opt_linear).dot(cond_cov).dot(opt_linear.T.dot(M1.T.dot(T1))) - T5 = T1.T.dot(M1.dot(opt_linear)) - - prec_target_nosel = prec_target + T2 - T3 - - _P = -(T1.T.dot(M1.dot(observed_score)) + T2.dot(observed_target)) ##flipped sign of second term here - - bias_target = cov_target.dot(T1.T.dot(-T4.dot(observed_target) + M1.dot(opt_linear.dot(cond_mean))) - _P) - - conjugate_arg = prec_opt.dot(cond_mean) - - if useC: - solver = solve_barrier_affine_C - else: - solver = solve_barrier_affine_py - - val, soln, hess = solver(conjugate_arg, - prec_opt, - observed_soln, - linear_part, - offset, - **solve_args) - - final_estimator = cov_target.dot(prec_target_nosel).dot(observed_target) \ - + regress_target_score.dot(M1.dot(opt_linear)).dot(cond_mean - soln) - bias_target - - observed_info_natural = prec_target_nosel + T3 - T5.dot(hess.dot(T5.T)) - - unbiased_estimator = cov_target.dot(prec_target_nosel).dot(observed_target) - bias_target - - observed_info_mean = cov_target.dot(observed_info_natural.dot(cov_target)) - - Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) - - pvalues = ndist.cdf(Z_scores) - - pvalues = 2 * np.minimum(pvalues, 1 - pvalues) - - alpha = 1. - level - - quantile = ndist.ppf(1 - alpha / 2.) - - intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), - final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T - - log_ref = val + conjugate_arg.T.dot(cond_cov).dot(conjugate_arg) / 2. - - result = pd.DataFrame({'MLE': final_estimator, - 'SE': np.sqrt(np.diag(observed_info_mean)), - 'Zvalue': Z_scores, - 'pvalue': pvalues, - 'lower_confidence': intervals[:, 0], - 'upper_confidence': intervals[:, 1], - 'unbiased': unbiased_estimator}) - - return result, observed_info_mean, log_ref +# def selective_MLE(target_spec, +# observed_soln, # initial (observed) value of +# # optimization variables -- used as a +# # feasible point. precise value used +# # only for independent estimator +# cond_mean, +# cond_cov, +# linear_part, +# offset, +# opt_linear, +# M1, +# M2, +# M3, +# observed_score, +# solve_args={'tol': 1.e-12}, +# level=0.9, +# useC=False): +# +# """ +# Selective MLE based on approximation of +# CGF. +# Parameters +# ---------- +# observed_target : ndarray +# Observed estimate of target. +# cov_target : ndarray +# Estimated covaraince of target. +# regress_target_score : ndarray +# Estimated regression coefficient of target on score. +# observed_soln : ndarray +# Feasible point for optimization problem. 
+# cond_mean : ndarray +# Conditional mean of optimization variables given target. +# cond_cov : ndarray +# Conditional covariance of optimization variables given target. +# linear_part : ndarray +# Linear part of affine constraints: $\{o:Ao \leq b\}$ +# offset : ndarray +# Offset part of affine constraints: $\{o:Ao \leq b\}$ +# solve_args : dict, optional +# Arguments passed to solver. +# level : float, optional +# Confidence level. +# useC : bool, optional +# Use python or C solver. +# """ +# +# (observed_target, +# cov_target, +# regress_target_score) = target_spec[:3] +# +# if np.asarray(observed_target).shape in [(), (0,)]: +# raise ValueError('no target specified') +# +# observed_target = np.atleast_1d(observed_target) +# prec_target = np.linalg.inv(cov_target) +# +# prec_opt = np.linalg.inv(cond_cov) +# +# # this is specific to target +# +# T1 = regress_target_score.T.dot(prec_target) +# T2 = T1.T.dot(M2.dot(T1)) +# T3 = T1.T.dot(M3.dot(T1)) +# T4 = M1.dot(opt_linear).dot(cond_cov).dot(opt_linear.T.dot(M1.T.dot(T1))) +# T5 = T1.T.dot(M1.dot(opt_linear)) +# +# prec_target_nosel = prec_target + T2 - T3 +# +# _P = -(T1.T.dot(M1.dot(observed_score)) + T2.dot(observed_target)) ##flipped sign of second term here +# +# bias_target = cov_target.dot(T1.T.dot(-T4.dot(observed_target) + M1.dot(opt_linear.dot(cond_mean))) - _P) +# +# conjugate_arg = prec_opt.dot(cond_mean) +# +# if useC: +# solver = solve_barrier_affine_C +# else: +# solver = solve_barrier_affine_py +# +# val, soln, hess = solver(conjugate_arg, +# prec_opt, +# observed_soln, +# linear_part, +# offset, +# **solve_args) +# +# final_estimator = cov_target.dot(prec_target_nosel).dot(observed_target) \ +# + regress_target_score.dot(M1.dot(opt_linear)).dot(cond_mean - soln) - bias_target +# +# observed_info_natural = prec_target_nosel + T3 - T5.dot(hess.dot(T5.T)) +# +# unbiased_estimator = cov_target.dot(prec_target_nosel).dot(observed_target) - bias_target +# +# observed_info_mean = cov_target.dot(observed_info_natural.dot(cov_target)) +# +# Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) +# +# pvalues = ndist.cdf(Z_scores) +# +# pvalues = 2 * np.minimum(pvalues, 1 - pvalues) +# +# alpha = 1. - level +# +# quantile = ndist.ppf(1 - alpha / 2.) +# +# intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), +# final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T +# +# log_ref = val + conjugate_arg.T.dot(cond_cov).dot(conjugate_arg) / 2. 
+# +# result = pd.DataFrame({'MLE': final_estimator, +# 'SE': np.sqrt(np.diag(observed_info_mean)), +# 'Zvalue': Z_scores, +# 'pvalue': pvalues, +# 'lower_confidence': intervals[:, 0], +# 'upper_confidence': intervals[:, 1], +# 'unbiased': unbiased_estimator}) +# +# return result, observed_info_mean, log_ref diff --git a/selectinf/randomized/selective_MLE.py b/selectinf/randomized/selective_MLE.py index 2b8f6f9e9..96ceae97b 100644 --- a/selectinf/randomized/selective_MLE.py +++ b/selectinf/randomized/selective_MLE.py @@ -5,7 +5,7 @@ from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C from ..algorithms.barrier_affine import solve_barrier_affine_py -class selective_MLE(object): +class mle_inference(object): def __init__(self, query, From 5e448a19daae0d68f1f83b313b5d496c58ccecea Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Thu, 4 Nov 2021 16:40:46 -0400 Subject: [PATCH 158/187] removed regress_opt from return list --- selectinf/randomized/query.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 3f0610abb..6d8d0749a 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -103,7 +103,6 @@ def _setup_sampler(self, (cond_mean, cond_cov, cond_precision, - regress_opt, M1, M2, M3) = self._setup_implied_gaussian(opt_linear, @@ -157,7 +156,6 @@ def _setup_implied_gaussian(self, return (cond_mean, cond_cov, cond_precision, - regress_opt, M1, M2, M3) From 6f38200bb4989256c0f6e1b944c14f4cec28ecba Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Thu, 4 Nov 2021 16:53:21 -0400 Subject: [PATCH 159/187] changed some names of variables in posterior: for consistency --- selectinf/randomized/posterior_inference.py | 50 +++++++++------------ 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 7fa5b377b..595dca61d 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -47,48 +47,42 @@ def __init__(self, cov_target, regress_target_score) = target_spec[:3] - linear_part = query.affine_con.linear_part - offset = query.affine_con.offset - - opt_linear = query.opt_linear - - observed_score = query.observed_score_state + query.observed_subgrad - - result, self.inverse_info, log_ref = query.selective_MLE(target_spec) - - ### Note for an informative prior we might want to change this... 
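The note above flags the default prior as a placeholder. A minimal sketch of an informative prior a caller could supply instead, assuming only the documented contract that the callable returns (log prior, gradient of log prior); `tau` and the factory name are hypothetical.

    import numpy as np

    def make_gaussian_prior(tau=2.):
        # independent N(0, tau**2) prior on each target coordinate
        def prior(target_parameter):
            grad_prior = -target_parameter / tau**2
            log_prior = -0.5 * np.sum(target_parameter**2) / tau**2
            return log_prior, grad_prior
        return prior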
- - cond_cov = query.cond_cov - self.cond_precision = np.linalg.inv(cond_cov) - self.cond_cov = cond_cov + self.observed_target = observed_target self.cov_target = cov_target self.prec_target = np.linalg.inv(cov_target) + self.regress_target_score = regress_target_score - self.ntarget = self.cov_target.shape[0] - self.nopt = self.cond_precision.shape[0] + self.cond_mean = query.cond_mean + self.cond_cov = query.cond_cov + self.prec_opt = np.linalg.inv(self.cond_cov) + self.opt_linear = query.opt_linear - self.observed_target = observed_target - self.regress_target_score = regress_target_score - self.opt_linear = opt_linear - self.observed_score = observed_score + self.linear_part = query.affine_con.linear_part + self.offset = query.affine_con.offset self.M1 = query.M1 self.M2 = query.M2 self.M3 = query.M3 - self.feasible_point = query.observed_opt_state + self.observed_soln = query.observed_opt_state + + self.observed_score = query.observed_score_state + query.observed_subgrad + + result, self.inverse_info, log_ref = query.selective_MLE(target_spec) + + + self.ntarget = self.cov_target.shape[0] + self.nopt = self.prec_opt.shape[0] - self.cond_mean = query.cond_mean - self.linear_part = linear_part - self.offset = offset self.initial_estimate = np.asarray(result['MLE']) self.dispersion = dispersion self.log_ref = log_ref - self._set_marginal_parameters() - + ### Note for an informative prior we might want to change this... self.prior = prior + self._set_marginal_parameters() + def log_posterior(self, target_parameter, sigma=1): @@ -115,7 +109,7 @@ def log_posterior(self, val, soln, hess = solver(conjugate_marginal, prec_marginal, - self.feasible_point, + self.observed_soln, self.linear_part, self.offset, **self.solve_args) @@ -161,7 +155,7 @@ def _set_marginal_parameters(self): ###set parameters for the marginal distribution of optimization variables _Q = np.linalg.inv(prec_target_nosel + T3) - self.prec_marginal = self.cond_precision - T5.T.dot(_Q).dot(T5) + self.prec_marginal = self.prec_opt - T5.T.dot(_Q).dot(T5) self.linear_coef = self.cond_cov.dot(T5.T) self.offset_coef = self.cond_mean - self.linear_coef.dot(self.observed_target) From 69e7dd1db1fb3c39d010fce29844c3704803f96b Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Thu, 4 Nov 2021 17:00:18 -0400 Subject: [PATCH 160/187] changed some names of variables: for consistency --- selectinf/randomized/exact_reference.py | 39 ++--- selectinf/randomized/posterior_inference.py | 6 +- selectinf/randomized/query.py | 156 ++------------------ selectinf/randomized/selective_MLE.py | 6 +- 4 files changed, 37 insertions(+), 170 deletions(-) diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index ce799a47c..00702a6b7 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -32,42 +32,33 @@ def __init__(self, Arguments passed to solver. 
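For orientation: the `target_spec` objects consumed throughout this series bundle exactly the quantities these docstrings describe, and the code only relies on the access pattern below (descriptive note, not new API).

    # how target_spec is unpacked in these classes
    observed_target, cov_target, regress_target_score = target_spec[:3]
    alternatives = target_spec.alternatives    # e.g. ['twosided'] * ntarget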
""" + self.solve_args = solve_args + (observed_target, cov_target, regress_target_score) = target_spec[:3] - - self.solve_args = solve_args - - linear_part = query.affine_con.linear_part - offset = query.affine_con.offset - - opt_linear = query.opt_linear - - observed_score = query.observed_score_state + query.observed_subgrad - - result, inverse_info, log_ref = query.selective_MLE(target_spec) - cond_cov = query.cond_cov - self.cond_precision = np.linalg.inv(cond_cov) - self.cond_cov = cond_cov + self.observed_target = observed_target self.cov_target = cov_target self.prec_target = np.linalg.inv(cov_target) - - self.observed_target = observed_target self.regress_target_score = regress_target_score - self.opt_linear = opt_linear - self.observed_score = observed_score + + self.cond_mean = query.cond_mean + self.cond_cov = query.cond_cov + self.cond_precision = np.linalg.inv(self.cond_cov) + self.opt_linear = query.opt_linear + + self.linear_part = query.affine_con.linear_part + self.offset = query.affine_con.offset self.M1 = query.M1 self.M2 = query.M2 self.M3 = query.M3 - self.feasible_point = query.observed_opt_state + self.observed_soln = query.observed_opt_state - self.cond_mean = query.cond_mean - self.linear_part = linear_part - self.offset = offset + self.observed_score = query.observed_score_state + query.observed_subgrad - self.feasible_point = query.observed_opt_state + result, inverse_info, log_ref = query.selective_MLE(target_spec) self.ntarget = ntarget = cov_target.shape[0] _scale = 4 * np.sqrt(np.diag(inverse_info)) @@ -156,7 +147,7 @@ def log_reference(self, R = np.identity(num_opt) - _A.dot(eta.T) A = self.linear_part.dot(_A).reshape((-1,)) - b = -self.linear_part.dot(R).dot(self.feasible_point) + b = -self.linear_part.dot(R).dot(self.observed_soln) trunc_ = np.true_divide((self.offset + b), A) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 595dca61d..63b478dec 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -54,7 +54,7 @@ def __init__(self, self.cond_mean = query.cond_mean self.cond_cov = query.cond_cov - self.prec_opt = np.linalg.inv(self.cond_cov) + self.cond_precision = np.linalg.inv(self.cond_cov) self.opt_linear = query.opt_linear self.linear_part = query.affine_con.linear_part @@ -71,7 +71,7 @@ def __init__(self, self.ntarget = self.cov_target.shape[0] - self.nopt = self.prec_opt.shape[0] + self.nopt = self.cond_precision.shape[0] self.initial_estimate = np.asarray(result['MLE']) @@ -155,7 +155,7 @@ def _set_marginal_parameters(self): ###set parameters for the marginal distribution of optimization variables _Q = np.linalg.inv(prec_target_nosel + T3) - self.prec_marginal = self.prec_opt - T5.T.dot(_Q).dot(T5) + self.prec_marginal = self.cond_precision - T5.T.dot(_Q).dot(T5) self.linear_coef = self.cond_cov.dot(T5.T) self.offset_coef = self.cond_mean - self.linear_coef.dot(self.observed_target) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 6d8d0749a..adf218825 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -165,32 +165,28 @@ def selective_MLE(self, level=0.90, solve_args={'tol': 1.e-12}): + """ + Parameters + ---------- + observed_target : ndarray + Observed estimate of target. + cov_target : ndarray + Estimated covaraince of target. + regress_target_score : ndarray + Estimated covariance of target and score of randomized query. 
+ alternatives : [str], optional + Sequence of strings describing the alternatives, + should be values of ['twosided', 'less', 'greater'] + solve_args : dict, optional + Arguments passed to solver. + """ + G = mle_inference(self, target_spec, solve_args=solve_args) return G.mle_inference(level=level) - # def selective_MLE(self, - # target_spec, - # level=0.90, - # solve_args={'tol': 1.e-12}): - # - # return selective_MLE(target_spec, - # self.observed_opt_state, - # self.affine_con.mean, - # self.affine_con.covariance, - # self.affine_con.linear_part, - # self.affine_con.offset, - # self.opt_linear, - # self.M1, - # self.M2, - # self.M3, - # self.observed_score_state + self.observed_subgrad, - # solve_args=solve_args, - # level=level, - # useC=False) - def posterior(self, target_spec, dispersion=1, @@ -284,124 +280,4 @@ def exact_grid_inference(self, return G.summary(alternatives=target_spec.alternatives) -# def selective_MLE(target_spec, -# observed_soln, # initial (observed) value of -# # optimization variables -- used as a -# # feasible point. precise value used -# # only for independent estimator -# cond_mean, -# cond_cov, -# linear_part, -# offset, -# opt_linear, -# M1, -# M2, -# M3, -# observed_score, -# solve_args={'tol': 1.e-12}, -# level=0.9, -# useC=False): -# -# """ -# Selective MLE based on approximation of -# CGF. -# Parameters -# ---------- -# observed_target : ndarray -# Observed estimate of target. -# cov_target : ndarray -# Estimated covaraince of target. -# regress_target_score : ndarray -# Estimated regression coefficient of target on score. -# observed_soln : ndarray -# Feasible point for optimization problem. -# cond_mean : ndarray -# Conditional mean of optimization variables given target. -# cond_cov : ndarray -# Conditional covariance of optimization variables given target. -# linear_part : ndarray -# Linear part of affine constraints: $\{o:Ao \leq b\}$ -# offset : ndarray -# Offset part of affine constraints: $\{o:Ao \leq b\}$ -# solve_args : dict, optional -# Arguments passed to solver. -# level : float, optional -# Confidence level. -# useC : bool, optional -# Use python or C solver. 
-# """ -# -# (observed_target, -# cov_target, -# regress_target_score) = target_spec[:3] -# -# if np.asarray(observed_target).shape in [(), (0,)]: -# raise ValueError('no target specified') -# -# observed_target = np.atleast_1d(observed_target) -# prec_target = np.linalg.inv(cov_target) -# -# prec_opt = np.linalg.inv(cond_cov) -# -# # this is specific to target -# -# T1 = regress_target_score.T.dot(prec_target) -# T2 = T1.T.dot(M2.dot(T1)) -# T3 = T1.T.dot(M3.dot(T1)) -# T4 = M1.dot(opt_linear).dot(cond_cov).dot(opt_linear.T.dot(M1.T.dot(T1))) -# T5 = T1.T.dot(M1.dot(opt_linear)) -# -# prec_target_nosel = prec_target + T2 - T3 -# -# _P = -(T1.T.dot(M1.dot(observed_score)) + T2.dot(observed_target)) ##flipped sign of second term here -# -# bias_target = cov_target.dot(T1.T.dot(-T4.dot(observed_target) + M1.dot(opt_linear.dot(cond_mean))) - _P) -# -# conjugate_arg = prec_opt.dot(cond_mean) -# -# if useC: -# solver = solve_barrier_affine_C -# else: -# solver = solve_barrier_affine_py -# -# val, soln, hess = solver(conjugate_arg, -# prec_opt, -# observed_soln, -# linear_part, -# offset, -# **solve_args) -# -# final_estimator = cov_target.dot(prec_target_nosel).dot(observed_target) \ -# + regress_target_score.dot(M1.dot(opt_linear)).dot(cond_mean - soln) - bias_target -# -# observed_info_natural = prec_target_nosel + T3 - T5.dot(hess.dot(T5.T)) -# -# unbiased_estimator = cov_target.dot(prec_target_nosel).dot(observed_target) - bias_target -# -# observed_info_mean = cov_target.dot(observed_info_natural.dot(cov_target)) -# -# Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) -# -# pvalues = ndist.cdf(Z_scores) -# -# pvalues = 2 * np.minimum(pvalues, 1 - pvalues) -# -# alpha = 1. - level -# -# quantile = ndist.ppf(1 - alpha / 2.) -# -# intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), -# final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T -# -# log_ref = val + conjugate_arg.T.dot(cond_cov).dot(conjugate_arg) / 2. 
-# -# result = pd.DataFrame({'MLE': final_estimator, -# 'SE': np.sqrt(np.diag(observed_info_mean)), -# 'Zvalue': Z_scores, -# 'pvalue': pvalues, -# 'lower_confidence': intervals[:, 0], -# 'upper_confidence': intervals[:, 1], -# 'unbiased': unbiased_estimator}) -# -# return result, observed_info_mean, log_ref diff --git a/selectinf/randomized/selective_MLE.py b/selectinf/randomized/selective_MLE.py index 96ceae97b..c4ceab085 100644 --- a/selectinf/randomized/selective_MLE.py +++ b/selectinf/randomized/selective_MLE.py @@ -25,7 +25,7 @@ def __init__(self, self.cond_mean = query.cond_mean self.cond_cov = query.cond_cov - self.prec_opt = np.linalg.inv(self.cond_cov) + self.cond_precision = np.linalg.inv(self.cond_cov) self.opt_linear = query.opt_linear self.linear_part = query.sampler.affine_con.linear_part @@ -42,14 +42,14 @@ def __init__(self, def mle_inference(self, useC= False, level=0.90): - conjugate_arg = self.prec_opt.dot(self.cond_mean) + conjugate_arg = self.cond_precision.dot(self.cond_mean) if useC: solver = solve_barrier_affine_C else: solver = solve_barrier_affine_py val, soln, hess = solver(conjugate_arg, - self.prec_opt, + self.cond_precision, self.observed_soln, self.linear_part, self.offset, From cd730d6d3f7d23c3d89735edad993e000a980a67 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Thu, 4 Nov 2021 21:26:33 -0400 Subject: [PATCH 161/187] some more name changes for variables --- selectinf/randomized/approx_reference.py | 41 +++++++++--------------- selectinf/randomized/exact_reference.py | 2 -- 2 files changed, 16 insertions(+), 27 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index 38483f4c6..2e9bac78b 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -33,42 +33,33 @@ def __init__(self, Arguments passed to solver. 
""" + self.solve_args = solve_args + (observed_target, cov_target, regress_target_score) = target_spec[:3] - - self.solve_args = solve_args - - linear_part = query.affine_con.linear_part - offset = query.affine_con.offset - - opt_linear = query.opt_linear - observed_score = query.observed_score_state + query.observed_subgrad - - result, inverse_info, log_ref = query.selective_MLE(target_spec) - - cond_cov = query.cond_cov - self.cond_precision = np.linalg.inv(cond_cov) - self.cond_cov = cond_cov + self.observed_target = observed_target self.cov_target = cov_target self.prec_target = np.linalg.inv(cov_target) - - self.observed_target = observed_target self.regress_target_score = regress_target_score - self.opt_linear = opt_linear - self.observed_score = observed_score + + self.cond_mean = query.cond_mean + self.cond_cov = query.cond_cov + self.cond_precision = np.linalg.inv(self.cond_cov) + self.opt_linear = query.opt_linear + + self.linear_part = query.affine_con.linear_part + self.offset = query.affine_con.offset self.M1 = query.M1 self.M2 = query.M2 self.M3 = query.M3 - self.feasible_point = query.observed_opt_state + self.observed_soln = query.observed_opt_state - self.cond_mean = query.cond_mean - self.linear_part = linear_part - self.offset = offset + self.observed_score = query.observed_score_state + query.observed_subgrad - self.feasible_point = query.observed_opt_state + result, inverse_info, log_ref = query.selective_MLE(target_spec) self.ntarget = ntarget = cov_target.shape[0] _scale = 4 * np.sqrt(np.diag(inverse_info)) @@ -88,7 +79,7 @@ def __init__(self, observed_target[j] + 1.5 * _scale[j], num=ngrid) - self.opt_linear = query.opt_linear + self.useIP = useIP self.inverse_info = inverse_info @@ -158,7 +149,7 @@ def _approx_log_reference(self, val, _, _ = solver(conjugate_arg, self.cond_precision, - self.feasible_point, + self.observed_soln, self.linear_part, self.offset, **self.solve_args) diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index 00702a6b7..291b640fd 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -26,8 +26,6 @@ def __init__(self, Estimated covaraince of target. cov_target_score : ndarray Estimated covariance of target and score of randomized query. - level : float, optional - Confidence level. solve_args : dict, optional Arguments passed to solver. 
""" From eb0d16e05e4d75290c21d8fbe91ca58f8c4a9454 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Fri, 5 Nov 2021 21:22:19 -0400 Subject: [PATCH 162/187] removed regress_opt from return list; some more consistency fixes --- selectinf/randomized/lasso.py | 11 +++++------ selectinf/randomized/query.py | 2 +- selectinf/randomized/selective_MLE.py | 8 ++++---- .../randomized/tests/test_selective_MLE_high.py | 13 +++++++------ 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index 1cca12f32..26fecf91e 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -693,7 +693,7 @@ def __init__(self, proportion_select, ridge_term=0, perturb=None, - estimate_dispersion=False): + estimate_dispersion=True): (self.loglike, self.feature_weights, @@ -731,7 +731,7 @@ def fit(self, 'func') / (n - df_fit)) - self.dispersion = dispersion + self.dispersion_ = dispersion # run setup again after # estimating dispersion @@ -741,13 +741,13 @@ def fit(self, def setup_inference(self, - dispersion=None): + dispersion): if self.df_fit > 0: if dispersion is None: self._setup_sampler(*self._setup_sampler_data, - dispersion=self.dispersion) + dispersion=self.dispersion_) else: self._setup_sampler(*self._setup_sampler_data, @@ -805,7 +805,6 @@ def _setup_implied_gaussian(self, return (cond_mean, cond_cov, cond_precision, - regress_opt, M1, M2, M3) @@ -1086,4 +1085,4 @@ def poisson(X, np.asarray(feature_weights), proportion) - + diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index adf218825..38ff6e957 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -185,7 +185,7 @@ def selective_MLE(self, target_spec, solve_args=solve_args) - return G.mle_inference(level=level) + return G.solve_estimating_eqn(level=level) def posterior(self, target_spec, diff --git a/selectinf/randomized/selective_MLE.py b/selectinf/randomized/selective_MLE.py index c4ceab085..ed62d60ca 100644 --- a/selectinf/randomized/selective_MLE.py +++ b/selectinf/randomized/selective_MLE.py @@ -28,8 +28,8 @@ def __init__(self, self.cond_precision = np.linalg.inv(self.cond_cov) self.opt_linear = query.opt_linear - self.linear_part = query.sampler.affine_con.linear_part - self.offset = query.sampler.affine_con.offset + self.linear_part = query.affine_con.linear_part + self.offset = query.affine_con.offset self.M1 = query.M1 self.M2 = query.M2 @@ -40,7 +40,7 @@ def __init__(self, self._setup_estimating_eqn() - def mle_inference(self, useC= False, level=0.90): + def solve_estimating_eqn(self, useC= False, level=0.90): conjugate_arg = self.cond_precision.dot(self.cond_mean) if useC: @@ -59,7 +59,7 @@ def mle_inference(self, useC= False, level=0.90): + self.regress_target_score.dot(self.M1.dot(self.opt_linear)).dot(self.cond_mean - soln) \ - self.bias_target - observed_info_natural = self.prec_target_nosel + self.T3 - self.T5.dot(self.hess.dot(self.T5.T)) + observed_info_natural = self.prec_target_nosel + self.T3 - self.T5.dot(hess.dot(self.T5.T)) unbiased_estimator = self.cov_target.dot(self.prec_target_nosel).dot(self.observed_target) - self.bias_target diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 444748b8d..aeb2571fb 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -347,12 +347,12 @@ def test_logistic_split(n=2000, if nonzero.sum() > 0: - 
conv.setup_inference(dispersion=1) - target_spec = selected_targets(conv.loglike, conv.observed_soln, dispersion=1) + conv.setup_inference(dispersion=None) + result = conv.selective_MLE(target_spec)[0] estimate = result['MLE'] pval = result['pvalue'] @@ -360,7 +360,7 @@ def test_logistic_split(n=2000, 'upper_confidence']]) return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], intervals - + def test_poisson(n=2000, p=200, signal_fac=10., @@ -398,12 +398,13 @@ def test_poisson(n=2000, print("dimensions", n, p, nonzero.sum()) if nonzero.sum() > 0: - conv.setup_inference(dispersion=1) target_spec = selected_targets(conv.loglike, conv.observed_soln, dispersion=1) + conv.setup_inference(dispersion=1) + result = conv.selective_MLE(target_spec)[0] estimate = result['MLE'] pval = result['pvalue'] @@ -450,12 +451,12 @@ def test_poisson_split(n=2000, if nonzero.sum() > 0: - conv.setup_inference(dispersion=1) - target_spec = selected_targets(conv.loglike, conv.observed_soln, dispersion=1) + conv.setup_inference(dispersion=1) + result = conv.selective_MLE(target_spec)[0] estimate = result['MLE'] pval = result['pvalue'] From 4418bf7c8aff68fa1f82bc76b940fb86ce9eb328 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 16 Nov 2021 22:50:19 -0800 Subject: [PATCH 163/187] a little reorg -- one method for inference --- selectinf/randomized/approx_reference.py | 7 +- selectinf/randomized/exact_reference.py | 6 +- selectinf/randomized/posterior_inference.py | 4 +- selectinf/randomized/query.py | 184 +++++++++++------- .../tests/test_selective_MLE_high.py | 22 +-- 5 files changed, 136 insertions(+), 87 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index 2e9bac78b..0491700e1 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -59,7 +59,8 @@ def __init__(self, self.observed_score = query.observed_score_state + query.observed_subgrad - result, inverse_info, log_ref = query.selective_MLE(target_spec) + result, inverse_info, log_ref = query._selective_MLE(target_spec, + solve_args=solve_args) self.ntarget = ntarget = cov_target.shape[0] _scale = 4 * np.sqrt(np.diag(inverse_info)) @@ -103,12 +104,12 @@ def summary(self, if parameter is not None: pivots = self.approx_pivots(parameter, - alternatives=alternatives) + alternatives=alternatives)[0] else: pivots = None pvalues = self._approx_pivots(np.zeros_like(self.observed_target), - alternatives=alternatives) + alternatives=alternatives)[0] lower, upper = self._approx_intervals(level=level) result = pd.DataFrame({'target': self.observed_target, diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index 291b640fd..0818fba7c 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -26,11 +26,8 @@ def __init__(self, Estimated covaraince of target. cov_target_score : ndarray Estimated covariance of target and score of randomized query. - solve_args : dict, optional - Arguments passed to solver. 
""" - self.solve_args = solve_args (observed_target, cov_target, @@ -56,7 +53,8 @@ def __init__(self, self.observed_score = query.observed_score_state + query.observed_subgrad - result, inverse_info, log_ref = query.selective_MLE(target_spec) + result, inverse_info, log_ref = query._selective_MLE(target_spec, + solve_args=solve_args) self.ntarget = ntarget = cov_target.shape[0] _scale = 4 * np.sqrt(np.diag(inverse_info)) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 63b478dec..f4ab1698b 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -67,8 +67,8 @@ def __init__(self, self.observed_score = query.observed_score_state + query.observed_subgrad - result, self.inverse_info, log_ref = query.selective_MLE(target_spec) - + result, self.inverse_info, log_ref = query._selective_MLE(target_spec, + solve_args=solve_args) self.ntarget = self.cov_target.shape[0] self.nopt = self.cond_precision.shape[0] diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 38ff6e957..140f68f88 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -1,7 +1,7 @@ -import numpy as np +import numpy as np, pandas as pd from ..constraints.affine import constraints -from .posterior_inference import posterior +from .posterior_inference import (posterior, langevin_sampler) from .approx_reference import approximate_grid_inference from .exact_reference import exact_grid_inference from .selective_MLE import mle_inference @@ -160,55 +160,72 @@ def _setup_implied_gaussian(self, M2, M3) - def selective_MLE(self, - target_spec, - level=0.90, - solve_args={'tol': 1.e-12}): + def inference(self, + target_spec, + method, + level=0.90, + method_args={}): """ Parameters - ---------- - observed_target : ndarray - Observed estimate of target. - cov_target : ndarray - Estimated covaraince of target. - regress_target_score : ndarray - Estimated covariance of target and score of randomized query. - alternatives : [str], optional - Sequence of strings describing the alternatives, - should be values of ['twosided', 'less', 'greater'] - solve_args : dict, optional - Arguments passed to solver. + ---------- + target_spec : TargetSpec + Information needed to specify the target. + method : str + One of ['selective_MLE', 'approx', 'exact', 'posterior'] + level : float + Confidence level or posterior quantiles. + method_args : dict + Dict of arguments to be optionally passed to the methods. + + Returns + ------- + + summary : pd.DataFrame + Statistical summary for specified targets. """ - G = mle_inference(self, - target_spec, - solve_args=solve_args) - - return G.solve_estimating_eqn(level=level) - + if method == 'selective_MLE': + return self._selective_MLE(target_spec, + level=level, + **method_args)[0] + elif method == 'exact': + return self._exact_grid_inference(target_spec, + level=level) # has no additional args + elif method == 'approx': + return self._approx_grid_inference(target_spec, + level=level, + **method_args) + elif method == 'posterior': + return self.posterior(target_spec, + **method_args)[1] + + def posterior(self, target_spec, + level=0.90, dispersion=1, prior=None, - solve_args={'tol': 1.e-12}): + solve_args={'tol': 1.e-12}, + nsample=2000, + nburnin=500): """ + Parameters ---------- - observed_target : ndarray - Observed estimate of target. - cov_target : ndarray - Estimated covaraince of target. 
- regress_target_score : ndarray - Estimated covariance of target and score of randomized query. + target_spec : TargetSpec + Information needed to specify the target. + level : float + Level for credible interval. + dispersion : float, optional + Dispersion parameter for log-likelihood. prior : callable A callable object that takes a single argument `parameter` of the same shape as `observed_target` and returns (value of log prior, gradient of log prior) - dispersion : float, optional - Dispersion parameter for log-likelihood. solve_args : dict, optional Arguments passed to solver. + """ if prior is None: @@ -219,31 +236,67 @@ def prior(target_parameter): log_prior = -0.5 * np.sum(target_parameter ** 2 * Di) return log_prior, grad_prior - return posterior(self, - target_spec, - dispersion, - prior, - solve_args=solve_args) + posterior_repr = posterior(self, + target_spec, + dispersion, + prior, + solve_args=solve_args) + + samples = langevin_sampler(posterior_repr, + nsample=nsample, + nburnin=nburnin) + + delta = 0.5 * (1 - level) * 100 + lower = np.percentile(samples, delta, axis=0) + upper = np.percentile(samples, 100 - delta, axis=0) + mean = np.mean(samples, axis=0) + + return samples, pd.DataFrame({'estimate':mean, + 'lower_credible':lower, + 'upper_credible':upper}) + + # private methods + + def _selective_MLE(self, + target_spec, + level=0.90, + solve_args={'tol': 1.e-12}): + + """ + Parameters + ---------- + target_spec : TargetSpec + Information needed to specify the target. + level : float + Confidence level or posterior quantiles. + solve_args : dict + Dict of arguments to be optionally passed to solver. + """ + + G = mle_inference(self, + target_spec, + solve_args=solve_args) + + return G.solve_estimating_eqn(level=level) + - def approximate_grid_inference(self, - target_spec, - useIP=True, - solve_args={'tol': 1.e-12}): + def _approximate_grid_inference(self, + target_spec, + level=0.90, + solve_args={'tol': 1.e-12}, + useIP=True): """ Parameters ---------- - observed_target : ndarray - Observed estimate of target. - cov_target : ndarray - Estimated covaraince of target. - regress_target_score : ndarray - Estimated covariance of target and score of randomized query. - alternatives : [str], optional - Sequence of strings describing the alternatives, - should be values of ['twosided', 'less', 'greater'] + target_spec : TargetSpec + Information needed to specify the target. + level : float + Confidence level or posterior quantiles. solve_args : dict, optional Arguments passed to solver. + useIP : bool + Use spline extrapolation. """ G = approximate_grid_inference(self, @@ -251,33 +304,30 @@ def approximate_grid_inference(self, solve_args=solve_args, useIP=useIP) - return G.summary(alternatives=target_spec.alternatives) + return G.summary(alternatives=target_spec.alternatives, + level=level) - def exact_grid_inference(self, - target_spec, - solve_args={'tol': 1.e-12}): + def _exact_grid_inference(self, + target_spec, + level=0.90, + solve_args={'tol': 1.e-12}): """ Parameters ---------- - observed_target : ndarray - Observed estimate of target. - cov_target : ndarray - Estimated covaraince of target. - regress_target_score : ndarray - Estimated covariance of target and score of randomized query. - alternatives : [str], optional - Sequence of strings describing the alternatives, - should be values of ['twosided', 'less', 'greater'] + target_spec : TargetSpec + Information needed to specify the target. + level : float + Confidence level or posterior quantiles. 
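The credible limits returned by `posterior` above come straight from percentiles of the sampler output; a self-contained sketch with stand-in draws in place of `langevin_sampler`.

    import numpy as np

    rng = np.random.default_rng(0)
    samples = rng.standard_normal((2000, 3))  # stand-in for langevin_sampler output
    level = 0.90
    delta = 0.5 * (1 - level) * 100           # 5.0
    lower = np.percentile(samples, delta, axis=0)
    upper = np.percentile(samples, 100 - delta, axis=0)
    estimate = samples.mean(axis=0)           # reported as 'estimate'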
solve_args : dict, optional Arguments passed to solver. """ G = exact_grid_inference(self, - target_spec, - solve_args=solve_args) + target_spec) - return G.summary(alternatives=target_spec.alternatives) + return G.summary(alternatives=target_spec.alternatives, + level=level) diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index aeb2571fb..d7aca0e34 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -78,7 +78,7 @@ def test_full_targets(n=200, conv.setup_inference(dispersion=dispersion) - result = conv.selective_MLE(target_spec)[0] + result = conv._selective_MLE(target_spec)[0] pval = result['pvalue'] estimate = result['MLE'] @@ -147,7 +147,7 @@ def test_selected_targets(n=2000, conv.observed_soln, dispersion=dispersion) - result = conv.selective_MLE(target_spec)[0] + result = conv._selective_MLE(target_spec)[0] pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) @@ -245,7 +245,7 @@ def test_selected_targets_disperse(n=500, conv.observed_soln, dispersion=dispersion) - result = conv.selective_MLE(target_spec)[0] + result = conv._selective_MLE(target_spec)[0] pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) @@ -301,7 +301,7 @@ def test_logistic(n=2000, conv.observed_soln, dispersion=1) - result = conv.selective_MLE(target_spec)[0] + result = conv._selective_MLE(target_spec)[0] estimate = result['MLE'] pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', @@ -353,7 +353,7 @@ def test_logistic_split(n=2000, conv.setup_inference(dispersion=None) - result = conv.selective_MLE(target_spec)[0] + result = conv._selective_MLE(target_spec)[0] estimate = result['MLE'] pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', @@ -405,7 +405,7 @@ def test_poisson(n=2000, conv.setup_inference(dispersion=1) - result = conv.selective_MLE(target_spec)[0] + result = conv._selective_MLE(target_spec)[0] estimate = result['MLE'] pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', @@ -457,7 +457,7 @@ def test_poisson_split(n=2000, conv.setup_inference(dispersion=1) - result = conv.selective_MLE(target_spec)[0] + result = conv._selective_MLE(target_spec)[0] estimate = result['MLE'] pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', @@ -512,7 +512,7 @@ def test_cox(n=2000, conv.observed_soln, dispersion=1) - result = conv.selective_MLE(target_spec)[0] + result = conv._selective_MLE(target_spec)[0] estimate = result['MLE'] pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', @@ -567,7 +567,7 @@ def test_cox_split(n=2000, conv.observed_soln, dispersion=1) - result = conv.selective_MLE(target_spec)[0] + result = conv._selective_MLE(target_spec)[0] estimate = result['MLE'] pval = result['pvalue'] intervals = np.asarray(result[['lower_confidence', @@ -635,7 +635,7 @@ def test_scale_invariant_split(n=200, print('regress_target_score', target_spec.regress_target_score[0,0]/scale**2) - result = conv.selective_MLE(target_spec)[0] + result = conv._selective_MLE(target_spec)[0] print(result['MLE'] / scale) results.append(result) @@ -713,7 +713,7 @@ def test_scale_invariant(n=200, print('cov_target', target_spec.cov_target[0,0]/scale**2) print('regress_target_score', target_spec.regress_target_score[0,0]/scale**2) - result = conv.selective_MLE(target_spec)[0] + result = 
conv._selective_MLE(target_spec)[0] print(result['MLE'] / scale) results.append(result) From ffd89dda7479f0f1b88e504a97488eb627abb05a Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 17 Nov 2021 00:05:54 -0800 Subject: [PATCH 164/187] BF: fixing method name --- selectinf/randomized/query.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 140f68f88..f40c6e4cf 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -193,9 +193,9 @@ def inference(self, return self._exact_grid_inference(target_spec, level=level) # has no additional args elif method == 'approx': - return self._approx_grid_inference(target_spec, - level=level, - **method_args) + return self._approximate_grid_inference(target_spec, + level=level, + **method_args) elif method == 'posterior': return self.posterior(target_spec, **method_args)[1] From e7a1c4a7a22e19256c8e11d7f305ecf8dc4b4f0d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 17 Nov 2021 08:27:12 -0800 Subject: [PATCH 165/187] more cleanup, added QuerySpec named tuple --- selectinf/randomized/approx_reference.py | 14 +- selectinf/randomized/exact_reference.py | 15 +- selectinf/randomized/posterior_inference.py | 14 +- selectinf/randomized/query.py | 143 +++++++++----------- selectinf/randomized/selective_MLE.py | 69 +++++----- 5 files changed, 130 insertions(+), 125 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index 0491700e1..bb069396e 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -5,7 +5,7 @@ from ..distributions.discrete_family import discrete_family from ..algorithms.barrier_affine import solve_barrier_affine_py - +from .selective_MLE import mle_inference class approximate_grid_inference(object): @@ -49,8 +49,8 @@ def __init__(self, self.cond_precision = np.linalg.inv(self.cond_cov) self.opt_linear = query.opt_linear - self.linear_part = query.affine_con.linear_part - self.offset = query.affine_con.offset + self.linear_part = query.linear_part + self.offset = query.offset self.M1 = query.M1 self.M2 = query.M2 @@ -59,8 +59,11 @@ def __init__(self, self.observed_score = query.observed_score_state + query.observed_subgrad - result, inverse_info, log_ref = query._selective_MLE(target_spec, - solve_args=solve_args) + G = mle_inference(query, + target_spec, + solve_args=solve_args) + + _, inverse_info, log_ref = G.solve_estimating_eqn() self.ntarget = ntarget = cov_target.shape[0] _scale = 4 * np.sqrt(np.diag(inverse_info)) @@ -114,6 +117,7 @@ def summary(self, result = pd.DataFrame({'target': self.observed_target, 'pvalue': pvalues, + 'alternative': alternatives, 'lower_confidence': lower, 'upper_confidence': upper}) diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index 0818fba7c..5d9ba19a6 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -4,6 +4,7 @@ from scipy.stats import norm as ndist from ..distributions.discrete_family import discrete_family +from .selective_MLE import mle_inference class exact_grid_inference(object): @@ -43,8 +44,8 @@ def __init__(self, self.cond_precision = np.linalg.inv(self.cond_cov) self.opt_linear = query.opt_linear - self.linear_part = query.affine_con.linear_part - self.offset = query.affine_con.offset + self.linear_part = query.linear_part + self.offset = query.offset self.M1 = query.M1 
self.M2 = query.M2 @@ -53,8 +54,11 @@ def __init__(self, self.observed_score = query.observed_score_state + query.observed_subgrad - result, inverse_info, log_ref = query._selective_MLE(target_spec, - solve_args=solve_args) + G = mle_inference(query, + target_spec, + solve_args=solve_args) + + _, inverse_info, log_ref = G.solve_estimating_eqn() self.ntarget = ntarget = cov_target.shape[0] _scale = 4 * np.sqrt(np.diag(inverse_info)) @@ -88,7 +92,7 @@ def summary(self, if parameter is not None: pivots = self._pivots(parameter, - alternatives=alternatives) + alternatives=alternatives) else: pivots = None @@ -98,6 +102,7 @@ def summary(self, result = pd.DataFrame({'target': self.observed_target, 'pvalue': pvalues, + 'alternative': alternatives, 'lower_confidence': lower, 'upper_confidence': upper}) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index f4ab1698b..0c33f3b96 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -7,7 +7,7 @@ from scipy.linalg import fractional_matrix_power from ..algorithms.barrier_affine import solve_barrier_affine_py - +from .selective_MLE import mle_inference class PosteriorAtt(typing.NamedTuple): @@ -57,8 +57,8 @@ def __init__(self, self.cond_precision = np.linalg.inv(self.cond_cov) self.opt_linear = query.opt_linear - self.linear_part = query.affine_con.linear_part - self.offset = query.affine_con.offset + self.linear_part = query.linear_part + self.offset = query.offset self.M1 = query.M1 self.M2 = query.M2 @@ -67,8 +67,11 @@ def __init__(self, self.observed_score = query.observed_score_state + query.observed_subgrad - result, self.inverse_info, log_ref = query._selective_MLE(target_spec, - solve_args=solve_args) + G = mle_inference(query, + target_spec, + solve_args=solve_args) + + result, self.inverse_info, self.log_ref = G.solve_estimating_eqn() self.ntarget = self.cov_target.shape[0] self.nopt = self.cond_precision.shape[0] @@ -76,7 +79,6 @@ def __init__(self, self.initial_estimate = np.asarray(result['MLE']) self.dispersion = dispersion - self.log_ref = log_ref ### Note for an informative prior we might want to change this... self.prior = prior diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index f40c6e4cf..594b08312 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -1,3 +1,4 @@ +from typing import NamedTuple import numpy as np, pandas as pd from ..constraints.affine import constraints @@ -6,6 +7,36 @@ from .exact_reference import exact_grid_inference from .selective_MLE import mle_inference +class QuerySpec(NamedTuple): + + # law of o|S,u + + cond_mean : np.ndarray + cond_cov : np.ndarray + + # how S enters into E[o|S,u] + + opt_linear : np.ndarray + + # constraints + + linear_part : np.ndarray + offset : np.ndarray + + # score / randomization relationship + + M1 : np.ndarray + M2 : np.ndarray + M3 : np.ndarray + + # observed values + + observed_opt_state : np.ndarray + observed_score_state : np.ndarray + observed_subgrad : np.ndarray + observed_soln : np.ndarray + observed_score : np.ndarray + class query(object): r""" This class is the base of randomized selective inference @@ -185,17 +216,43 @@ def inference(self, Statistical summary for specified targets. 
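A rough calling sequence for this single entry point; a hedged sketch only: the constructor and the `selected_targets` helper follow the tests elsewhere in this series, and `X`, `y`, `feature_weights` are placeholders.

    # conv = lasso.gaussian(X, y, feature_weights)   # hypothetical setup
    # conv.fit()
    # conv.setup_inference(dispersion=None)
    # target_spec = selected_targets(conv.loglike, conv.observed_soln, dispersion=1)
    # summary = conv.inference(target_spec, method='selective_MLE', level=0.90)
    # posterior_df = conv.inference(target_spec, method='posterior')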
""" + query_spec = QuerySpec(cond_mean=self.cond_mean, + cond_cov=self.cond_cov, + opt_linear=self.opt_linear, + linear_part=self.affine_con.linear_part, + offset=self.affine_con.offset, + M1=self.M1, + M2=self.M2, + M3=self.M3, + observed_opt_state=self.observed_opt_state, + observed_score_state=self.observed_score_state, + observed_subgrad=self.observed_subgrad, + observed_soln=self.observed_opt_state, + observed_score=self.observed_score_state + self.observed_subgrad) + if method == 'selective_MLE': - return self._selective_MLE(target_spec, - level=level, - **method_args)[0] + G = mle_inference(query_spec, + target_spec, + **method_args) + + return G.solve_estimating_eqn(alternatives=target_spec.alternatives, + level=level)[0] + elif method == 'exact': - return self._exact_grid_inference(target_spec, - level=level) # has no additional args + G = exact_grid_inference(query_spec, + target_spec) + + return G.summary(alternatives=target_spec.alternatives, + level=level) + elif method == 'approx': - return self._approximate_grid_inference(target_spec, - level=level, - **method_args) + G = approximate_grid_inference(query_spec, + target_spec, + **method_args) + + return G.summary(alternatives=target_spec.alternatives, + level=level) + elif method == 'posterior': return self.posterior(target_spec, **method_args)[1] @@ -255,79 +312,9 @@ def prior(target_parameter): 'lower_credible':lower, 'upper_credible':upper}) - # private methods - - def _selective_MLE(self, - target_spec, - level=0.90, - solve_args={'tol': 1.e-12}): - - """ - Parameters - ---------- - target_spec : TargetSpec - Information needed to specify the target. - level : float - Confidence level or posterior quantiles. - solve_args : dict - Dict of arguments to be optionally passed to solver. - """ - - G = mle_inference(self, - target_spec, - solve_args=solve_args) - return G.solve_estimating_eqn(level=level) - - - def _approximate_grid_inference(self, - target_spec, - level=0.90, - solve_args={'tol': 1.e-12}, - useIP=True): - - """ - Parameters - ---------- - target_spec : TargetSpec - Information needed to specify the target. - level : float - Confidence level or posterior quantiles. - solve_args : dict, optional - Arguments passed to solver. - useIP : bool - Use spline extrapolation. - """ - - G = approximate_grid_inference(self, - target_spec, - solve_args=solve_args, - useIP=useIP) - - return G.summary(alternatives=target_spec.alternatives, - level=level) - - def _exact_grid_inference(self, - target_spec, - level=0.90, - solve_args={'tol': 1.e-12}): - - """ - Parameters - ---------- - target_spec : TargetSpec - Information needed to specify the target. - level : float - Confidence level or posterior quantiles. - solve_args : dict, optional - Arguments passed to solver. 
- """ - G = exact_grid_inference(self, - target_spec) - return G.summary(alternatives=target_spec.alternatives, - level=level) diff --git a/selectinf/randomized/selective_MLE.py b/selectinf/randomized/selective_MLE.py index ed62d60ca..757d55efe 100644 --- a/selectinf/randomized/selective_MLE.py +++ b/selectinf/randomized/selective_MLE.py @@ -8,7 +8,7 @@ class mle_inference(object): def __init__(self, - query, + query_spec, target_spec, solve_args={'tol': 1.e-12}): @@ -23,40 +23,33 @@ def __init__(self, self.prec_target = np.linalg.inv(cov_target) self.regress_target_score = regress_target_score - self.cond_mean = query.cond_mean - self.cond_cov = query.cond_cov - self.cond_precision = np.linalg.inv(self.cond_cov) - self.opt_linear = query.opt_linear - - self.linear_part = query.affine_con.linear_part - self.offset = query.affine_con.offset - - self.M1 = query.M1 - self.M2 = query.M2 - self.M3 = query.M3 - self.observed_soln = query.observed_opt_state - - self.observed_score = query.observed_score_state + query.observed_subgrad + self.query_spec = query_spec self._setup_estimating_eqn() - def solve_estimating_eqn(self, useC= False, level=0.90): + def solve_estimating_eqn(self, + alternatives=None, + useC=False, + level=0.90): + + Q = self.query_spec + cond_precision = np.linalg.inv(Q.cond_cov) + conjugate_arg = cond_precision.dot(Q.cond_mean) - conjugate_arg = self.cond_precision.dot(self.cond_mean) if useC: solver = solve_barrier_affine_C else: solver = solve_barrier_affine_py val, soln, hess = solver(conjugate_arg, - self.cond_precision, - self.observed_soln, - self.linear_part, - self.offset, + cond_precision, + Q.observed_soln, + Q.linear_part, + Q.offset, **self.solve_args) final_estimator = self.cov_target.dot(self.prec_target_nosel).dot(self.observed_target) \ - + self.regress_target_score.dot(self.M1.dot(self.opt_linear)).dot(self.cond_mean - soln) \ + + self.regress_target_score.dot(Q.M1.dot(Q.opt_linear)).dot(Q.cond_mean - soln) \ - self.bias_target observed_info_natural = self.prec_target_nosel + self.T3 - self.T5.dot(hess.dot(self.T5.T)) @@ -67,9 +60,21 @@ def solve_estimating_eqn(self, useC= False, level=0.90): Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) - pvalues = ndist.cdf(Z_scores) + cdf_vals = ndist.cdf(Z_scores) + pvalues = [] + + if alternatives is None: + alternatives = ['twosided'] * len(cdf_vals) - pvalues = 2 * np.minimum(pvalues, 1 - pvalues) + for m, _cdf in enumerate(cdf_vals): + if alternatives[m] == 'twosided': + pvalues.append(2 * min(_cdf, 1 - _cdf)) + elif alternatives[m] == 'greater': + pvalues.append(1 - _cdf) + elif alternatives[m] == 'less': + pvalues.append(_cdf) + else: + raise ValueError('alternative should be in ["twosided", "less", "greater"]') alpha = 1. - level @@ -78,12 +83,13 @@ def solve_estimating_eqn(self, useC= False, level=0.90): intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T - log_ref = val + conjugate_arg.T.dot(self.cond_cov).dot(conjugate_arg) / 2. + log_ref = val + conjugate_arg.T.dot(Q.cond_cov).dot(conjugate_arg) / 2. 
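The alternative handling above maps each Z-score to a p-value in the usual way; a standalone numeric check with toy Z-scores.

    import numpy as np
    from scipy.stats import norm as ndist

    Z_scores = np.array([2.0, -1.0])
    cdf_vals = ndist.cdf(Z_scores)
    twosided = 2 * np.minimum(cdf_vals, 1 - cdf_vals)  # approx [0.0455, 0.3173]
    greater = 1 - cdf_vals                             # P(Z >= z)
    less = cdf_vals                                    # P(Z <= z)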
result = pd.DataFrame({'MLE': final_estimator, 'SE': np.sqrt(np.diag(observed_info_mean)), 'Zvalue': Z_scores, 'pvalue': pvalues, + 'alternative': alternatives, 'lower_confidence': intervals[:, 0], 'upper_confidence': intervals[:, 1], 'unbiased': unbiased_estimator}) @@ -92,18 +98,19 @@ def solve_estimating_eqn(self, useC= False, level=0.90): def _setup_estimating_eqn(self): + Q = self.query_spec T1 = self.regress_target_score.T.dot(self.prec_target) - T2 = T1.T.dot(self.M2.dot(T1)) - T3 = T1.T.dot(self.M3.dot(T1)) - T4 = self.M1.dot(self.opt_linear).dot(self.cond_cov).dot(self.opt_linear.T.dot(self.M1.T.dot(T1))) - T5 = T1.T.dot(self.M1.dot(self.opt_linear)) + T2 = T1.T.dot(Q.M2.dot(T1)) + T3 = T1.T.dot(Q.M3.dot(T1)) + T4 = Q.M1.dot(Q.opt_linear).dot(Q.cond_cov).dot(Q.opt_linear.T.dot(Q.M1.T.dot(T1))) + T5 = T1.T.dot(Q.M1.dot(Q.opt_linear)) self.prec_target_nosel = self.prec_target + T2 - T3 - _P = -(T1.T.dot(self.M1.dot(self.observed_score)) + T2.dot(self.observed_target)) + _P = -(T1.T.dot(Q.M1.dot(Q.observed_score)) + T2.dot(self.observed_target)) self.bias_target = self.cov_target.dot(T1.T.dot(-T4.dot(self.observed_target) - + self.M1.dot(self.opt_linear.dot(self.cond_mean))) - _P) + + Q.M1.dot(Q.opt_linear.dot(Q.cond_mean))) - _P) self.T3 = T3 self.T5 = T5 From a7704a4bf9265ee0007632faeb90d22d65af0922 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 17 Nov 2021 08:38:28 -0800 Subject: [PATCH 166/187] more cleanup of selective_MLE --- selectinf/randomized/selective_MLE.py | 61 +++++++++++++-------------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/selectinf/randomized/selective_MLE.py b/selectinf/randomized/selective_MLE.py index 757d55efe..9fc302b25 100644 --- a/selectinf/randomized/selective_MLE.py +++ b/selectinf/randomized/selective_MLE.py @@ -12,27 +12,21 @@ def __init__(self, target_spec, solve_args={'tol': 1.e-12}): - self.solve_args = solve_args - - (observed_target, - cov_target, - regress_target_score) = target_spec[:3] - - self.observed_target = observed_target - self.cov_target = cov_target - self.prec_target = np.linalg.inv(cov_target) - self.regress_target_score = regress_target_score - self.query_spec = query_spec - - self._setup_estimating_eqn() - + self.target_spec = target_spec + self.solve_args = solve_args + def solve_estimating_eqn(self, alternatives=None, useC=False, level=0.90): + prec_target_nosel, bias_target, U3, U5 = _setup_estimating_eqn(self.query_spec, + self.target_spec) + Q = self.query_spec + TS = self.target_spec + cond_precision = np.linalg.inv(Q.cond_cov) conjugate_arg = cond_precision.dot(Q.cond_mean) @@ -48,15 +42,15 @@ def solve_estimating_eqn(self, Q.offset, **self.solve_args) - final_estimator = self.cov_target.dot(self.prec_target_nosel).dot(self.observed_target) \ - + self.regress_target_score.dot(Q.M1.dot(Q.opt_linear)).dot(Q.cond_mean - soln) \ - - self.bias_target + final_estimator = TS.cov_target.dot(prec_target_nosel).dot(TS.observed_target) \ + + TS.regress_target_score.dot(Q.M1.dot(Q.opt_linear)).dot(Q.cond_mean - soln) \ + - bias_target - observed_info_natural = self.prec_target_nosel + self.T3 - self.T5.dot(hess.dot(self.T5.T)) + observed_info_natural = prec_target_nosel + U3 - U5.dot(hess.dot(U5.T)) - unbiased_estimator = self.cov_target.dot(self.prec_target_nosel).dot(self.observed_target) - self.bias_target + unbiased_estimator = TS.cov_target.dot(prec_target_nosel).dot(TS.observed_target) - bias_target - observed_info_mean = self.cov_target.dot(observed_info_natural.dot(self.cov_target)) + 
observed_info_mean = TS.cov_target.dot(observed_info_natural.dot(TS.cov_target)) Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) @@ -96,24 +90,27 @@ def solve_estimating_eqn(self, return result, observed_info_mean, log_ref - def _setup_estimating_eqn(self): +def _setup_estimating_eqn(query_spec, + target_spec): - Q = self.query_spec - T1 = self.regress_target_score.T.dot(self.prec_target) - T2 = T1.T.dot(Q.M2.dot(T1)) - T3 = T1.T.dot(Q.M3.dot(T1)) - T4 = Q.M1.dot(Q.opt_linear).dot(Q.cond_cov).dot(Q.opt_linear.T.dot(Q.M1.T.dot(T1))) - T5 = T1.T.dot(Q.M1.dot(Q.opt_linear)) + Q = query_spec + TS = target_spec + + prec_target = np.linalg.inv(TS.cov_target) + U1 = TS.regress_target_score.T.dot(prec_target) + U2 = U1.T.dot(Q.M2.dot(U1)) + U3 = U1.T.dot(Q.M3.dot(U1)) + U4 = Q.M1.dot(Q.opt_linear).dot(Q.cond_cov).dot(Q.opt_linear.T.dot(Q.M1.T.dot(U1))) + U5 = U1.T.dot(Q.M1.dot(Q.opt_linear)) - self.prec_target_nosel = self.prec_target + T2 - T3 + prec_target_nosel = prec_target + U2 - U3 - _P = -(T1.T.dot(Q.M1.dot(Q.observed_score)) + T2.dot(self.observed_target)) + _P = -(U1.T.dot(Q.M1.dot(Q.observed_score)) + U2.dot(TS.observed_target)) - self.bias_target = self.cov_target.dot(T1.T.dot(-T4.dot(self.observed_target) + bias_target = TS.cov_target.dot(U1.T.dot(-U4.dot(TS.observed_target) + Q.M1.dot(Q.opt_linear.dot(Q.cond_mean))) - _P) - self.T3 = T3 - self.T5 = T5 + return prec_target_nosel, bias_target, U3, U5 From 7bbcf9012bd50bb0aeeea394d44a6606af372085 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 17 Nov 2021 12:20:22 -0800 Subject: [PATCH 167/187] specification as a property; use QS instead of Q; standardizing grid_inference --- selectinf/randomized/approx_reference.py | 345 ++++++++++++----------- selectinf/randomized/base.py | 294 +++++++++++++++++++ selectinf/randomized/exact_reference.py | 5 +- selectinf/randomized/query.py | 31 +- 4 files changed, 491 insertions(+), 184 deletions(-) create mode 100644 selectinf/randomized/base.py diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index bb069396e..588838cfc 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -5,12 +5,12 @@ from ..distributions.discrete_family import discrete_family from ..algorithms.barrier_affine import solve_barrier_affine_py -from .selective_MLE import mle_inference +from .base import grid_inference -class approximate_grid_inference(object): +class approximate_grid_inference(grid_inference): def __init__(self, - query, + query_spec, target_spec, solve_args={'tol': 1.e-12}, useIP=False): @@ -33,49 +33,12 @@ def __init__(self, Arguments passed to solver. 
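Pulling `_setup_estimating_eqn` out as a module-level function makes it a pure computation on `(query_spec, target_spec)`. The U1–U5 products are ordinary matrix algebra, and the main thing to keep straight is their shapes; a shape-only sketch with random placeholders (dimensions and the M1/M2/M3 matrices below are arbitrary illustrations, not taken from a fitted model):

```python
import numpy as np

rng = np.random.default_rng(0)
ntarget, p, nopt = 3, 5, 4                      # arbitrary illustration sizes

cov_target = np.identity(ntarget)
regress_target_score = rng.standard_normal((ntarget, p))
M1 = rng.standard_normal((p, p))                # placeholders for the query's score matrices
M2 = rng.standard_normal((p, p))
M3 = rng.standard_normal((p, p))
opt_linear = rng.standard_normal((p, nopt))
cond_cov = np.identity(nopt)

prec_target = np.linalg.inv(cov_target)
U1 = regress_target_score.T.dot(prec_target)    # (p, ntarget)
U2 = U1.T.dot(M2.dot(U1))                       # (ntarget, ntarget)
U3 = U1.T.dot(M3.dot(U1))                       # (ntarget, ntarget)
U4 = M1.dot(opt_linear).dot(cond_cov).dot(opt_linear.T.dot(M1.T.dot(U1)))  # (p, ntarget)
U5 = U1.T.dot(M1.dot(opt_linear))               # (ntarget, nopt)

prec_target_nosel = prec_target + U2 - U3
print(prec_target_nosel.shape, U4.shape, U5.shape)
```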
""" - self.solve_args = solve_args + grid_inference.__init__(self, + query_spec, + target_spec, + solve_args=solve_args) - (observed_target, - cov_target, - regress_target_score) = target_spec[:3] - - self.observed_target = observed_target - self.cov_target = cov_target - self.prec_target = np.linalg.inv(cov_target) - self.regress_target_score = regress_target_score - - self.cond_mean = query.cond_mean - self.cond_cov = query.cond_cov - self.cond_precision = np.linalg.inv(self.cond_cov) - self.opt_linear = query.opt_linear - - self.linear_part = query.linear_part - self.offset = query.offset - - self.M1 = query.M1 - self.M2 = query.M2 - self.M3 = query.M3 - self.observed_soln = query.observed_opt_state - - self.observed_score = query.observed_score_state + query.observed_subgrad - - G = mle_inference(query, - target_spec, - solve_args=solve_args) - - _, inverse_info, log_ref = G.solve_estimating_eqn() - - self.ntarget = ntarget = cov_target.shape[0] - _scale = 4 * np.sqrt(np.diag(inverse_info)) - - if useIP == False: - ngrid = 1000 - self.stat_grid = np.zeros((ntarget, ngrid)) - for j in range(ntarget): - self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], - observed_target[j] + 1.5 * _scale[j], - num=ngrid) - else: + if useIP: ngrid = 60 self.stat_grid = np.zeros((ntarget, ngrid)) for j in range(ntarget): @@ -85,47 +48,105 @@ def __init__(self, self.useIP = useIP - self.inverse_info = inverse_info - - def summary(self, - alternatives=None, - parameter=None, - level=0.9): - """ - Produce p-values and confidence intervals for targets - of model including selected features - Parameters - ---------- - alternatives : [str], optional - Sequence of strings describing the alternatives, - should be values of ['twosided', 'less', 'greater'] - parameter : np.array - Hypothesized value for parameter -- defaults to 0. - level : float - Confidence level. 
- """ - - if parameter is not None: - pivots = self.approx_pivots(parameter, - alternatives=alternatives)[0] - else: - pivots = None - - pvalues = self._approx_pivots(np.zeros_like(self.observed_target), - alternatives=alternatives)[0] - lower, upper = self._approx_intervals(level=level) - result = pd.DataFrame({'target': self.observed_target, - 'pvalue': pvalues, - 'alternative': alternatives, - 'lower_confidence': lower, - 'upper_confidence': upper}) - - if not np.all(parameter == 0): - result.insert(4, 'pivot', pivots) - result.insert(5, 'parameter', parameter) - - return result + # self.useIP = useIP + # self.query_spec = query_spec + # self.target_spec = target_spec + # query = query_spec + + # self.solve_args = solve_args + + # (observed_target, + # cov_target, + # regress_target_score) = target_spec[:3] + + # self.observed_target = observed_target + # self.cov_target = cov_target + # self.prec_target = np.linalg.inv(cov_target) + # self.regress_target_score = regress_target_score + + # self.cond_mean = query.cond_mean + # self.cond_cov = query.cond_cov + # self.cond_precision = np.linalg.inv(self.cond_cov) + # self.opt_linear = query.opt_linear + + # self.linear_part = query.linear_part + # self.offset = query.offset + + # self.M1 = query.M1 + # self.M2 = query.M2 + # self.M3 = query.M3 + # self.observed_soln = query.observed_opt_state + + # self.observed_score = query.observed_score_state + query.observed_subgrad + + # G = mle_inference(query, + # target_spec, + # solve_args=solve_args) + + # _, inverse_info, log_ref = G.solve_estimating_eqn() + + # self.ntarget = ntarget = cov_target.shape[0] + # _scale = 4 * np.sqrt(np.diag(inverse_info)) + + # if useIP == False: + # ngrid = 1000 + # self.stat_grid = np.zeros((ntarget, ngrid)) + # for j in range(ntarget): + # self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], + # observed_target[j] + 1.5 * _scale[j], + # num=ngrid) + # else: + # ngrid = 60 + # self.stat_grid = np.zeros((ntarget, ngrid)) + # for j in range(ntarget): + # self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], + # observed_target[j] + 1.5 * _scale[j], + # num=ngrid) + + + # self.useIP = useIP + + + # def summary(self, + # alternatives=None, + # parameter=None, + # level=0.9): + # """ + # Produce p-values and confidence intervals for targets + # of model including selected features + # Parameters + # ---------- + # alternatives : [str], optional + # Sequence of strings describing the alternatives, + # should be values of ['twosided', 'less', 'greater'] + # parameter : np.array + # Hypothesized value for parameter -- defaults to 0. + # level : float + # Confidence level. 
+ # """ + + # if parameter is not None: + # pivots = self._pivots(parameter, + # alternatives=alternatives) + # else: + # pivots = None + + # pvalues = self._pivots(np.zeros_like(self.observed_target), + # alternatives=alternatives) + # lower, upper = self._intervals(level=level) + + # result = pd.DataFrame({'target': self.observed_target, + # 'pvalue': pvalues, + # 'alternative': alternatives, + # 'lower_confidence': lower, + # 'upper_confidence': upper}) + + # if not np.all(parameter == 0): + # result.insert(4, 'pivot', pivots) + # result.insert(5, 'parameter', parameter) + + # return result def _approx_log_reference(self, observed_target, @@ -206,116 +227,102 @@ def _construct_families(self): np.exp(logW))) self._log_ref = _log_ref - # construction of families follows `selectinf.learning.core` - - # logG = - 0.5 * grid**2 / var_target - # logG -= logG.max() - # import matplotlib.pyplot as plt - - # plt.plot(self.stat_grid[m][10:30], approx_log_ref[10:30]) - # plt.plot(self.stat_grid[m][:10], approx_log_ref[:10], 'r', linewidth=4) - # plt.plot(self.stat_grid[m][30:], approx_log_ref[30:], 'r', linewidth=4) - # plt.plot(self.stat_grid[m]*1.5, fapprox(self.stat_grid[m]*1.5), 'k--') - # plt.show() - # plt.plot(grid, logW) - # plt.plot(grid, logG) + # def _pivots(self, + # mean_parameter, + # alternatives=None): - def _approx_pivots(self, - mean_parameter, - alternatives=None): + # if not hasattr(self, "_families"): + # self._construct_families() - if not hasattr(self, "_families"): - self._construct_families() + # if alternatives is None: + # alternatives = ['twosided'] * self.ntarget - if alternatives is None: - alternatives = ['twosided'] * self.ntarget + # pivot = [] - pivot = [] + # for m in range(self.ntarget): - for m in range(self.ntarget): + # family = self._families[m] + # var_target = 1. / ((self.precs[m])[0, 0]) - family = self._families[m] - var_target = 1. 
/ ((self.precs[m])[0, 0]) + # mean = self.S[m].dot(mean_parameter[m].reshape((1,))) + self.r[m] + # # construction of pivot from families follows `selectinf.learning.core` - mean = self.S[m].dot(mean_parameter[m].reshape((1,))) + self.r[m] - # construction of pivot from families follows `selectinf.learning.core` + # _cdf = family.cdf((mean[0] - self.observed_target[m]) / var_target, x=self.observed_target[m]) - _cdf = family.cdf((mean[0] - self.observed_target[m]) / var_target, x=self.observed_target[m]) + # if alternatives[m] == 'twosided': + # pivot.append(2 * min(_cdf, 1 - _cdf)) + # elif alternatives[m] == 'greater': + # pivot.append(1 - _cdf) + # elif alternatives[m] == 'less': + # pivot.append(_cdf) + # else: + # raise ValueError('alternative should be in ["twosided", "less", "greater"]') + # return pivot # , self._log_ref - if alternatives[m] == 'twosided': - pivot.append(2 * min(_cdf, 1 - _cdf)) - elif alternatives[m] == 'greater': - pivot.append(1 - _cdf) - elif alternatives[m] == 'less': - pivot.append(_cdf) - else: - raise ValueError('alternative should be in ["twosided", "less", "greater"]') - return pivot, self._log_ref + # def _intervals(self, + # level=0.9): - def _approx_intervals(self, - level=0.9): + # if not hasattr(self, "_families"): + # self._construct_families() - if not hasattr(self, "_families"): - self._construct_families() + # lower, upper = [], [] - lower, upper = [], [] + # for m in range(self.ntarget): + # # construction of intervals from families follows `selectinf.learning.core` + # family = self._families[m] + # observed_target = self.observed_target[m] - for m in range(self.ntarget): - # construction of intervals from families follows `selectinf.learning.core` - family = self._families[m] - observed_target = self.observed_target[m] + # l, u = family.equal_tailed_interval(observed_target, + # alpha=1 - level) - l, u = family.equal_tailed_interval(observed_target, - alpha=1 - level) + # var_target = 1. / ((self.precs[m])[0, 0]) - var_target = 1. / ((self.precs[m])[0, 0]) + # lower.append(l * var_target + observed_target) + # upper.append(u * var_target + observed_target) - lower.append(l * var_target + observed_target) - upper.append(u * var_target + observed_target) + # return np.asarray(lower), np.asarray(upper) - return np.asarray(lower), np.asarray(upper) + # ### Private method + # def _construct_density(self): - ### Private method - def _construct_density(self): + # precs = {} + # S = {} + # r = {} + # T = {} - precs = {} - S = {} - r = {} - T = {} + # p = self.regress_target_score.shape[1] - p = self.regress_target_score.shape[1] - - for m in range(self.ntarget): - observed_target_uni = (self.observed_target[m]).reshape((1,)) - cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) - prec_target = 1. / cov_target_uni - regress_target_score_uni = self.regress_target_score[m, :].reshape((1, p)) + # for m in range(self.ntarget): + # observed_target_uni = (self.observed_target[m]).reshape((1,)) + # cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) + # prec_target = 1. 
/ cov_target_uni + # regress_target_score_uni = self.regress_target_score[m, :].reshape((1, p)) - T1 = regress_target_score_uni.T.dot(prec_target) - T2 = T1.T.dot(self.M2.dot(T1)) - T3 = T1.T.dot(self.M3.dot(T1)) - T4 = self.M1.dot(self.opt_linear).dot(self.cond_cov).dot(self.opt_linear.T.dot(self.M1.T.dot(T1))) - T5 = T1.T.dot(self.M1.dot(self.opt_linear)) + # T1 = regress_target_score_uni.T.dot(prec_target) + # T2 = T1.T.dot(self.M2.dot(T1)) + # T3 = T1.T.dot(self.M3.dot(T1)) + # T4 = self.M1.dot(self.opt_linear).dot(self.cond_cov).dot(self.opt_linear.T.dot(self.M1.T.dot(T1))) + # T5 = T1.T.dot(self.M1.dot(self.opt_linear)) - _T = self.cond_cov.dot(T5.T) + # _T = self.cond_cov.dot(T5.T) - prec_target_nosel = prec_target + T2 - T3 + # prec_target_nosel = prec_target + T2 - T3 - _P = -(T1.T.dot(self.M1.dot(self.observed_score)) + T2.dot(observed_target_uni)) + # _P = -(T1.T.dot(self.M1.dot(self.observed_score)) + T2.dot(observed_target_uni)) - bias_target = cov_target_uni.dot( - T1.T.dot(-T4.dot(observed_target_uni) + self.M1.dot(self.opt_linear.dot(self.cond_mean))) - _P) + # bias_target = cov_target_uni.dot( + # T1.T.dot(-T4.dot(observed_target_uni) + self.M1.dot(self.opt_linear.dot(self.cond_mean))) - _P) - _r = np.linalg.inv(prec_target_nosel).dot(prec_target.dot(bias_target)) - _S = np.linalg.inv(prec_target_nosel).dot(prec_target) + # _r = np.linalg.inv(prec_target_nosel).dot(prec_target.dot(bias_target)) + # _S = np.linalg.inv(prec_target_nosel).dot(prec_target) - S[m] = _S - r[m] = _r - precs[m] = prec_target_nosel - T[m] = _T + # S[m] = _S + # r[m] = _r + # precs[m] = prec_target_nosel + # T[m] = _T - self.precs = precs - self.S = S - self.r = r - self.T = T + # self.precs = precs + # self.S = S + # self.r = r + # self.T = T diff --git a/selectinf/randomized/base.py b/selectinf/randomized/base.py new file mode 100644 index 000000000..3f19eaeb2 --- /dev/null +++ b/selectinf/randomized/base.py @@ -0,0 +1,294 @@ +import numpy as np, pandas as pd + +from .selective_MLE import mle_inference + +class grid_inference(object): + + def __init__(self, + query_spec, + target_spec, + solve_args={'tol': 1.e-12}): + + """ + Produce p-values and confidence intervals for targets + of model including selected features + Parameters + ---------- + query : `gaussian_query` + A Gaussian query which has information + to describe implied Gaussian. + observed_target : ndarray + Observed estimate of target. + cov_target : ndarray + Estimated covaraince of target. + cov_target_score : ndarray + Estimated covariance of target and score of randomized query. + solve_args : dict, optional + Arguments passed to solver. 
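The new `base.py` centralizes what the two grid-based methods share — the grid, the family construction scaffolding, and the pivot/interval code — so subclasses only differ in how the log reference density is evaluated. A toy version of that split (class names here are illustrative, not selectinf classes):

```python
import numpy as np

class ToyGridInference:
    # shared machinery: build a grid, turn log-weights into a two-sided pivot
    def __init__(self, observed, scale, ngrid=201):
        self.observed = observed
        self.grid = np.linspace(observed - 3 * scale, observed + 3 * scale, ngrid)

    def pvalue(self):
        logW = self._log_reference(self.grid)      # supplied by subclasses
        W = np.exp(logW - logW.max())
        cdf = np.cumsum(W) / W.sum()
        _cdf = float(np.interp(self.observed, self.grid, cdf))
        return 2 * min(_cdf, 1 - _cdf)

class ToyGaussianReference(ToyGridInference):
    def _log_reference(self, grid):
        # a standard-normal reference; the real subclasses use the selective law
        return -0.5 * grid ** 2

print(ToyGaussianReference(observed=1.0, scale=1.0).pvalue())
```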
+ """ + + self.query_spec = query_spec + self.target_spec = target_spec + query = query_spec + + self.solve_args = solve_args + + (observed_target, + cov_target, + regress_target_score) = target_spec[:3] + + self.observed_target = observed_target + self.cov_target = cov_target + self.prec_target = np.linalg.inv(cov_target) + self.regress_target_score = regress_target_score + + self.cond_mean = query.cond_mean + self.cond_cov = query.cond_cov + self.cond_precision = np.linalg.inv(self.cond_cov) + self.opt_linear = query.opt_linear + + self.linear_part = query.linear_part + self.offset = query.offset + + self.M1 = query.M1 + self.M2 = query.M2 + self.M3 = query.M3 + self.observed_soln = query.observed_opt_state + + self.observed_score = query.observed_score_state + query.observed_subgrad + + G = mle_inference(query, + target_spec, + solve_args=solve_args) + + _, inverse_info, log_ref = G.solve_estimating_eqn() + + self.ntarget = ntarget = cov_target.shape[0] + _scale = 4 * np.sqrt(np.diag(inverse_info)) + self.inverse_info = inverse_info + + ngrid = 1000 + self.stat_grid = np.zeros((ntarget, ngrid)) + for j in range(ntarget): + self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], + observed_target[j] + 1.5 * _scale[j], + num=ngrid) + + def summary(self, + alternatives=None, + parameter=None, + level=0.9): + """ + Produce p-values and confidence intervals for targets + of model including selected features + Parameters + ---------- + alternatives : [str], optional + Sequence of strings describing the alternatives, + should be values of ['twosided', 'less', 'greater'] + parameter : np.array + Hypothesized value for parameter -- defaults to 0. + level : float + Confidence level. + """ + + if parameter is not None: + pivots = self._pivots(parameter, + alternatives=alternatives) + else: + pivots = None + + pvalues = self._pivots(np.zeros_like(self.observed_target), + alternatives=alternatives) + lower, upper = self._intervals(level=level) + + result = pd.DataFrame({'target': self.observed_target, + 'pvalue': pvalues, + 'alternative': alternatives, + 'lower_confidence': lower, + 'upper_confidence': upper}) + + if not np.all(parameter == 0): + result.insert(4, 'pivot', pivots) + result.insert(5, 'parameter', parameter) + + return result + + def _approx_log_reference(self, + observed_target, + cov_target, + linear_coef, + grid): + + """ + Approximate the log of the reference density on a grid. 
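`summary` reports p-values (pivots evaluated at zero) alongside pivots at any hypothesized `parameter`, and only inserts the extra columns when a nonzero parameter is supplied. A small pandas sketch of that column handling (toy values only):

```python
import numpy as np, pandas as pd

target = np.array([1.1, -0.3])
pvalues = np.array([0.03, 0.60])          # pivots evaluated at parameter = 0
pivots = np.array([0.45, 0.52])           # pivots evaluated at the hypothesized parameter
parameter = np.array([1.0, 0.0])

result = pd.DataFrame({'target': target,
                       'pvalue': pvalues,
                       'alternative': ['twosided', 'twosided'],
                       'lower_confidence': [0.2, -1.1],
                       'upper_confidence': [2.0, 0.5]})

if not np.all(parameter == 0):
    result.insert(4, 'pivot', pivots)
    result.insert(5, 'parameter', parameter)
print(result)
```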
+ """ + if np.asarray(observed_target).shape in [(), (0,)]: + raise ValueError('no target specified') + + ref_hat = [] + solver = solve_barrier_affine_py + + for k in range(grid.shape[0]): + # in the usual D = N + Gamma theta.hat, + # regress_opt_target is "something" times Gamma, + # where "something" comes from implied Gaussian + # cond_mean is "something" times D + # Gamma is cov_target_score.T.dot(prec_target) + + cond_mean_grid = (linear_coef.dot(np.atleast_1d(grid[k] - observed_target)) + self.cond_mean) + conjugate_arg = self.cond_precision.dot(cond_mean_grid) + + val, _, _ = solver(conjugate_arg, + self.cond_precision, + self.observed_soln, + self.linear_part, + self.offset, + **self.solve_args) + + ref_hat.append(-val - (conjugate_arg.T.dot(self.cond_cov).dot(conjugate_arg) / 2.)) + + return np.asarray(ref_hat) + + def _construct_families(self): + + self._construct_density() + + self._families = [] + _log_ref = np.zeros((self.ntarget, 1000)) + for m in range(self.ntarget): + + observed_target_uni = (self.observed_target[m]).reshape((1,)) + cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) + + var_target = 1. / ((self.precs[m])[0, 0]) + + approx_log_ref = self._approx_log_reference(observed_target_uni, + cov_target_uni, + self.T[m], + self.stat_grid[m]) + + if self.useIP == False: + + logW = (approx_log_ref - 0.5 * (self.stat_grid[m] - self.observed_target[m]) ** 2 / var_target) + logW -= logW.max() + _log_ref[m,:] = logW + self._families.append(discrete_family(self.stat_grid[m], + np.exp(logW))) + else: + + approx_fn = interp1d(self.stat_grid[m], + approx_log_ref, + kind='quadratic', + bounds_error=False, + fill_value='extrapolate') + + grid = np.linspace(self.stat_grid[m].min(), self.stat_grid[m].max(), 1000) + logW = (approx_fn(grid) - + 0.5 * (grid - self.observed_target[m]) ** 2 / var_target) + + logW -= logW.max() + _log_ref[m, :] = logW + self._families.append(discrete_family(grid, + np.exp(logW))) + + self._log_ref = _log_ref + + def _pivots(self, + mean_parameter, + alternatives=None): + + if not hasattr(self, "_families"): + self._construct_families() + + if alternatives is None: + alternatives = ['twosided'] * self.ntarget + + pivot = [] + + for m in range(self.ntarget): + + family = self._families[m] + var_target = 1. / ((self.precs[m])[0, 0]) + + mean = self.S[m].dot(mean_parameter[m].reshape((1,))) + self.r[m] + # construction of pivot from families follows `selectinf.learning.core` + + _cdf = family.cdf((mean[0] - self.observed_target[m]) / var_target, x=self.observed_target[m]) + + if alternatives[m] == 'twosided': + pivot.append(2 * min(_cdf, 1 - _cdf)) + elif alternatives[m] == 'greater': + pivot.append(1 - _cdf) + elif alternatives[m] == 'less': + pivot.append(_cdf) + else: + raise ValueError('alternative should be in ["twosided", "less", "greater"]') + return pivot # , self._log_ref + + def _intervals(self, + level=0.9): + + if not hasattr(self, "_families"): + self._construct_families() + + lower, upper = [], [] + + for m in range(self.ntarget): + # construction of intervals from families follows `selectinf.learning.core` + family = self._families[m] + observed_target = self.observed_target[m] + + l, u = family.equal_tailed_interval(observed_target, + alpha=1 - level) + + var_target = 1. 
/ ((self.precs[m])[0, 0]) + + lower.append(l * var_target + observed_target) + upper.append(u * var_target + observed_target) + + return np.asarray(lower), np.asarray(upper) + + ### Private method + def _construct_density(self): + + precs = {} + S = {} + r = {} + T = {} + + p = self.regress_target_score.shape[1] + + for m in range(self.ntarget): + observed_target_uni = (self.observed_target[m]).reshape((1,)) + cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) + prec_target = 1. / cov_target_uni + regress_target_score_uni = self.regress_target_score[m, :].reshape((1, p)) + + T1 = regress_target_score_uni.T.dot(prec_target) + T2 = T1.T.dot(self.M2.dot(T1)) + T3 = T1.T.dot(self.M3.dot(T1)) + T4 = self.M1.dot(self.opt_linear).dot(self.cond_cov).dot(self.opt_linear.T.dot(self.M1.T.dot(T1))) + T5 = T1.T.dot(self.M1.dot(self.opt_linear)) + + _T = self.cond_cov.dot(T5.T) + + prec_target_nosel = prec_target + T2 - T3 + + _P = -(T1.T.dot(self.M1.dot(self.observed_score)) + T2.dot(observed_target_uni)) + + bias_target = cov_target_uni.dot( + T1.T.dot(-T4.dot(observed_target_uni) + self.M1.dot(self.opt_linear.dot(self.cond_mean))) - _P) + + _r = np.linalg.inv(prec_target_nosel).dot(prec_target.dot(bias_target)) + _S = np.linalg.inv(prec_target_nosel).dot(prec_target) + + S[m] = _S + r[m] = _r + precs[m] = prec_target_nosel + T[m] = _T + + self.precs = precs + self.S = S + self.r = r + self.T = T diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index 5d9ba19a6..ccc76e37f 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -5,8 +5,9 @@ from ..distributions.discrete_family import discrete_family from .selective_MLE import mle_inference +from .base import grid_inference -class exact_grid_inference(object): +class exact_grid_inference(grid_inference): def __init__(self, query, @@ -97,7 +98,7 @@ def summary(self, pivots = None pvalues = self._pivots(np.zeros_like(self.observed_target), - alternatives=alternatives) + alternatives=alternatives) lower, upper = self._intervals(level=level) result = pd.DataFrame({'target': self.observed_target, diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 594b08312..bd3a1bcd1 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -67,6 +67,23 @@ def __init__(self, randomization, perturb=None): self._randomized = False self._setup = False + @property + def specification(self): + return QuerySpec(cond_mean=self.cond_mean, + cond_cov=self.cond_cov, + opt_linear=self.opt_linear, + linear_part=self.affine_con.linear_part, + offset=self.affine_con.offset, + M1=self.M1, + M2=self.M2, + M3=self.M3, + observed_opt_state=self.observed_opt_state, + observed_score_state=self.observed_score_state, + observed_subgrad=self.observed_subgrad, + observed_soln=self.observed_opt_state, + observed_score=self.observed_score_state + self.observed_subgrad) + + # Methods reused by subclasses def randomize(self, perturb=None): @@ -216,19 +233,7 @@ def inference(self, Statistical summary for specified targets. 
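Exposing the bundle as a `specification` property lets any consumer grab a consistent snapshot of the fitted query without reaching into its attributes one by one. A stripped-down sketch of the pattern (toy class, not the real `gaussian_query`):

```python
from typing import NamedTuple
import numpy as np

class ToySpec(NamedTuple):
    cond_mean: np.ndarray
    cond_cov: np.ndarray

class ToyQuery:
    def __init__(self):
        self.cond_mean = np.zeros(3)
        self.cond_cov = np.identity(3)

    @property
    def specification(self):
        # assembled on demand, so it always reflects the current fitted state
        return ToySpec(cond_mean=self.cond_mean, cond_cov=self.cond_cov)

query_spec = ToyQuery().specification
print(query_spec.cond_cov.shape)
```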
""" - query_spec = QuerySpec(cond_mean=self.cond_mean, - cond_cov=self.cond_cov, - opt_linear=self.opt_linear, - linear_part=self.affine_con.linear_part, - offset=self.affine_con.offset, - M1=self.M1, - M2=self.M2, - M3=self.M3, - observed_opt_state=self.observed_opt_state, - observed_score_state=self.observed_score_state, - observed_subgrad=self.observed_subgrad, - observed_soln=self.observed_opt_state, - observed_score=self.observed_score_state + self.observed_subgrad) + query_spec = self.specification if method == 'selective_MLE': G = mle_inference(query_spec, From 559bc96bee16942564eaec98f8d0748db53d0996 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 17 Nov 2021 12:41:00 -0800 Subject: [PATCH 168/187] simplifying grid methods --- selectinf/randomized/approx_reference.py | 225 ++-------------------- selectinf/randomized/base.py | 74 +++---- selectinf/randomized/exact_reference.py | 234 ++--------------------- selectinf/randomized/selective_MLE.py | 30 +-- 4 files changed, 82 insertions(+), 481 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index 588838cfc..e7d2df42f 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -49,105 +49,6 @@ def __init__(self, self.useIP = useIP - # self.useIP = useIP - # self.query_spec = query_spec - # self.target_spec = target_spec - # query = query_spec - - # self.solve_args = solve_args - - # (observed_target, - # cov_target, - # regress_target_score) = target_spec[:3] - - # self.observed_target = observed_target - # self.cov_target = cov_target - # self.prec_target = np.linalg.inv(cov_target) - # self.regress_target_score = regress_target_score - - # self.cond_mean = query.cond_mean - # self.cond_cov = query.cond_cov - # self.cond_precision = np.linalg.inv(self.cond_cov) - # self.opt_linear = query.opt_linear - - # self.linear_part = query.linear_part - # self.offset = query.offset - - # self.M1 = query.M1 - # self.M2 = query.M2 - # self.M3 = query.M3 - # self.observed_soln = query.observed_opt_state - - # self.observed_score = query.observed_score_state + query.observed_subgrad - - # G = mle_inference(query, - # target_spec, - # solve_args=solve_args) - - # _, inverse_info, log_ref = G.solve_estimating_eqn() - - # self.ntarget = ntarget = cov_target.shape[0] - # _scale = 4 * np.sqrt(np.diag(inverse_info)) - - # if useIP == False: - # ngrid = 1000 - # self.stat_grid = np.zeros((ntarget, ngrid)) - # for j in range(ntarget): - # self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], - # observed_target[j] + 1.5 * _scale[j], - # num=ngrid) - # else: - # ngrid = 60 - # self.stat_grid = np.zeros((ntarget, ngrid)) - # for j in range(ntarget): - # self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], - # observed_target[j] + 1.5 * _scale[j], - # num=ngrid) - - - # self.useIP = useIP - - - # def summary(self, - # alternatives=None, - # parameter=None, - # level=0.9): - # """ - # Produce p-values and confidence intervals for targets - # of model including selected features - # Parameters - # ---------- - # alternatives : [str], optional - # Sequence of strings describing the alternatives, - # should be values of ['twosided', 'less', 'greater'] - # parameter : np.array - # Hypothesized value for parameter -- defaults to 0. - # level : float - # Confidence level. 
- # """ - - # if parameter is not None: - # pivots = self._pivots(parameter, - # alternatives=alternatives) - # else: - # pivots = None - - # pvalues = self._pivots(np.zeros_like(self.observed_target), - # alternatives=alternatives) - # lower, upper = self._intervals(level=level) - - # result = pd.DataFrame({'target': self.observed_target, - # 'pvalue': pvalues, - # 'alternative': alternatives, - # 'lower_confidence': lower, - # 'upper_confidence': upper}) - - # if not np.all(parameter == 0): - # result.insert(4, 'pivot', pivots) - # result.insert(5, 'parameter', parameter) - - # return result - def _approx_log_reference(self, observed_target, cov_target, @@ -157,6 +58,11 @@ def _approx_log_reference(self, """ Approximate the log of the reference density on a grid. """ + + TS = self.target_spec + QS = self.query_spec + cond_precision = np.linalg.inv(QS.cond_cov) + if np.asarray(observed_target).shape in [(), (0,)]: raise ValueError('no target specified') @@ -170,30 +76,33 @@ def _approx_log_reference(self, # cond_mean is "something" times D # Gamma is cov_target_score.T.dot(prec_target) - cond_mean_grid = (linear_coef.dot(np.atleast_1d(grid[k] - observed_target)) + self.cond_mean) - conjugate_arg = self.cond_precision.dot(cond_mean_grid) + cond_mean_grid = (linear_coef.dot(np.atleast_1d(grid[k] - observed_target)) + QS.cond_mean) + conjugate_arg = cond_precision.dot(cond_mean_grid) val, _, _ = solver(conjugate_arg, - self.cond_precision, - self.observed_soln, - self.linear_part, - self.offset, + cond_precision, + QS.observed_soln, + QS.linear_part, + QS.offset, **self.solve_args) - ref_hat.append(-val - (conjugate_arg.T.dot(self.cond_cov).dot(conjugate_arg) / 2.)) + ref_hat.append(-val - (conjugate_arg.T.dot(QS.cond_cov).dot(conjugate_arg) / 2.)) return np.asarray(ref_hat) def _construct_families(self): + TS = self.target_spec + QS = self.query_spec + self._construct_density() self._families = [] _log_ref = np.zeros((self.ntarget, 1000)) for m in range(self.ntarget): - observed_target_uni = (self.observed_target[m]).reshape((1,)) - cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) + observed_target_uni = (TS.observed_target[m]).reshape((1,)) + cov_target_uni = (np.diag(TS.cov_target)[m]).reshape((1, 1)) var_target = 1. / ((self.precs[m])[0, 0]) @@ -204,7 +113,7 @@ def _construct_families(self): if self.useIP == False: - logW = (approx_log_ref - 0.5 * (self.stat_grid[m] - self.observed_target[m]) ** 2 / var_target) + logW = (approx_log_ref - 0.5 * (self.stat_grid[m] - TS.observed_target[m]) ** 2 / var_target) logW -= logW.max() _log_ref[m,:] = logW self._families.append(discrete_family(self.stat_grid[m], @@ -228,101 +137,3 @@ def _construct_families(self): self._log_ref = _log_ref - # def _pivots(self, - # mean_parameter, - # alternatives=None): - - # if not hasattr(self, "_families"): - # self._construct_families() - - # if alternatives is None: - # alternatives = ['twosided'] * self.ntarget - - # pivot = [] - - # for m in range(self.ntarget): - - # family = self._families[m] - # var_target = 1. 
/ ((self.precs[m])[0, 0]) - - # mean = self.S[m].dot(mean_parameter[m].reshape((1,))) + self.r[m] - # # construction of pivot from families follows `selectinf.learning.core` - - # _cdf = family.cdf((mean[0] - self.observed_target[m]) / var_target, x=self.observed_target[m]) - - # if alternatives[m] == 'twosided': - # pivot.append(2 * min(_cdf, 1 - _cdf)) - # elif alternatives[m] == 'greater': - # pivot.append(1 - _cdf) - # elif alternatives[m] == 'less': - # pivot.append(_cdf) - # else: - # raise ValueError('alternative should be in ["twosided", "less", "greater"]') - # return pivot # , self._log_ref - - # def _intervals(self, - # level=0.9): - - # if not hasattr(self, "_families"): - # self._construct_families() - - # lower, upper = [], [] - - # for m in range(self.ntarget): - # # construction of intervals from families follows `selectinf.learning.core` - # family = self._families[m] - # observed_target = self.observed_target[m] - - # l, u = family.equal_tailed_interval(observed_target, - # alpha=1 - level) - - # var_target = 1. / ((self.precs[m])[0, 0]) - - # lower.append(l * var_target + observed_target) - # upper.append(u * var_target + observed_target) - - # return np.asarray(lower), np.asarray(upper) - - # ### Private method - # def _construct_density(self): - - # precs = {} - # S = {} - # r = {} - # T = {} - - # p = self.regress_target_score.shape[1] - - # for m in range(self.ntarget): - # observed_target_uni = (self.observed_target[m]).reshape((1,)) - # cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) - # prec_target = 1. / cov_target_uni - # regress_target_score_uni = self.regress_target_score[m, :].reshape((1, p)) - - # T1 = regress_target_score_uni.T.dot(prec_target) - # T2 = T1.T.dot(self.M2.dot(T1)) - # T3 = T1.T.dot(self.M3.dot(T1)) - # T4 = self.M1.dot(self.opt_linear).dot(self.cond_cov).dot(self.opt_linear.T.dot(self.M1.T.dot(T1))) - # T5 = T1.T.dot(self.M1.dot(self.opt_linear)) - - # _T = self.cond_cov.dot(T5.T) - - # prec_target_nosel = prec_target + T2 - T3 - - # _P = -(T1.T.dot(self.M1.dot(self.observed_score)) + T2.dot(observed_target_uni)) - - # bias_target = cov_target_uni.dot( - # T1.T.dot(-T4.dot(observed_target_uni) + self.M1.dot(self.opt_linear.dot(self.cond_mean))) - _P) - - # _r = np.linalg.inv(prec_target_nosel).dot(prec_target.dot(bias_target)) - # _S = np.linalg.inv(prec_target_nosel).dot(prec_target) - - # S[m] = _S - # r[m] = _r - # precs[m] = prec_target_nosel - # T[m] = _T - - # self.precs = precs - # self.S = S - # self.r = r - # self.T = T diff --git a/selectinf/randomized/base.py b/selectinf/randomized/base.py index 3f19eaeb2..66db3d955 100644 --- a/selectinf/randomized/base.py +++ b/selectinf/randomized/base.py @@ -29,49 +29,24 @@ def __init__(self, self.query_spec = query_spec self.target_spec = target_spec - query = query_spec - self.solve_args = solve_args - (observed_target, - cov_target, - regress_target_score) = target_spec[:3] - - self.observed_target = observed_target - self.cov_target = cov_target - self.prec_target = np.linalg.inv(cov_target) - self.regress_target_score = regress_target_score - - self.cond_mean = query.cond_mean - self.cond_cov = query.cond_cov - self.cond_precision = np.linalg.inv(self.cond_cov) - self.opt_linear = query.opt_linear - - self.linear_part = query.linear_part - self.offset = query.offset - - self.M1 = query.M1 - self.M2 = query.M2 - self.M3 = query.M3 - self.observed_soln = query.observed_opt_state - - self.observed_score = query.observed_score_state + query.observed_subgrad - - G = 
mle_inference(query, + G = mle_inference(query_spec, target_spec, solve_args=solve_args) _, inverse_info, log_ref = G.solve_estimating_eqn() - self.ntarget = ntarget = cov_target.shape[0] + TS = target_spec + self.ntarget = ntarget = TS.cov_target.shape[0] _scale = 4 * np.sqrt(np.diag(inverse_info)) self.inverse_info = inverse_info ngrid = 1000 self.stat_grid = np.zeros((ntarget, ngrid)) for j in range(ntarget): - self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], - observed_target[j] + 1.5 * _scale[j], + self.stat_grid[j, :] = np.linspace(TS.observed_target[j] - 1.5 * _scale[j], + TS.observed_target[j] + 1.5 * _scale[j], num=ngrid) def summary(self, @@ -92,17 +67,19 @@ def summary(self, Confidence level. """ + TS = self.target_spec + if parameter is not None: pivots = self._pivots(parameter, alternatives=alternatives) else: pivots = None - pvalues = self._pivots(np.zeros_like(self.observed_target), + pvalues = self._pivots(np.zeros_like(TS.observed_target), alternatives=alternatives) lower, upper = self._intervals(level=level) - result = pd.DataFrame({'target': self.observed_target, + result = pd.DataFrame({'target': TS.observed_target, 'pvalue': pvalues, 'alternative': alternatives, 'lower_confidence': lower, @@ -198,6 +175,8 @@ def _pivots(self, mean_parameter, alternatives=None): + TS = self.target_spec + if not hasattr(self, "_families"): self._construct_families() @@ -214,7 +193,7 @@ def _pivots(self, mean = self.S[m].dot(mean_parameter[m].reshape((1,))) + self.r[m] # construction of pivot from families follows `selectinf.learning.core` - _cdf = family.cdf((mean[0] - self.observed_target[m]) / var_target, x=self.observed_target[m]) + _cdf = family.cdf((mean[0] - TS.observed_target[m]) / var_target, x=TS.observed_target[m]) if alternatives[m] == 'twosided': pivot.append(2 * min(_cdf, 1 - _cdf)) @@ -229,6 +208,8 @@ def _pivots(self, def _intervals(self, level=0.9): + TS = self.target_spec + if not hasattr(self, "_families"): self._construct_families() @@ -237,7 +218,7 @@ def _intervals(self, for m in range(self.ntarget): # construction of intervals from families follows `selectinf.learning.core` family = self._families[m] - observed_target = self.observed_target[m] + observed_target = TS.observed_target[m] l, u = family.equal_tailed_interval(observed_target, alpha=1 - level) @@ -252,33 +233,36 @@ def _intervals(self, ### Private method def _construct_density(self): + TS = self.target_spec + QS = self.query_spec + precs = {} S = {} r = {} T = {} - p = self.regress_target_score.shape[1] + p = TS.regress_target_score.shape[1] for m in range(self.ntarget): - observed_target_uni = (self.observed_target[m]).reshape((1,)) - cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) + observed_target_uni = (TS.observed_target[m]).reshape((1,)) + cov_target_uni = (np.diag(TS.cov_target)[m]).reshape((1, 1)) prec_target = 1. 
/ cov_target_uni - regress_target_score_uni = self.regress_target_score[m, :].reshape((1, p)) + regress_target_score_uni = TS.regress_target_score[m, :].reshape((1, p)) T1 = regress_target_score_uni.T.dot(prec_target) - T2 = T1.T.dot(self.M2.dot(T1)) - T3 = T1.T.dot(self.M3.dot(T1)) - T4 = self.M1.dot(self.opt_linear).dot(self.cond_cov).dot(self.opt_linear.T.dot(self.M1.T.dot(T1))) - T5 = T1.T.dot(self.M1.dot(self.opt_linear)) + T2 = T1.T.dot(QS.M2.dot(T1)) + T3 = T1.T.dot(QS.M3.dot(T1)) + T4 = QS.M1.dot(QS.opt_linear).dot(QS.cond_cov).dot(QS.opt_linear.T.dot(QS.M1.T.dot(T1))) + T5 = T1.T.dot(QS.M1.dot(QS.opt_linear)) - _T = self.cond_cov.dot(T5.T) + _T = QS.cond_cov.dot(T5.T) prec_target_nosel = prec_target + T2 - T3 - _P = -(T1.T.dot(self.M1.dot(self.observed_score)) + T2.dot(observed_target_uni)) + _P = -(T1.T.dot(QS.M1.dot(QS.observed_score)) + T2.dot(observed_target_uni)) bias_target = cov_target_uni.dot( - T1.T.dot(-T4.dot(observed_target_uni) + self.M1.dot(self.opt_linear.dot(self.cond_mean))) - _P) + T1.T.dot(-T4.dot(observed_target_uni) + QS.M1.dot(QS.opt_linear.dot(QS.cond_mean))) - _P) _r = np.linalg.inv(prec_target_nosel).dot(prec_target.dot(bias_target)) _S = np.linalg.inv(prec_target_nosel).dot(prec_target) diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index ccc76e37f..d37a99b3e 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -9,121 +9,24 @@ class exact_grid_inference(grid_inference): - def __init__(self, - query, - target_spec, - solve_args={'tol': 1.e-12}): - - """ - Produce p-values and confidence intervals for targets - of model including selected features - Parameters - ---------- - query : `gaussian_query` - A Gaussian query which has information - to describe implied Gaussian. - observed_target : ndarray - Observed estimate of target. - cov_target : ndarray - Estimated covaraince of target. - cov_target_score : ndarray - Estimated covariance of target and score of randomized query. 
- """ - - - (observed_target, - cov_target, - regress_target_score) = target_spec[:3] - - self.observed_target = observed_target - self.cov_target = cov_target - self.prec_target = np.linalg.inv(cov_target) - self.regress_target_score = regress_target_score - - self.cond_mean = query.cond_mean - self.cond_cov = query.cond_cov - self.cond_precision = np.linalg.inv(self.cond_cov) - self.opt_linear = query.opt_linear - - self.linear_part = query.linear_part - self.offset = query.offset - - self.M1 = query.M1 - self.M2 = query.M2 - self.M3 = query.M3 - self.observed_soln = query.observed_opt_state - - self.observed_score = query.observed_score_state + query.observed_subgrad - - G = mle_inference(query, - target_spec, - solve_args=solve_args) - - _, inverse_info, log_ref = G.solve_estimating_eqn() - - self.ntarget = ntarget = cov_target.shape[0] - _scale = 4 * np.sqrt(np.diag(inverse_info)) - - ngrid = 1000 - self.stat_grid = np.zeros((ntarget, ngrid)) - for j in range(ntarget): - self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], - observed_target[j] + 1.5 * _scale[j], - num=ngrid) - - self.inverse_info = inverse_info - - def summary(self, - alternatives=None, - parameter=None, - level=0.90): - """ - Produce p-values and confidence intervals for targets - of model including selected features - Parameters - ---------- - alternatives : [str], optional - Sequence of strings describing the alternatives, - should be values of ['twosided', 'less', 'greater'] - parameter : np.array - Hypothesized value for parameter -- defaults to 0. - level : float - Confidence level. - """ - - if parameter is not None: - pivots = self._pivots(parameter, - alternatives=alternatives) - else: - pivots = None - - pvalues = self._pivots(np.zeros_like(self.observed_target), - alternatives=alternatives) - lower, upper = self._intervals(level=level) - - result = pd.DataFrame({'target': self.observed_target, - 'pvalue': pvalues, - 'alternative': alternatives, - 'lower_confidence': lower, - 'upper_confidence': upper}) - - if not np.all(parameter == 0): - result.insert(4, 'pivot', pivots) - result.insert(5, 'parameter', parameter) - - return result - def log_reference(self, observed_target, cov_target, linear_coef, grid): + QS = self.query_spec + TS = self.target_spec + if np.asarray(observed_target).shape in [(), (0,)]: raise ValueError('no target specified') ref_hat = [] + cond_precision = np.linalg.inv(QS.cond_cov) + num_opt = cond_precision.shape[0] + num_con = QS.linear_part.shape[0] + for k in range(grid.shape[0]): # in the usual D = N + Gamma theta.hat, # regress_opt_target is "something" times Gamma, @@ -131,27 +34,24 @@ def log_reference(self, # cond_mean is "something" times D # Gamma is cov_target_score.T.dot(prec_target) - num_opt = self.cond_precision.shape[0] - num_con = self.linear_part.shape[0] - cond_mean_grid = (linear_coef.dot(np.atleast_1d(grid[k] - observed_target)) + - self.cond_mean) + QS.cond_mean) #direction for decomposing o - eta = self.cond_precision.dot(linear_coef).dot(cov_target) + eta = cond_precision.dot(linear_coef).dot(cov_target) implied_mean = np.asscalar(eta.T.dot(cond_mean_grid)) - implied_cov = np.asscalar(eta.T.dot(self.cond_cov).dot(eta)) + implied_cov = np.asscalar(eta.T.dot(QS.cond_cov).dot(eta)) implied_prec = 1./implied_cov - _A = self.cond_cov.dot(eta) * implied_prec + _A = QS.cond_cov.dot(eta) * implied_prec R = np.identity(num_opt) - _A.dot(eta.T) - A = self.linear_part.dot(_A).reshape((-1,)) - b = -self.linear_part.dot(R).dot(self.observed_soln) + A = 
QS.linear_part.dot(_A).reshape((-1,)) + b = -QS.linear_part.dot(R).dot(QS.observed_soln) - trunc_ = np.true_divide((self.offset + b), A) + trunc_ = np.true_divide((QS.offset + b), A) neg_indx = np.asarray([j for j in range(num_con) if A[j] < 0.]) pos_indx = np.asarray([j for j in range(num_con) if A[j] > 0.]) @@ -186,14 +86,17 @@ def log_reference(self, def _construct_families(self): + QS = self.query_spec + TS = self.target_spec + self._construct_density() self._families = [] for m in range(self.ntarget): - observed_target_uni = (self.observed_target[m]).reshape((1,)) - cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) + observed_target_uni = (TS.observed_target[m]).reshape((1,)) + cov_target_uni = (np.diag(TS.cov_target)[m]).reshape((1, 1)) var_target = 1. / ((self.precs[m])[0, 0]) @@ -202,108 +105,11 @@ def _construct_families(self): self.T[m], self.stat_grid[m]) - logW = (log_ref - 0.5 * (self.stat_grid[m] - self.observed_target[m]) ** 2 / var_target) + logW = (log_ref - 0.5 * (self.stat_grid[m] - TS.observed_target[m]) ** 2 / var_target) logW -= logW.max() self._families.append(discrete_family(self.stat_grid[m], np.exp(logW))) - def _pivots(self, - mean_parameter, - alternatives=None): - - if not hasattr(self, "_families"): - self._construct_families() - - if alternatives is None: - alternatives = ['twosided'] * self.ntarget - - pivot = [] - - for m in range(self.ntarget): - - family = self._families[m] - var_target = 1. / ((self.precs[m])[0, 0]) - - mean = self.S[m].dot(mean_parameter[m].reshape((1,))) + self.r[m] - - _cdf = family.cdf((mean[0] - self.observed_target[m]) / var_target, x=self.observed_target[m]) - - if alternatives[m] == 'twosided': - pivot.append(2 * min(_cdf, 1 - _cdf)) - elif alternatives[m] == 'greater': - pivot.append(1 - _cdf) - elif alternatives[m] == 'less': - pivot.append(_cdf) - else: - raise ValueError('alternative should be in ["twosided", "less", "greater"]') - return pivot - - def _intervals(self, - level=0.9): - - if not hasattr(self, "_families"): - self._construct_families() - - lower, upper = [], [] - - for m in range(self.ntarget): - # construction of intervals from families follows `selectinf.learning.core` - family = self._families[m] - observed_target = self.observed_target[m] - - l, u = family.equal_tailed_interval(observed_target, - alpha=1 - level) - - var_target = 1. / ((self.precs[m])[0, 0]) - - lower.append(l * var_target + observed_target) - upper.append(u * var_target + observed_target) - - return np.asarray(lower), np.asarray(upper) - - ### Private method - def _construct_density(self): - - precs = {} - S = {} - r = {} - T = {} - - p = self.regress_target_score.shape[1] - - for m in range(self.ntarget): - observed_target_uni = (self.observed_target[m]).reshape((1,)) - cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) - prec_target = 1. 
/ cov_target_uni - regress_target_score_uni = self.regress_target_score[m, :].reshape((1, p)) - - T1 = regress_target_score_uni.T.dot(prec_target) - T2 = T1.T.dot(self.M2.dot(T1)) - T3 = T1.T.dot(self.M3.dot(T1)) - T4 = self.M1.dot(self.opt_linear).dot(self.cond_cov).dot(self.opt_linear.T.dot(self.M1.T.dot(T1))) - T5 = T1.T.dot(self.M1.dot(self.opt_linear)) - - _T = self.cond_cov.dot(T5.T) - - prec_target_nosel = prec_target + T2 - T3 - - _P = -(T1.T.dot(self.M1.dot(self.observed_score)) + T2.dot(observed_target_uni)) - - bias_target = cov_target_uni.dot(T1.T.dot(-T4.dot(observed_target_uni) + self.M1.dot(self.opt_linear.dot(self.cond_mean))) - _P) - - _r = np.linalg.inv(prec_target_nosel).dot(prec_target.dot(bias_target)) - _S = np.linalg.inv(prec_target_nosel).dot(prec_target) - - S[m] = _S - r[m] = _r - precs[m] = prec_target_nosel - T[m] = _T - - self.precs = precs - self.S = S - self.r = r - self.T = T - diff --git a/selectinf/randomized/selective_MLE.py b/selectinf/randomized/selective_MLE.py index 9fc302b25..76bd8907b 100644 --- a/selectinf/randomized/selective_MLE.py +++ b/selectinf/randomized/selective_MLE.py @@ -24,11 +24,11 @@ def solve_estimating_eqn(self, prec_target_nosel, bias_target, U3, U5 = _setup_estimating_eqn(self.query_spec, self.target_spec) - Q = self.query_spec + QS = self.query_spec TS = self.target_spec - cond_precision = np.linalg.inv(Q.cond_cov) - conjugate_arg = cond_precision.dot(Q.cond_mean) + cond_precision = np.linalg.inv(QS.cond_cov) + conjugate_arg = cond_precision.dot(QS.cond_mean) if useC: solver = solve_barrier_affine_C @@ -37,13 +37,13 @@ def solve_estimating_eqn(self, val, soln, hess = solver(conjugate_arg, cond_precision, - Q.observed_soln, - Q.linear_part, - Q.offset, + QS.observed_soln, + QS.linear_part, + QS.offset, **self.solve_args) final_estimator = TS.cov_target.dot(prec_target_nosel).dot(TS.observed_target) \ - + TS.regress_target_score.dot(Q.M1.dot(Q.opt_linear)).dot(Q.cond_mean - soln) \ + + TS.regress_target_score.dot(QS.M1.dot(QS.opt_linear)).dot(QS.cond_mean - soln) \ - bias_target observed_info_natural = prec_target_nosel + U3 - U5.dot(hess.dot(U5.T)) @@ -77,7 +77,7 @@ def solve_estimating_eqn(self, intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T - log_ref = val + conjugate_arg.T.dot(Q.cond_cov).dot(conjugate_arg) / 2. + log_ref = val + conjugate_arg.T.dot(QS.cond_cov).dot(conjugate_arg) / 2. 
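In `exact_reference`, each grid point reduces to a one-dimensional Gaussian restricted to an interval determined by the affine constraints, so the reference is assembled from truncated-normal probabilities. Schematically, the building block is the log-mass of an interval under a Gaussian (this shows the shape of the computation, not the package's exact code):

```python
import numpy as np
from scipy.stats import norm

def log_gaussian_interval_mass(lower, upper, mean, sd):
    # log P(lower < X < upper) for X ~ N(mean, sd^2)
    a, b = (lower - mean) / sd, (upper - mean) / sd
    return np.log(norm.cdf(b) - norm.cdf(a))

# e.g. mass of (0, inf) under N(1, 4)
print(log_gaussian_interval_mass(0., np.inf, mean=1., sd=2.))
```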
result = pd.DataFrame({'MLE': final_estimator, 'SE': np.sqrt(np.diag(observed_info_mean)), @@ -93,22 +93,22 @@ def solve_estimating_eqn(self, def _setup_estimating_eqn(query_spec, target_spec): - Q = query_spec + QS = query_spec TS = target_spec prec_target = np.linalg.inv(TS.cov_target) U1 = TS.regress_target_score.T.dot(prec_target) - U2 = U1.T.dot(Q.M2.dot(U1)) - U3 = U1.T.dot(Q.M3.dot(U1)) - U4 = Q.M1.dot(Q.opt_linear).dot(Q.cond_cov).dot(Q.opt_linear.T.dot(Q.M1.T.dot(U1))) - U5 = U1.T.dot(Q.M1.dot(Q.opt_linear)) + U2 = U1.T.dot(QS.M2.dot(U1)) + U3 = U1.T.dot(QS.M3.dot(U1)) + U4 = QS.M1.dot(QS.opt_linear).dot(QS.cond_cov).dot(QS.opt_linear.T.dot(QS.M1.T.dot(U1))) + U5 = U1.T.dot(QS.M1.dot(QS.opt_linear)) prec_target_nosel = prec_target + U2 - U3 - _P = -(U1.T.dot(Q.M1.dot(Q.observed_score)) + U2.dot(TS.observed_target)) + _P = -(U1.T.dot(QS.M1.dot(QS.observed_score)) + U2.dot(TS.observed_target)) bias_target = TS.cov_target.dot(U1.T.dot(-U4.dot(TS.observed_target) - + Q.M1.dot(Q.opt_linear.dot(Q.cond_mean))) - _P) + + QS.M1.dot(QS.opt_linear.dot(QS.cond_mean))) - _P) return prec_target_nosel, bias_target, U3, U5 From aea5df80cb5c8d00db6ae4f5de6892cf0470a8b7 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 17 Nov 2021 12:42:36 -0800 Subject: [PATCH 169/187] renaming temporary matrices --- selectinf/randomized/base.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/selectinf/randomized/base.py b/selectinf/randomized/base.py index 66db3d955..12f3b7bbf 100644 --- a/selectinf/randomized/base.py +++ b/selectinf/randomized/base.py @@ -249,20 +249,20 @@ def _construct_density(self): prec_target = 1. / cov_target_uni regress_target_score_uni = TS.regress_target_score[m, :].reshape((1, p)) - T1 = regress_target_score_uni.T.dot(prec_target) - T2 = T1.T.dot(QS.M2.dot(T1)) - T3 = T1.T.dot(QS.M3.dot(T1)) - T4 = QS.M1.dot(QS.opt_linear).dot(QS.cond_cov).dot(QS.opt_linear.T.dot(QS.M1.T.dot(T1))) - T5 = T1.T.dot(QS.M1.dot(QS.opt_linear)) + U1 = regress_target_score_uni.T.dot(prec_target) + U2 = U1.T.dot(QS.M2.dot(U1)) + U3 = U1.T.dot(QS.M3.dot(U1)) + U4 = QS.M1.dot(QS.opt_linear).dot(QS.cond_cov).dot(QS.opt_linear.T.dot(QS.M1.T.dot(U1))) + U5 = U1.T.dot(QS.M1.dot(QS.opt_linear)) - _T = QS.cond_cov.dot(T5.T) + _T = QS.cond_cov.dot(U5.T) - prec_target_nosel = prec_target + T2 - T3 + prec_target_nosel = prec_target + U2 - U3 - _P = -(T1.T.dot(QS.M1.dot(QS.observed_score)) + T2.dot(observed_target_uni)) + _P = -(U1.T.dot(QS.M1.dot(QS.observed_score)) + U2.dot(observed_target_uni)) bias_target = cov_target_uni.dot( - T1.T.dot(-T4.dot(observed_target_uni) + QS.M1.dot(QS.opt_linear.dot(QS.cond_mean))) - _P) + U1.T.dot(-U4.dot(observed_target_uni) + QS.M1.dot(QS.opt_linear.dot(QS.cond_mean))) - _P) _r = np.linalg.inv(prec_target_nosel).dot(prec_target.dot(bias_target)) _S = np.linalg.inv(prec_target_nosel).dot(prec_target) From 8fffbb203f455c9c73e10348fc8412d53741d505 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 17 Nov 2021 22:09:35 -0800 Subject: [PATCH 170/187] more cleanup; remains to unify U1,U2,U3... 
calculation across 4 methods --- selectinf/base.py | 5 +- selectinf/randomized/approx_reference.py | 6 +- selectinf/randomized/base.py | 111 ++++++++--------- selectinf/randomized/exact_reference.py | 6 +- selectinf/randomized/posterior_inference.py | 110 +++++++++-------- selectinf/randomized/query.py | 125 ++++++++++---------- 6 files changed, 173 insertions(+), 190 deletions(-) diff --git a/selectinf/base.py b/selectinf/base.py index 3c8100cf5..51c09ba85 100644 --- a/selectinf/base.py +++ b/selectinf/base.py @@ -1,4 +1,4 @@ -import typing +from typing import NamedTuple import numpy as np @@ -47,13 +47,12 @@ def restricted_estimator(loss, active, solve_args={'min_its':50, 'tol':1.e-10}): # functions construct targets of inference # and covariance with score representation -class TargetSpec(typing.NamedTuple): +class TargetSpec(NamedTuple): observed_target : np.ndarray cov_target : np.ndarray regress_target_score : np.ndarray alternatives : list - #dispersion : float = 1 def selected_targets(loglike, solution, diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index e7d2df42f..0e70b80d0 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -95,7 +95,7 @@ def _construct_families(self): TS = self.target_spec QS = self.query_spec - self._construct_density() + precs, S, r, T = self.conditional_spec self._families = [] _log_ref = np.zeros((self.ntarget, 1000)) @@ -104,11 +104,11 @@ def _construct_families(self): observed_target_uni = (TS.observed_target[m]).reshape((1,)) cov_target_uni = (np.diag(TS.cov_target)[m]).reshape((1, 1)) - var_target = 1. / ((self.precs[m])[0, 0]) + var_target = 1. / (precs[m][0, 0]) approx_log_ref = self._approx_log_reference(observed_target_uni, cov_target_uni, - self.T[m], + T[m], self.stat_grid[m]) if self.useIP == False: diff --git a/selectinf/randomized/base.py b/selectinf/randomized/base.py index 12f3b7bbf..b3aa8332b 100644 --- a/selectinf/randomized/base.py +++ b/selectinf/randomized/base.py @@ -1,7 +1,22 @@ +from typing import NamedTuple import numpy as np, pandas as pd from .selective_MLE import mle_inference +class ConditionalSpec(NamedTuple): + + # description of (preselection) conditional law of + # targets \hat{\theta} | u, N + # if they were unbiased, then: + # 1) precision will agree with marginal variance + # 2) scalings will all be 1 + # 3) shifts will be 0 + + precision : np.ndarray + scalings : np.ndarray + shifts : np.ndarray + T : np.ndarray # what is T? + class grid_inference(object): def __init__(self, @@ -127,58 +142,16 @@ def _approx_log_reference(self, return np.asarray(ref_hat) - def _construct_families(self): - - self._construct_density() - - self._families = [] - _log_ref = np.zeros((self.ntarget, 1000)) - for m in range(self.ntarget): - - observed_target_uni = (self.observed_target[m]).reshape((1,)) - cov_target_uni = (np.diag(self.cov_target)[m]).reshape((1, 1)) - - var_target = 1. 
/ ((self.precs[m])[0, 0]) - - approx_log_ref = self._approx_log_reference(observed_target_uni, - cov_target_uni, - self.T[m], - self.stat_grid[m]) - - if self.useIP == False: - - logW = (approx_log_ref - 0.5 * (self.stat_grid[m] - self.observed_target[m]) ** 2 / var_target) - logW -= logW.max() - _log_ref[m,:] = logW - self._families.append(discrete_family(self.stat_grid[m], - np.exp(logW))) - else: - - approx_fn = interp1d(self.stat_grid[m], - approx_log_ref, - kind='quadratic', - bounds_error=False, - fill_value='extrapolate') - - grid = np.linspace(self.stat_grid[m].min(), self.stat_grid[m].max(), 1000) - logW = (approx_fn(grid) - - 0.5 * (grid - self.observed_target[m]) ** 2 / var_target) - - logW -= logW.max() - _log_ref[m, :] = logW - self._families.append(discrete_family(grid, - np.exp(logW))) - - self._log_ref = _log_ref - def _pivots(self, mean_parameter, alternatives=None): TS = self.target_spec - + if not hasattr(self, "_families"): - self._construct_families() + self._construct_density() # generic + self._construct_families() # specific to the method + precs, S, r = self.conditional_spec if alternatives is None: alternatives = ['twosided'] * self.ntarget @@ -188,9 +161,9 @@ def _pivots(self, for m in range(self.ntarget): family = self._families[m] - var_target = 1. / ((self.precs[m])[0, 0]) + var_target = 1. / (precs[m][0, 0]) - mean = self.S[m].dot(mean_parameter[m].reshape((1,))) + self.r[m] + mean = S[m].dot(mean_parameter[m].reshape((1,))) + r[m] # construction of pivot from families follows `selectinf.learning.core` _cdf = family.cdf((mean[0] - TS.observed_target[m]) / var_target, x=TS.observed_target[m]) @@ -211,7 +184,10 @@ def _intervals(self, TS = self.target_spec if not hasattr(self, "_families"): - self._construct_families() + self._construct_density() # generic + self._construct_families() # specific to the method + + precs, S, r, _ = self.conditional_spec lower, upper = [], [] @@ -223,7 +199,9 @@ def _intervals(self, l, u = family.equal_tailed_interval(observed_target, alpha=1 - level) - var_target = 1. / ((self.precs[m])[0, 0]) + var_target = 1. / (precs[m][0, 0]) + + # JT: I think these should cover S \theta^* + r not theta^* lower.append(l * var_target + observed_target) upper.append(u * var_target + observed_target) @@ -231,15 +209,19 @@ def _intervals(self, return np.asarray(lower), np.asarray(upper) ### Private method + def _construct_density(self): + """ + What is this method doing? + """ TS = self.target_spec QS = self.query_spec - precs = {} - S = {} - r = {} - T = {} + precs = [] + S = [] + r = [] + T = [] p = TS.regress_target_score.shape[1] @@ -255,6 +237,7 @@ def _construct_density(self): U4 = QS.M1.dot(QS.opt_linear).dot(QS.cond_cov).dot(QS.opt_linear.T.dot(QS.M1.T.dot(U1))) U5 = U1.T.dot(QS.M1.dot(QS.opt_linear)) + # JT: what is _T? _T = QS.cond_cov.dot(U5.T) prec_target_nosel = prec_target + U2 - U3 @@ -267,12 +250,16 @@ def _construct_density(self): _r = np.linalg.inv(prec_target_nosel).dot(prec_target.dot(bias_target)) _S = np.linalg.inv(prec_target_nosel).dot(prec_target) - S[m] = _S - r[m] = _r - precs[m] = prec_target_nosel - T[m] = _T + S.append(_S) + r.append(_r) + precs.append(prec_target_nosel) + T.append(_T) + + self.conditional_spec = ConditionalSpec(np.array(precs), + np.array(S), + np.array(r), + np.array(T) # what is T here? 
+ ) + + return self.conditional_spec - self.precs = precs - self.S = S - self.r = r - self.T = T diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index d37a99b3e..8a3b51b6e 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -89,7 +89,7 @@ def _construct_families(self): QS = self.query_spec TS = self.target_spec - self._construct_density() + precs, S, r, T = self.conditional_spec self._families = [] @@ -98,11 +98,11 @@ def _construct_families(self): observed_target_uni = (TS.observed_target[m]).reshape((1,)) cov_target_uni = (np.diag(TS.cov_target)[m]).reshape((1, 1)) - var_target = 1. / ((self.precs[m])[0, 0]) + var_target = 1. / (precs[m][0, 0]) log_ref = self.log_reference(observed_target_uni, cov_target_uni, - self.T[m], + T[m], self.stat_grid[m]) logW = (log_ref - 0.5 * (self.stat_grid[m] - TS.observed_target[m]) ** 2 / var_target) diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 0c33f3b96..7ab09195b 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -35,47 +35,24 @@ class posterior(object): """ def __init__(self, - query, + query_spec, target_spec, dispersion, prior, solve_args={'tol': 1.e-12}): + self.query_spec = QS = query_spec + self.target_spec = TS = target_spec self.solve_args = solve_args - (observed_target, - cov_target, - regress_target_score) = target_spec[:3] - - self.observed_target = observed_target - self.cov_target = cov_target - self.prec_target = np.linalg.inv(cov_target) - self.regress_target_score = regress_target_score - - self.cond_mean = query.cond_mean - self.cond_cov = query.cond_cov - self.cond_precision = np.linalg.inv(self.cond_cov) - self.opt_linear = query.opt_linear - - self.linear_part = query.linear_part - self.offset = query.offset - - self.M1 = query.M1 - self.M2 = query.M2 - self.M3 = query.M3 - self.observed_soln = query.observed_opt_state - - self.observed_score = query.observed_score_state + query.observed_subgrad - - G = mle_inference(query, + G = mle_inference(query_spec, target_spec, solve_args=solve_args) result, self.inverse_info, self.log_ref = G.solve_estimating_eqn() - self.ntarget = self.cov_target.shape[0] - self.nopt = self.cond_precision.shape[0] - + self.ntarget = TS.cov_target.shape[0] + self.nopt = QS.cond_cov.shape[0] self.initial_estimate = np.asarray(result['MLE']) self.dispersion = dispersion @@ -83,7 +60,7 @@ def __init__(self, ### Note for an informative prior we might want to change this... self.prior = prior - self._set_marginal_parameters() + self._get_marginal_parameters() def log_posterior(self, target_parameter, @@ -99,30 +76,39 @@ def log_posterior(self, Noise standard deviation. 
""" + QS = self.query_spec + TS = self.target_spec + + (prec_marginal, + linear_coef, + offset_coef, + r, + S, + prec_target_nosel) = self._get_marginal_parameters() + sigmasq = sigma ** 2 - target = self.S.dot(target_parameter) + self.r + target = S.dot(target_parameter) + r - mean_marginal = self.linear_coef.dot(target) + self.offset_coef - prec_marginal = self.prec_marginal + mean_marginal = linear_coef.dot(target) + offset_coef conjugate_marginal = prec_marginal.dot(mean_marginal) solver = solve_barrier_affine_py val, soln, hess = solver(conjugate_marginal, prec_marginal, - self.observed_soln, - self.linear_part, - self.offset, + QS.observed_soln, + QS.linear_part, + QS.offset, **self.solve_args) log_normalizer = -val - mean_marginal.T.dot(prec_marginal).dot(mean_marginal) / 2. - log_lik = -((self.observed_target - target).T.dot(self.prec_target_nosel).dot(self.observed_target - target)) / 2. \ + log_lik = -((TS.observed_target - target).T.dot(prec_target_nosel).dot(TS.observed_target - target)) / 2. \ - log_normalizer - grad_lik = self.S.T.dot(self.prec_target_nosel.dot(self.observed_target) - self.prec_target_nosel.dot(target) - - self.linear_coef.T.dot(prec_marginal.dot(soln) - conjugate_marginal)) + grad_lik = S.T.dot(prec_target_nosel.dot(TS.observed_target) - prec_target_nosel.dot(target) + - linear_coef.T.dot(prec_marginal.dot(soln) - conjugate_marginal)) log_prior, grad_prior = self.prior(target_parameter) @@ -134,7 +120,7 @@ def log_posterior(self, ### Private method - def _set_marginal_parameters(self): + def _get_marginal_parameters(self): """ This works out the implied covariance of optimization varibles as a function @@ -142,33 +128,43 @@ def _set_marginal_parameters(self): implied mean as a function of the true parameters. """ - T1 = self.regress_target_score.T.dot(self.prec_target) - T2 = T1.T.dot(self.M2.dot(T1)) - T3 = T1.T.dot(self.M3.dot(T1)) - T4 = self.M1.dot(self.opt_linear).dot(self.cond_cov).dot(self.opt_linear.T.dot(self.M1.T.dot(T1))) - T5 = T1.T.dot(self.M1.dot(self.opt_linear)) + QS = self.query_spec + TS = self.target_spec + + prec_target = np.linalg.inv(TS.cov_target) + cond_precision = np.linalg.inv(QS.cond_cov) + + U1 = TS.regress_target_score.T.dot(prec_target) + U2 = U1.T.dot(QS.M2.dot(U1)) + U3 = U1.T.dot(QS.M3.dot(U1)) + U4 = QS.M1.dot(QS.opt_linear).dot(QS.cond_cov).dot(QS.opt_linear.T.dot(QS.M1.T.dot(U1))) + U5 = U1.T.dot(QS.M1.dot(QS.opt_linear)) - prec_target_nosel = self.prec_target + T2 - T3 + prec_target_nosel = prec_target + U2 - U3 - _P = -(T1.T.dot(self.M1.dot(self.observed_score)) + T2.dot(self.observed_target)) + _P = -(U1.T.dot(QS.M1.dot(QS.observed_score)) + U2.dot(TS.observed_target)) - bias_target = self.cov_target.dot(T1.T.dot(-T4.dot(self.observed_target) + self.M1.dot(self.opt_linear.dot(self.cond_mean))) - _P) + bias_target = TS.cov_target.dot(U1.T.dot(-U4.dot(TS.observed_target) + + QS.M1.dot(QS.opt_linear.dot(QS.cond_mean))) - _P) ###set parameters for the marginal distribution of optimization variables - _Q = np.linalg.inv(prec_target_nosel + T3) - self.prec_marginal = self.cond_precision - T5.T.dot(_Q).dot(T5) - self.linear_coef = self.cond_cov.dot(T5.T) - self.offset_coef = self.cond_mean - self.linear_coef.dot(self.observed_target) + _Q = np.linalg.inv(prec_target_nosel + U3) + prec_marginal = cond_precision - U5.T.dot(_Q).dot(U5) + linear_coef = QS.cond_cov.dot(U5.T) + offset_coef = QS.cond_mean - linear_coef.dot(TS.observed_target) ###set parameters for the marginal distribution of target - r = 
np.linalg.inv(prec_target_nosel).dot(self.prec_target.dot(bias_target)) - S = np.linalg.inv(prec_target_nosel).dot(self.prec_target) + r = np.linalg.inv(prec_target_nosel).dot(prec_target.dot(bias_target)) + S = np.linalg.inv(prec_target_nosel).dot(prec_target) - self.r = r - self.S = S - self.prec_target_nosel = prec_target_nosel + return (prec_marginal, + linear_coef, + offset_coef, + r, + S, + prec_target_nosel) ### sampling methods diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index bd3a1bcd1..610d177ff 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -37,7 +37,7 @@ class QuerySpec(NamedTuple): observed_soln : np.ndarray observed_score : np.ndarray -class query(object): +class gaussian_query(object): r""" This class is the base of randomized selective inference based on convex programs. @@ -112,19 +112,19 @@ def set_sampler(self, sampler): sampler = property(get_sampler, set_sampler, doc='Sampler of optimization (augmented) variables.') - # implemented by subclasses + # # implemented by subclasses - def solve(self): + # def solve(self): - raise NotImplementedError('abstract method') + # raise NotImplementedError('abstract method') -class gaussian_query(query): +# class gaussian_query(query): - """ - A class with Gaussian perturbation to the objective -- - easy to apply CLT to such things - """ +# """ +# A class with Gaussian perturbation to the objective -- +# easy to apply CLT to such things +# """ def fit(self, perturb=None): @@ -259,64 +259,65 @@ def inference(self, level=level) elif method == 'posterior': - return self.posterior(target_spec, - **method_args)[1] + return _posterior(query_spec, + target_spec, + **method_args)[1] - def posterior(self, - target_spec, - level=0.90, - dispersion=1, - prior=None, - solve_args={'tol': 1.e-12}, - nsample=2000, - nburnin=500): - """ +def _posterior(query_spec, + target_spec, + level=0.90, + dispersion=1, + prior=None, + solve_args={'tol': 1.e-12}, + nsample=2000, + nburnin=500): + """ - Parameters - ---------- - target_spec : TargetSpec - Information needed to specify the target. - level : float - Level for credible interval. - dispersion : float, optional - Dispersion parameter for log-likelihood. - prior : callable - A callable object that takes a single argument - `parameter` of the same shape as `observed_target` - and returns (value of log prior, gradient of log prior) - solve_args : dict, optional - Arguments passed to solver. + Parameters + ---------- + target_spec : TargetSpec + Information needed to specify the target. + level : float + Level for credible interval. + dispersion : float, optional + Dispersion parameter for log-likelihood. + prior : callable + A callable object that takes a single argument + `parameter` of the same shape as `observed_target` + and returns (value of log prior, gradient of log prior) + solve_args : dict, optional + Arguments passed to solver. - """ + """ + + if prior is None: + Di = 1. 
/ (200 * np.diag(target_spec.cov_target)) + + def prior(target_parameter): + grad_prior = -target_parameter * Di + log_prior = -0.5 * np.sum(target_parameter ** 2 * Di) + return log_prior, grad_prior + + posterior_repr = posterior(query_spec, + target_spec, + dispersion, + prior, + solve_args=solve_args) + + samples = langevin_sampler(posterior_repr, + nsample=nsample, + nburnin=nburnin) + + delta = 0.5 * (1 - level) * 100 + lower = np.percentile(samples, delta, axis=0) + upper = np.percentile(samples, 100 - delta, axis=0) + mean = np.mean(samples, axis=0) + + return samples, pd.DataFrame({'estimate':mean, + 'lower_credible':lower, + 'upper_credible':upper}) - if prior is None: - Di = 1. / (200 * np.diag(target_spec.cov_target)) - - def prior(target_parameter): - grad_prior = -target_parameter * Di - log_prior = -0.5 * np.sum(target_parameter ** 2 * Di) - return log_prior, grad_prior - - posterior_repr = posterior(self, - target_spec, - dispersion, - prior, - solve_args=solve_args) - - samples = langevin_sampler(posterior_repr, - nsample=nsample, - nburnin=nburnin) - - delta = 0.5 * (1 - level) * 100 - lower = np.percentile(samples, delta, axis=0) - upper = np.percentile(samples, 100 - delta, axis=0) - mean = np.mean(samples, axis=0) - - return samples, pd.DataFrame({'estimate':mean, - 'lower_credible':lower, - 'upper_credible':upper}) - From 53645b7a2281532197e18f8ea3ae184e932cec57 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Tue, 23 Nov 2021 07:45:28 -0500 Subject: [PATCH 171/187] adjusted cov in split_lasso; adjusted intervals for actual target in exact_ref --- selectinf/randomized/base.py | 13 +++++++++---- selectinf/randomized/exact_reference.py | 5 ++--- selectinf/randomized/lasso.py | 10 ++++++---- selectinf/randomized/query.py | 14 -------------- 4 files changed, 17 insertions(+), 25 deletions(-) diff --git a/selectinf/randomized/base.py b/selectinf/randomized/base.py index b3aa8332b..16773807b 100644 --- a/selectinf/randomized/base.py +++ b/selectinf/randomized/base.py @@ -195,16 +195,21 @@ def _intervals(self, # construction of intervals from families follows `selectinf.learning.core` family = self._families[m] observed_target = TS.observed_target[m] + unbiased_est = (observed_target - r[m][0]) * (1./(S[m][0,0])) - l, u = family.equal_tailed_interval(observed_target, - alpha=1 - level) + _l, _u = family.equal_tailed_interval(observed_target, + alpha=1 - level) + l = _l * (1./(S[m][0,0])) + u = _u * (1./(S[m][0,0])) var_target = 1. 
/ (precs[m][0, 0]) # JT: I think these should cover S \theta^* + r not theta^* - lower.append(l * var_target + observed_target) - upper.append(u * var_target + observed_target) + #lower.append(l * var_target + observed_target) + #upper.append(u * var_target + observed_target) + lower.append(l * var_target + unbiased_est) + upper.append(u * var_target + unbiased_est) return np.asarray(lower), np.asarray(upper) diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index 8a3b51b6e..dbc7711da 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -1,10 +1,9 @@ from __future__ import division, print_function -import numpy as np, pandas as pd +import numpy as np from scipy.stats import norm as ndist from ..distributions.discrete_family import discrete_family -from .selective_MLE import mle_inference from .base import grid_inference class exact_grid_inference(grid_inference): @@ -16,7 +15,7 @@ def log_reference(self, grid): QS = self.query_spec - TS = self.target_spec + TS = self.target_spec ## we don't use this; it seems that we have already formed the target_specific elements which we input as arguments for this functions if np.asarray(observed_target).shape in [(), (0,)]: raise ValueError('no target specified') diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index 26fecf91e..6f71819d0 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -788,11 +788,13 @@ def _setup_implied_gaussian(self, regress_opt[:, ordered_vars] = -cond_cov * signs[None, :] / (dispersion * ratio) cond_mean = regress_opt.dot(self.observed_score_state + observed_subgrad) - prod_score_prec = np.identity(self.nfeature) / ratio - - cov_rand = self._unscaled_cov_score * dispersion + ## probably missing a dispersion in the denominator + prod_score_prec_unnorm = np.identity(self.nfeature) / (dispersion * ratio) + + ## probably missing a multiplicative factor of ratio + cov_rand = self._unscaled_cov_score * (dispersion * ratio) - M1 = prod_score_prec * dispersion + M1 = prod_score_prec_unnorm * dispersion M2 = M1.dot(cov_rand).dot(M1.T) M3 = M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T) diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 610d177ff..f67ba3ec1 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -112,20 +112,6 @@ def set_sampler(self, sampler): sampler = property(get_sampler, set_sampler, doc='Sampler of optimization (augmented) variables.') - # # implemented by subclasses - - # def solve(self): - - # raise NotImplementedError('abstract method') - - -# class gaussian_query(query): - -# """ -# A class with Gaussian perturbation to the objective -- -# easy to apply CLT to such things -# """ - def fit(self, perturb=None): # take a new perturbation if supplied From dad81a2674c2e0c94111ec17399dab5181502d5a Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Thu, 25 Nov 2021 16:03:13 -0500 Subject: [PATCH 172/187] minor fix in return list --- selectinf/randomized/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectinf/randomized/base.py b/selectinf/randomized/base.py index 16773807b..9c05ca7b4 100644 --- a/selectinf/randomized/base.py +++ b/selectinf/randomized/base.py @@ -151,7 +151,7 @@ def _pivots(self, if not hasattr(self, "_families"): self._construct_density() # generic self._construct_families() # specific to the method - precs, S, r = self.conditional_spec + precs, S, r, 
_ = self.conditional_spec if alternatives is None: alternatives = ['twosided'] * self.ntarget From 40cd77b2d70e433dacc4755560407aafac051c25 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 29 Nov 2021 22:41:31 -0800 Subject: [PATCH 173/187] fixing approx_reference --- selectinf/randomized/approx_reference.py | 65 +++++++++++++++++------- selectinf/randomized/base.py | 9 ++-- 2 files changed, 50 insertions(+), 24 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index 0e70b80d0..dd27e98b3 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -1,3 +1,4 @@ + from __future__ import division, print_function import numpy as np, pandas as pd @@ -13,7 +14,8 @@ def __init__(self, query_spec, target_spec, solve_args={'tol': 1.e-12}, - useIP=False): + ngrid=1000, + ncoarse=40): """ Produce p-values and confidence intervals for targets @@ -38,16 +40,7 @@ def __init__(self, target_spec, solve_args=solve_args) - if useIP: - ngrid = 60 - self.stat_grid = np.zeros((ntarget, ngrid)) - for j in range(ntarget): - self.stat_grid[j, :] = np.linspace(observed_target[j] - 1.5 * _scale[j], - observed_target[j] + 1.5 * _scale[j], - num=ngrid) - - - self.useIP = useIP + self.ncoarse = ncoarse def _approx_log_reference(self, observed_target, @@ -98,7 +91,19 @@ def _construct_families(self): precs, S, r, T = self.conditional_spec self._families = [] - _log_ref = np.zeros((self.ntarget, 1000)) + + if self.ncoarse is not None: + coarse_grid = np.zeros((self.stat_grid.shape[0], self.ncoarse)) + for j in range(coarse_grid.shape[0]): + coarse_grid[j,:] = np.linspace(self.stat_grid[j].min(), + self.stat_grid[j].max(), + self.ncoarse) + eval_grid = coarse_grid + else: + eval_grid = self.stat_grid + + _log_ref = np.zeros((self.ntarget, self.stat_grid[0].shape[0])) + for m in range(self.ntarget): observed_target_uni = (TS.observed_target[m]).reshape((1,)) @@ -109,9 +114,9 @@ def _construct_families(self): approx_log_ref = self._approx_log_reference(observed_target_uni, cov_target_uni, T[m], - self.stat_grid[m]) - - if self.useIP == False: + eval_grid[m]) + + if self.ncoarse is None: logW = (approx_log_ref - 0.5 * (self.stat_grid[m] - TS.observed_target[m]) ** 2 / var_target) logW -= logW.max() @@ -120,17 +125,41 @@ def _construct_families(self): np.exp(logW))) else: - approx_fn = interp1d(self.stat_grid[m], + approx_fn = interp1d(eval_grid[m], approx_log_ref, kind='quadratic', bounds_error=False, fill_value='extrapolate') - grid = np.linspace(self.stat_grid[m].min(), self.stat_grid[m].max(), 1000) + grid = self.stat_grid[m] logW = (approx_fn(grid) - - 0.5 * (grid - self.observed_target[m]) ** 2 / var_target) + 0.5 * (grid - TS.observed_target[m]) ** 2 / var_target) logW -= logW.max() + + DEBUG = False # JT: this can be removed + if DEBUG: + approx_log_ref2 = self._approx_log_reference(observed_target_uni, + cov_target_uni, + T[m], + grid) + logW2 = (approx_log_ref2 - 0.5 * (grid - TS.observed_target[m]) ** 2 / var_target) + logW2 -= logW2.max() + import matplotlib.pyplot as plt + plt.plot(grid, logW, label='extrapolated') + + plt.plot(grid, logW2, label='fine grid') + plt.legend() + + plt.figure(num=2) + plt.plot(eval_grid[m], approx_fn(eval_grid[m]), label='extrapolated coarse') + plt.plot(grid, approx_fn(grid), label='extrapolated fine') + plt.plot(grid, approx_log_ref2, label='fine grid') + plt.legend() + + plt.show() + stop + _log_ref[m, :] = logW self._families.append(discrete_family(grid, np.exp(logW))) 
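
The ncoarse branch added above evaluates the expensive reference density on a short coarse grid and then carries it onto the full stat_grid by quadratic interpolation before forming the discrete family. A minimal self-contained sketch of that pattern is given below; `log_reference`, the grid endpoints and `observed_target`/`var_target` are illustrative stand-ins, not part of the package API.

    import numpy as np
    from scipy.interpolate import interp1d

    def log_reference(grid):
        # stand-in for the barrier-solve based _approx_log_reference
        return -0.5 * grid ** 2 + 0.1 * np.cos(grid)

    stat_grid = np.linspace(-4, 4, 1000)            # fine grid (ngrid=1000)
    coarse_grid = np.linspace(stat_grid.min(),
                              stat_grid.max(), 40)  # coarse grid (ncoarse=40)

    # fit a quadratic interpolant to the coarse evaluations, allow extrapolation
    approx_fn = interp1d(coarse_grid,
                         log_reference(coarse_grid),
                         kind='quadratic',
                         bounds_error=False,
                         fill_value='extrapolate')

    observed_target, var_target = 0.5, 1.0
    # combine interpolated reference with the Gaussian factor on the fine grid
    logW = approx_fn(stat_grid) - 0.5 * (stat_grid - observed_target) ** 2 / var_target
    logW -= logW.max()                              # stabilize before exponentiating
    weights = np.exp(logW)                          # unnormalized selective law on the grid
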
diff --git a/selectinf/randomized/base.py b/selectinf/randomized/base.py index 9c05ca7b4..279e94dd7 100644 --- a/selectinf/randomized/base.py +++ b/selectinf/randomized/base.py @@ -22,7 +22,8 @@ class grid_inference(object): def __init__(self, query_spec, target_spec, - solve_args={'tol': 1.e-12}): + solve_args={'tol': 1.e-12}, + ngrid=1000): """ Produce p-values and confidence intervals for targets @@ -45,6 +46,7 @@ def __init__(self, self.query_spec = query_spec self.target_spec = target_spec self.solve_args = solve_args + self.ngrid = ngrid G = mle_inference(query_spec, target_spec, @@ -57,7 +59,6 @@ def __init__(self, _scale = 4 * np.sqrt(np.diag(inverse_info)) self.inverse_info = inverse_info - ngrid = 1000 self.stat_grid = np.zeros((ntarget, ngrid)) for j in range(ntarget): self.stat_grid[j, :] = np.linspace(TS.observed_target[j] - 1.5 * _scale[j], @@ -204,10 +205,6 @@ def _intervals(self, var_target = 1. / (precs[m][0, 0]) - # JT: I think these should cover S \theta^* + r not theta^* - - #lower.append(l * var_target + observed_target) - #upper.append(u * var_target + observed_target) lower.append(l * var_target + unbiased_est) upper.append(u * var_target + unbiased_est) From 1508e06e57815c5add6f6ffe25149ea111dfc14d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 6 Dec 2021 22:38:33 -0800 Subject: [PATCH 174/187] refactor SLOPE to new form; added split_slope --- selectinf/randomized/slope.py | 422 +++++++++++++++++++++++++++++++++- 1 file changed, 412 insertions(+), 10 deletions(-) diff --git a/selectinf/randomized/slope.py b/selectinf/randomized/slope.py index b7ede0954..3015c8259 100644 --- a/selectinf/randomized/slope.py +++ b/selectinf/randomized/slope.py @@ -104,10 +104,10 @@ def fit(self, _active_signs = active_signs.copy() self.selection_variable = {'sign': _active_signs, - 'variables': self._overall} + 'variables': np.nonzero(self._overall)[0]} - indices = np.argsort(-np.fabs(self.observed_soln)) + indices = self.selection_variable['indices'] = np.argsort(-np.fabs(self.observed_soln)) sorted_soln = self.observed_soln[indices] initial_scalings = np.sort(np.unique(np.fabs(self.observed_soln[active])))[::-1] self.observed_opt_state = initial_scalings @@ -150,6 +150,7 @@ def fit(self, break signs_cluster = np.asarray(signs_cluster).T + self.selection_variable['signs_cluster'] = signs_cluster if signs_cluster.size == 0: return active_signs @@ -158,8 +159,6 @@ def fit(self, X_clustered = X[:, indices].dot(signs_cluster) _opt_linear_term = X.T.dot(X_clustered) - _, prec = self.randomizer.cov_prec - # now make the constraints self._setup = True @@ -170,13 +169,21 @@ def fit(self, A_scaling = np.vstack([A_scaling_0, A_scaling_1]) b_scaling = np.zeros(2 * self.num_opt_var - 1) - self._setup_sampler(A_scaling, - b_scaling, - _opt_linear_term, - self.observed_subgrad) - + self._setup_sampler_data = (A_scaling, + b_scaling, + _opt_linear_term, + self.observed_subgrad) + self.opt_linear = _opt_linear_term return active_signs + def setup_inference(self, + dispersion): + + if self.num_opt_var > 0: + self._setup_sampler(*self._setup_sampler_data, + dispersion=dispersion) + + # Targets of inference # and covariance with score representation # are same as LASSO @@ -187,7 +194,7 @@ def gaussian(X, slope_weights, sigma=1., quadratic=None, - ridge_term=0., + ridge_term=None, randomizer_scale=None): loglike = rr.glm.gaussian(X, Y, coef=1. 
/ sigma ** 2, quadratic=quadratic) @@ -207,6 +214,401 @@ def gaussian(X, ridge_term, randomizer) +# split SLOPE + +class split_slope(lasso): + + """ + Data split, then LASSO (i.e. data carving) + """ + + def __init__(self, + loglike, + slope_weights, + proportion_select, + ridge_term=0, + perturb=None, + estimate_dispersion=True): + + (self.loglike, + self.slope_weights, + self.proportion_select, + self.ridge_term) = (loglike, + slope_weights, + proportion_select, + ridge_term) + + self.nfeature = p = self.loglike.shape[0] + self.penalty = rr.slope(slope_weights, lagrange=1.) + self._initial_omega = perturb # random perturbation + self.estimate_dispersion = estimate_dispersion + + def fit(self, + solve_args={'tol': 1.e-12, 'min_its': 50}, + perturb=None): + + signs = slope.fit(self, + solve_args=solve_args, + perturb=perturb) + + # for data splitting randomization, + # we need to estimate a dispersion parameter + + # we then setup up the sampler again + df_fit = len(self.selection_variable['variables']) + + if self.estimate_dispersion: + + X, y = self.loglike.data + n, p = X.shape + + dispersion = 2 * (self.loglike.smooth_objective(self._beta_full, + 'func') / + (n - df_fit)) + + self.dispersion_ = dispersion + # run setup again after + # estimating dispersion + + self.df_fit = df_fit + + return signs + + + def setup_inference(self, + dispersion): + + if self.df_fit > 0: + + if dispersion is None: + self._setup_sampler(*self._setup_sampler_data, + dispersion=self.dispersion_) + + else: + self._setup_sampler(*self._setup_sampler_data, + dispersion=dispersion) + + def _setup_implied_gaussian(self, + opt_linear, + observed_subgrad, + dispersion=1): + + # key observation is that the covariance of the added noise is + # roughly dispersion * (1 - pi) / pi * X^TX (in OLS regression, similar for other + # models), so the precision is (X^TX)^{-1} * (pi / ((1 - pi) * dispersion)) + # and prec.dot(opt_linear) = S_E / (dispersion * (1 - pi) / pi) + # because opt_linear has shape p x E with the columns + # being those non-zero columns of the solution. Above S_E = np.diag(signs) + # the conditional precision is S_E Q[E][:,E] * pi / ((1 - pi) * dispersion) S_E + # and regress_opt is -Q[E][:,E]^{-1} S_E + # padded with zeros + # to be E x p + + pi_s = self.proportion_select + ratio = (1 - pi_s) / pi_s + + ordered_vars = self.selection_variable['variables'] + indices = self.selection_variable['indices'] + signs_cluster = self.selection_variable['signs_cluster'] + + # JT: this may be expensive to form -- not pxp but large + cond_precision = signs_cluster.T.dot(self.opt_linear[indices] / (dispersion * ratio)) + + assert(np.linalg.norm(cond_precision - cond_precision.T) / + np.linalg.norm(cond_precision) < 1.e-6) + cond_cov = np.linalg.inv(cond_precision) + regress_opt = np.zeros((len(ordered_vars), + self.nfeature)) + # JT: not sure this is right -- had to remove signs + regress_opt[:, ordered_vars] = -cond_cov / (dispersion * ratio) + cond_mean = regress_opt.dot(self.observed_score_state + observed_subgrad) + + ## probably missing a dispersion in the denominator + prod_score_prec_unnorm = np.identity(self.nfeature) / (dispersion * ratio) + + ## probably missing a multiplicative factor of ratio + cov_rand = self._unscaled_cov_score * (dispersion * ratio) + + M1 = prod_score_prec_unnorm * dispersion + M2 = M1.dot(cov_rand).dot(M1.T) + M3 = M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T) + + # would be nice to not store these? 
+ + self.M1 = M1 + self.M2 = M2 + self.M3 = M3 + + return (cond_mean, + cond_cov, + cond_precision, + M1, + M2, + M3) + + def _solve_randomized_problem(self, + # optional binary vector + # indicating selection data + perturb=None, + solve_args={'tol': 1.e-12, 'min_its': 50}): + + # take a new perturbation if none supplied + if perturb is not None: + self._selection_idx = perturb + if not hasattr(self, "_selection_idx"): + X, y = self.loglike.data + total_size = n = X.shape[0] + pi_s = self.proportion_select + self._selection_idx = np.zeros(n, np.bool) + self._selection_idx[:int(pi_s*n)] = True + np.random.shuffle(self._selection_idx) + + inv_frac = 1 / self.proportion_select + quad = rr.identity_quadratic(self.ridge_term, + 0, + 0, + 0) + + randomized_loss = self.loglike.subsample(self._selection_idx) + randomized_loss.coef *= inv_frac + + problem = rr.simple_problem(randomized_loss, self.penalty) + observed_soln = problem.solve(quad, **solve_args) + observed_subgrad = -(randomized_loss.smooth_objective(observed_soln, + 'grad') + + quad.objective(observed_soln, 'grad')) + + return observed_soln, observed_subgrad + + @staticmethod + def gaussian(X, + Y, + slope_weights, + proportion, + sigma=1., + quadratic=None, + estimate_dispersion=True): + r""" + Squared-error LASSO with feature weights. + Objective function is (before randomization) + + .. math:: + + \beta \mapsto \frac{1}{2} \|Y-X\beta\|^2_2 + + \sum_{i=1}^p \lambda_i |\beta_i| + + where $\lambda$ is `slope_weights`. The ridge term + is determined by the Hessian and `np.std(Y)` by default. + + Parameters + ---------- + + X : ndarray + Shape (n,p) -- the design matrix. + + Y : ndarray + Shape (n,) -- the response. + + slope_weights: [float, sequence] + + proportion: float + What proportion of data to use for selection. + + sigma : float (optional) + Noise variance. Set to 1 if `covariance_estimator` is not None. + This scales the loglikelihood by `sigma**(-2)`. + + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. + + Returns + ------- + + L : `selection.randomized.slope.slope` + + """ + + loglike = rr.glm.gaussian(X, + Y, + coef=1. / sigma ** 2, + quadratic=quadratic) + + return split_slope(loglike, + np.asarray(slope_weights)/sigma**2, + proportion, + estimate_dispersion=estimate_dispersion) + + + @staticmethod + def logistic(X, + successes, + slope_weights, + proportion, + trials=None, + quadratic=None): + r""" + Logistic LASSO with feature weights (before randomization) + + .. math:: + + \beta \mapsto \ell(X\beta) + \sum_{i=1}^p \lambda_i |\beta_i| + + where $\ell$ is the negative of the logistic + log-likelihood (half the logistic deviance) + and $\lambda$ is `slope_weights`. + + Parameters + ---------- + + X : ndarray + Shape (n,p) -- the design matrix. + + successes : ndarray + Shape (n,) -- response vector. An integer number of successes. + For data that is proportions, multiply the proportions + by the number of trials first. + + slope_weights: [float, sequence] + + proportion: float + What proportion of data to use for selection. + + trials : ndarray (optional) + Number of trials per response, defaults to + ones the same shape as Y. + + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. 
+ + Returns + ------- + + L : `selection.randomized.slope.slope` + + """ + + loglike = rr.glm.logistic(X, + successes, + trials=trials, + quadratic=quadratic) + + return split_slope(loglike, + np.asarray(slope_weights), + proportion) + + @staticmethod + def coxph(X, + times, + status, + slope_weights, + proportion, + quadratic=None): + r""" + Cox proportional hazards LASSO with feature weights. + Objective function is (before randomization) + + .. math:: + + \beta \mapsto \ell^{\text{Cox}}(\beta) + + \sum_{i=1}^p \lambda_i |\beta_i| + + where $\ell^{\text{Cox}}$ is the + negative of the log of the Cox partial + likelihood and $\lambda$ is `slope_weights`. + Uses Efron's tie breaking method. + + Parameters + ---------- + + X : ndarray + Shape (n,p) -- the design matrix. + + times : ndarray + Shape (n,) -- the survival times. + + status : ndarray + Shape (n,) -- the censoring status. + + slope_weights: [float, sequence] + + + proportion: float + What proportion of data to use for selection. + + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. + + Returns + ------- + + L : `selection.randomized.slope.slope` + + """ + n, p = X.shape + loglike = rr.glm.cox(X, times, status, quadratic=quadratic) + + return split_slope(loglike, + np.asarray(slope_weights), + proportion) + + @staticmethod + def poisson(X, + counts, + slope_weights, + proportion, + quadratic=None, + ridge_term=0): + r""" + Poisson log-linear LASSO with feature weights. + Objective function is (before randomization) + + .. math:: + + \beta \mapsto \ell^{\text{Poisson}}(\beta) + \sum_{i=1}^p \lambda_i |\beta_i| + + where $\ell^{\text{Poisson}}$ is the negative + of the log of the Poisson likelihood (half the deviance) + and $\lambda$ is `slope_weights`. + + Parameters + ---------- + + X : ndarray + Shape (n,p) -- the design matrix. + + counts : ndarray + Shape (n,) -- the response. + + slope_weights: [float, sequence] + + proportion: float + What proportion of data to use for selection. + + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. 
+ + Returns + ------- + + L : `selection.randomized.slope.slope` + + """ + loglike = rr.glm.poisson(X, counts, quadratic=quadratic) + + return split_slope(loglike, + np.asarray(slope_weights), + proportion) + + + # Projection onto selected subgradients of SLOPE def _projection_onto_selected_subgradients(prox_arg, From 8d05d911845bfec4d2e0032ff3f93e62f99e8625 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 13 Dec 2021 21:27:49 -0500 Subject: [PATCH 175/187] moved U1-5 calculations from methods to base --- selectinf/base.py | 15 ++++++ selectinf/randomized/base.py | 16 +++--- selectinf/randomized/posterior_inference.py | 27 +++++++--- selectinf/randomized/selective_MLE.py | 55 ++++++++++----------- 4 files changed, 71 insertions(+), 42 deletions(-) diff --git a/selectinf/base.py b/selectinf/base.py index 51c09ba85..9371d62d3 100644 --- a/selectinf/base.py +++ b/selectinf/base.py @@ -269,3 +269,18 @@ def _pearsonX2(y, n = y.shape[0] resid = y - loglike.saturated_loss.mean_function(linpred) return (resid ** 2 / W).sum() / (n - df_fit) + +def target_query_Interactspec(query_spec, + regress_target_score, + cov_target): + + QS = query_spec + prec_target = np.linalg.inv(cov_target) + + U1 = regress_target_score.T.dot(prec_target) + U2 = U1.T.dot(QS.M2.dot(U1)) + U3 = U1.T.dot(QS.M3.dot(U1)) + U4 = QS.M1.dot(QS.opt_linear).dot(QS.cond_cov).dot(QS.opt_linear.T.dot(QS.M1.T.dot(U1))) + U5 = U1.T.dot(QS.M1.dot(QS.opt_linear)) + + return U1, U2, U3, U4, U5 \ No newline at end of file diff --git a/selectinf/randomized/base.py b/selectinf/randomized/base.py index 279e94dd7..cdaf21ca5 100644 --- a/selectinf/randomized/base.py +++ b/selectinf/randomized/base.py @@ -2,6 +2,7 @@ import numpy as np, pandas as pd from .selective_MLE import mle_inference +from ..base import target_query_Interactspec class ConditionalSpec(NamedTuple): @@ -230,14 +231,13 @@ def _construct_density(self): for m in range(self.ntarget): observed_target_uni = (TS.observed_target[m]).reshape((1,)) cov_target_uni = (np.diag(TS.cov_target)[m]).reshape((1, 1)) - prec_target = 1. / cov_target_uni regress_target_score_uni = TS.regress_target_score[m, :].reshape((1, p)) - U1 = regress_target_score_uni.T.dot(prec_target) - U2 = U1.T.dot(QS.M2.dot(U1)) - U3 = U1.T.dot(QS.M3.dot(U1)) - U4 = QS.M1.dot(QS.opt_linear).dot(QS.cond_cov).dot(QS.opt_linear.T.dot(QS.M1.T.dot(U1))) - U5 = U1.T.dot(QS.M1.dot(QS.opt_linear)) + U1, U2, U3, U4, U5 = target_query_Interactspec(QS, + regress_target_score_uni, + cov_target_uni) + + prec_target = 1. / cov_target_uni # JT: what is _T? 
_T = QS.cond_cov.dot(U5.T) @@ -265,3 +265,7 @@ def _construct_density(self): return self.conditional_spec + + + + diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 7ab09195b..256a5ae78 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -8,6 +8,7 @@ from ..algorithms.barrier_affine import solve_barrier_affine_py from .selective_MLE import mle_inference +from ..base import target_query_Interactspec class PosteriorAtt(typing.NamedTuple): @@ -131,14 +132,12 @@ def _get_marginal_parameters(self): QS = self.query_spec TS = self.target_spec + U1, U2, U3, U4, U5 = target_query_Interactspec(QS, + TS.regress_target_score, + TS.cov_target) + prec_target = np.linalg.inv(TS.cov_target) cond_precision = np.linalg.inv(QS.cond_cov) - - U1 = TS.regress_target_score.T.dot(prec_target) - U2 = U1.T.dot(QS.M2.dot(U1)) - U3 = U1.T.dot(QS.M3.dot(U1)) - U4 = QS.M1.dot(QS.opt_linear).dot(QS.cond_cov).dot(QS.opt_linear.T.dot(QS.M1.T.dot(U1))) - U5 = U1.T.dot(QS.M1.dot(QS.opt_linear)) prec_target_nosel = prec_target + U2 - U3 @@ -282,3 +281,19 @@ def __next__(self): self.state[:] = candidate break return self.state + + +def target_query_Interactspec(query_spec, + regress_target_score, + cov_target): + + QS = query_spec + prec_target = np.linalg.inv(cov_target) + + U1 = regress_target_score.T.dot(prec_target) + U2 = U1.T.dot(QS.M2.dot(U1)) + U3 = U1.T.dot(QS.M3.dot(U1)) + U4 = QS.M1.dot(QS.opt_linear).dot(QS.cond_cov).dot(QS.opt_linear.T.dot(QS.M1.T.dot(U1))) + U5 = U1.T.dot(QS.M1.dot(QS.opt_linear)) + + return U1, U2, U3, U4, U5 diff --git a/selectinf/randomized/selective_MLE.py b/selectinf/randomized/selective_MLE.py index 76bd8907b..0fff47de6 100644 --- a/selectinf/randomized/selective_MLE.py +++ b/selectinf/randomized/selective_MLE.py @@ -4,6 +4,7 @@ from scipy.stats import norm as ndist from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C from ..algorithms.barrier_affine import solve_barrier_affine_py +from ..base import target_query_Interactspec class mle_inference(object): @@ -21,11 +22,21 @@ def solve_estimating_eqn(self, useC=False, level=0.90): - prec_target_nosel, bias_target, U3, U5 = _setup_estimating_eqn(self.query_spec, - self.target_spec) - QS = self.query_spec TS = self.target_spec + + U1, U2, U3, U4, U5= target_query_Interactspec(QS, + TS.regress_target_score, + TS.cov_target) + + prec_target = np.linalg.inv(TS.cov_target) + + prec_target_nosel = prec_target + U2 - U3 + + _P = -(U1.T.dot(QS.M1.dot(QS.observed_score)) + U2.dot(TS.observed_target)) + + bias_target = TS.cov_target.dot(U1.T.dot(-U4.dot(TS.observed_target) + + QS.M1.dot(QS.opt_linear.dot(QS.cond_mean))) - _P) cond_precision = np.linalg.inv(QS.cond_cov) conjugate_arg = cond_precision.dot(QS.cond_mean) @@ -90,33 +101,17 @@ def solve_estimating_eqn(self, return result, observed_info_mean, log_ref -def _setup_estimating_eqn(query_spec, - target_spec): - - QS = query_spec - TS = target_spec - - prec_target = np.linalg.inv(TS.cov_target) - U1 = TS.regress_target_score.T.dot(prec_target) - U2 = U1.T.dot(QS.M2.dot(U1)) - U3 = U1.T.dot(QS.M3.dot(U1)) - U4 = QS.M1.dot(QS.opt_linear).dot(QS.cond_cov).dot(QS.opt_linear.T.dot(QS.M1.T.dot(U1))) - U5 = U1.T.dot(QS.M1.dot(QS.opt_linear)) - - prec_target_nosel = prec_target + U2 - U3 - - _P = -(U1.T.dot(QS.M1.dot(QS.observed_score)) + U2.dot(TS.observed_target)) - - bias_target = TS.cov_target.dot(U1.T.dot(-U4.dot(TS.observed_target) - + 
QS.M1.dot(QS.opt_linear.dot(QS.cond_mean))) - _P) - - return prec_target_nosel, bias_target, U3, U5 - - - - - - +def target_query_Interactspec(query_spec, + regress_target_score, + cov_target): + QS = query_spec + prec_target = np.linalg.inv(cov_target) + U1 = regress_target_score.T.dot(prec_target) + U2 = U1.T.dot(QS.M2.dot(U1)) + U3 = U1.T.dot(QS.M3.dot(U1)) + U4 = QS.M1.dot(QS.opt_linear).dot(QS.cond_cov).dot(QS.opt_linear.T.dot(QS.M1.T.dot(U1))) + U5 = U1.T.dot(QS.M1.dot(QS.opt_linear)) + return U1, U2, U3, U4, U5 From a63dae6daa1cfd04babb08ec81a161d84d2c4ac4 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 10 Jan 2022 16:18:36 -0800 Subject: [PATCH 176/187] WIP: screeening --- selectinf/randomized/screening.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/selectinf/randomized/screening.py b/selectinf/randomized/screening.py index 0b61626b0..1e24c73bf 100644 --- a/selectinf/randomized/screening.py +++ b/selectinf/randomized/screening.py @@ -134,10 +134,10 @@ def fit(self, perturb=None): A_scaling = -np.identity(len(active_signs)) b_scaling = np.zeros(self.num_opt_var) - self._setup_sampler(A_scaling, - b_scaling, - opt_linear, - observed_subgrad) + self._setup_sampler_data = (A_scaling, + b_scaling, + opt_linear, + observed_subgrad) return self._selected @@ -237,10 +237,10 @@ def fit(self, perturb=None): A_scaling = -np.identity(self.num_opt_var) b_scaling = np.zeros(self.num_opt_var) - self._setup_sampler(A_scaling, - b_scaling, - opt_linear, - observed_subgrad) + self._setup_sampler_data = (A_scaling, + b_scaling, + opt_linear, + observed_subgrad) else: self._selected = np.zeros(p, np.bool) return self._selected @@ -374,10 +374,10 @@ def fit(self, perturb=None): A_scaling = -np.identity(self.num_opt_var) b_scaling = -np.ones(self.num_opt_var) * lower_bound - self._setup_sampler(A_scaling, - b_scaling, - opt_linear, - observed_subgrad) + self._setup_sampler_data = (A_scaling, + b_scaling, + opt_linear, + observed_subgrad) return self._selected From a4818570252dde5d7a82e4a10fc023f1e179346b Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 10 Jan 2022 17:29:09 -0800 Subject: [PATCH 177/187] removed redundant interaction functions; have the U quantities computed in a class method --- selectinf/base.py | 22 +- selectinf/randomized/approx_reference.py | 278 +++++++++++++++++++- selectinf/randomized/base.py | 276 +------------------ selectinf/randomized/exact_reference.py | 2 +- selectinf/randomized/lasso.py | 12 +- selectinf/randomized/posterior_inference.py | 34 +-- selectinf/randomized/query.py | 26 +- selectinf/randomized/selective_MLE.py | 36 ++- selectinf/randomized/slope.py | 9 +- 9 files changed, 355 insertions(+), 340 deletions(-) diff --git a/selectinf/base.py b/selectinf/base.py index 9371d62d3..d2b9d9a1b 100644 --- a/selectinf/base.py +++ b/selectinf/base.py @@ -243,14 +243,14 @@ def _compute_hessian(loglike, _right = np.zeros((n, bool_idx.sum())) for i, j in enumerate(np.nonzero(bool_idx)[0]): _right[:,i] = loglike.saturated_loss.hessian_mult(linpred, - X[:,j], - case_weights=loglike.saturated_loss.case_weights) + X[:,j], + case_weights=loglike.saturated_loss.case_weights) parts.append(X.T.dot(_right)) _hessian = np.zeros_like(X) for i in range(X.shape[1]): _hessian[:,i] = loglike.saturated_loss.hessian_mult(linpred, - X[:,i], - case_weights=loglike.saturated_loss.case_weights) + X[:,i], + case_weights=loglike.saturated_loss.case_weights) _hessian = X.T.dot(_hessian) else: raise ValueError('saturated_loss has no 
hessian or hessian_mult method') @@ -270,17 +270,3 @@ def _pearsonX2(y, resid = y - loglike.saturated_loss.mean_function(linpred) return (resid ** 2 / W).sum() / (n - df_fit) -def target_query_Interactspec(query_spec, - regress_target_score, - cov_target): - - QS = query_spec - prec_target = np.linalg.inv(cov_target) - - U1 = regress_target_score.T.dot(prec_target) - U2 = U1.T.dot(QS.M2.dot(U1)) - U3 = U1.T.dot(QS.M3.dot(U1)) - U4 = QS.M1.dot(QS.opt_linear).dot(QS.cond_cov).dot(QS.opt_linear.T.dot(QS.M1.T.dot(U1))) - U5 = U1.T.dot(QS.M1.dot(QS.opt_linear)) - - return U1, U2, U3, U4, U5 \ No newline at end of file diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index dd27e98b3..6feaca6db 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -1,12 +1,286 @@ - from __future__ import division, print_function +from typing import NamedTuple import numpy as np, pandas as pd from scipy.interpolate import interp1d from ..distributions.discrete_family import discrete_family from ..algorithms.barrier_affine import solve_barrier_affine_py -from .base import grid_inference +from .selective_MLE import mle_inference +from .base import target_query_Interactspec + +class ConditionalSpec(NamedTuple): + + # description of (preselection) conditional law of + # targets \hat{\theta} | u, N + # if they were unbiased, then: + # 1) precision will agree with marginal variance + # 2) scalings will all be 1 + # 3) shifts will be 0 + + precision : np.ndarray + scalings : np.ndarray + shifts : np.ndarray + T : np.ndarray # what is T? + +class grid_inference(object): + + def __init__(self, + query_spec, + target_spec, + solve_args={'tol': 1.e-12}, + ngrid=1000): + + """ + Produce p-values and confidence intervals for targets + of model including selected features + Parameters + ---------- + query : `gaussian_query` + A Gaussian query which has information + to describe implied Gaussian. + observed_target : ndarray + Observed estimate of target. + cov_target : ndarray + Estimated covaraince of target. + cov_target_score : ndarray + Estimated covariance of target and score of randomized query. + solve_args : dict, optional + Arguments passed to solver. + """ + + self.query_spec = query_spec + self.target_spec = target_spec + self.solve_args = solve_args + self.ngrid = ngrid + + G = mle_inference(query_spec, + target_spec, + solve_args=solve_args) + + _, inverse_info, log_ref = G.solve_estimating_eqn() + + TS = target_spec + self.ntarget = ntarget = TS.cov_target.shape[0] + _scale = 4 * np.sqrt(np.diag(inverse_info)) + self.inverse_info = inverse_info + + self.stat_grid = np.zeros((ntarget, ngrid)) + for j in range(ntarget): + self.stat_grid[j, :] = np.linspace(TS.observed_target[j] - 1.5 * _scale[j], + TS.observed_target[j] + 1.5 * _scale[j], + num=ngrid) + + def summary(self, + alternatives=None, + parameter=None, + level=0.9): + """ + Produce p-values and confidence intervals for targets + of model including selected features + Parameters + ---------- + alternatives : [str], optional + Sequence of strings describing the alternatives, + should be values of ['twosided', 'less', 'greater'] + parameter : np.array + Hypothesized value for parameter -- defaults to 0. + level : float + Confidence level. 
+ """ + + TS = self.target_spec + + if parameter is not None: + pivots = self._pivots(parameter, + alternatives=alternatives) + else: + pivots = None + + pvalues = self._pivots(np.zeros_like(TS.observed_target), + alternatives=alternatives) + lower, upper = self._intervals(level=level) + + result = pd.DataFrame({'target': TS.observed_target, + 'pvalue': pvalues, + 'alternative': alternatives, + 'lower_confidence': lower, + 'upper_confidence': upper}) + + if not np.all(parameter == 0): + result.insert(4, 'pivot', pivots) + result.insert(5, 'parameter', parameter) + + return result + + def _approx_log_reference(self, + observed_target, + cov_target, + linear_coef, + grid): + + """ + Approximate the log of the reference density on a grid. + """ + if np.asarray(observed_target).shape in [(), (0,)]: + raise ValueError('no target specified') + + ref_hat = [] + solver = solve_barrier_affine_py + + for k in range(grid.shape[0]): + # in the usual D = N + Gamma theta.hat, + # regress_opt_target is "something" times Gamma, + # where "something" comes from implied Gaussian + # cond_mean is "something" times D + # Gamma is cov_target_score.T.dot(prec_target) + + cond_mean_grid = (linear_coef.dot(np.atleast_1d(grid[k] - observed_target)) + self.cond_mean) + conjugate_arg = self.cond_precision.dot(cond_mean_grid) + + val, _, _ = solver(conjugate_arg, + self.cond_precision, + self.observed_soln, + self.linear_part, + self.offset, + **self.solve_args) + + ref_hat.append(-val - (conjugate_arg.T.dot(self.cond_cov).dot(conjugate_arg) / 2.)) + + return np.asarray(ref_hat) + + def _pivots(self, + mean_parameter, + alternatives=None): + + TS = self.target_spec + + if not hasattr(self, "_families"): + self._construct_density() # generic + self._construct_families() # specific to the method + precs, S, r, _ = self.conditional_spec + + if alternatives is None: + alternatives = ['twosided'] * self.ntarget + + pivot = [] + + for m in range(self.ntarget): + + family = self._families[m] + var_target = 1. / (precs[m][0, 0]) + + mean = S[m].dot(mean_parameter[m].reshape((1,))) + r[m] + # construction of pivot from families follows `selectinf.learning.core` + + _cdf = family.cdf((mean[0] - TS.observed_target[m]) / var_target, x=TS.observed_target[m]) + + if alternatives[m] == 'twosided': + pivot.append(2 * min(_cdf, 1 - _cdf)) + elif alternatives[m] == 'greater': + pivot.append(1 - _cdf) + elif alternatives[m] == 'less': + pivot.append(_cdf) + else: + raise ValueError('alternative should be in ["twosided", "less", "greater"]') + return pivot # , self._log_ref + + def _intervals(self, + level=0.9): + + TS = self.target_spec + + if not hasattr(self, "_families"): + self._construct_density() # generic + self._construct_families() # specific to the method + + precs, S, r, _ = self.conditional_spec + + lower, upper = [], [] + + for m in range(self.ntarget): + # construction of intervals from families follows `selectinf.learning.core` + family = self._families[m] + observed_target = TS.observed_target[m] + unbiased_est = (observed_target - r[m][0]) * (1./(S[m][0,0])) + + _l, _u = family.equal_tailed_interval(observed_target, + alpha=1 - level) + l = _l * (1./(S[m][0,0])) + u = _u * (1./(S[m][0,0])) + + var_target = 1. / (precs[m][0, 0]) + + lower.append(l * var_target + unbiased_est) + upper.append(u * var_target + unbiased_est) + + return np.asarray(lower), np.asarray(upper) + + ### Private method + + def _construct_density(self): + """ + What is this method doing? 
+ """ + + TS = self.target_spec + QS = self.query_spec + + precs = [] + S = [] + r = [] + T = [] + + p = TS.regress_target_score.shape[1] + + for m in range(self.ntarget): + observed_target_uni = (TS.observed_target[m]).reshape((1,)) + cov_target_uni = (np.diag(TS.cov_target)[m]).reshape((1, 1)) + regress_target_score_uni = TS.regress_target_score[m, :].reshape((1, p)) + + U1, U2, U3, U4, U5 = self._form_interaction_pieces(QS, + regress_target_score_uni, + cov_target_uni) + + prec_target = 1. / cov_target_uni + + # JT: what is _T? + _T = QS.cond_cov.dot(U5.T) + + prec_target_nosel = prec_target + U2 - U3 + + _P = -(U1.T.dot(QS.M5) + U2.dot(observed_target_uni)) + + bias_target = cov_target_uni.dot( + U1.T.dot(-U4.dot(observed_target_uni) + QS.M4.dot(QS.cond_mean)) - _P) + + _r = np.linalg.inv(prec_target_nosel).dot(prec_target.dot(bias_target)) + _S = np.linalg.inv(prec_target_nosel).dot(prec_target) + + S.append(_S) + r.append(_r) + precs.append(prec_target_nosel) + T.append(_T) + + self.conditional_spec = ConditionalSpec(np.array(precs), + np.array(S), + np.array(r), + np.array(T) # what is T here? + ) + + return self.conditional_spec + + # Private + + def _form_interaction_pieces(self, + QS, + regress_target_score, + cov_target): + + return target_query_Interactspec(QS, + regress_target_score, + cov_target) + class approximate_grid_inference(grid_inference): diff --git a/selectinf/randomized/base.py b/selectinf/randomized/base.py index cdaf21ca5..5a25ff11e 100644 --- a/selectinf/randomized/base.py +++ b/selectinf/randomized/base.py @@ -1,271 +1,19 @@ -from typing import NamedTuple -import numpy as np, pandas as pd +import numpy as np -from .selective_MLE import mle_inference -from ..base import target_query_Interactspec +def target_query_Interactspec(query_spec, + regress_target_score, + cov_target): -class ConditionalSpec(NamedTuple): - - # description of (preselection) conditional law of - # targets \hat{\theta} | u, N - # if they were unbiased, then: - # 1) precision will agree with marginal variance - # 2) scalings will all be 1 - # 3) shifts will be 0 - - precision : np.ndarray - scalings : np.ndarray - shifts : np.ndarray - T : np.ndarray # what is T? - -class grid_inference(object): - - def __init__(self, - query_spec, - target_spec, - solve_args={'tol': 1.e-12}, - ngrid=1000): - - """ - Produce p-values and confidence intervals for targets - of model including selected features - Parameters - ---------- - query : `gaussian_query` - A Gaussian query which has information - to describe implied Gaussian. - observed_target : ndarray - Observed estimate of target. - cov_target : ndarray - Estimated covaraince of target. - cov_target_score : ndarray - Estimated covariance of target and score of randomized query. - solve_args : dict, optional - Arguments passed to solver. 
- """ - - self.query_spec = query_spec - self.target_spec = target_spec - self.solve_args = solve_args - self.ngrid = ngrid - - G = mle_inference(query_spec, - target_spec, - solve_args=solve_args) - - _, inverse_info, log_ref = G.solve_estimating_eqn() - - TS = target_spec - self.ntarget = ntarget = TS.cov_target.shape[0] - _scale = 4 * np.sqrt(np.diag(inverse_info)) - self.inverse_info = inverse_info - - self.stat_grid = np.zeros((ntarget, ngrid)) - for j in range(ntarget): - self.stat_grid[j, :] = np.linspace(TS.observed_target[j] - 1.5 * _scale[j], - TS.observed_target[j] + 1.5 * _scale[j], - num=ngrid) - - def summary(self, - alternatives=None, - parameter=None, - level=0.9): - """ - Produce p-values and confidence intervals for targets - of model including selected features - Parameters - ---------- - alternatives : [str], optional - Sequence of strings describing the alternatives, - should be values of ['twosided', 'less', 'greater'] - parameter : np.array - Hypothesized value for parameter -- defaults to 0. - level : float - Confidence level. - """ - - TS = self.target_spec - - if parameter is not None: - pivots = self._pivots(parameter, - alternatives=alternatives) - else: - pivots = None - - pvalues = self._pivots(np.zeros_like(TS.observed_target), - alternatives=alternatives) - lower, upper = self._intervals(level=level) - - result = pd.DataFrame({'target': TS.observed_target, - 'pvalue': pvalues, - 'alternative': alternatives, - 'lower_confidence': lower, - 'upper_confidence': upper}) - - if not np.all(parameter == 0): - result.insert(4, 'pivot', pivots) - result.insert(5, 'parameter', parameter) - - return result - - def _approx_log_reference(self, - observed_target, - cov_target, - linear_coef, - grid): - - """ - Approximate the log of the reference density on a grid. - """ - if np.asarray(observed_target).shape in [(), (0,)]: - raise ValueError('no target specified') - - ref_hat = [] - solver = solve_barrier_affine_py - - for k in range(grid.shape[0]): - # in the usual D = N + Gamma theta.hat, - # regress_opt_target is "something" times Gamma, - # where "something" comes from implied Gaussian - # cond_mean is "something" times D - # Gamma is cov_target_score.T.dot(prec_target) - - cond_mean_grid = (linear_coef.dot(np.atleast_1d(grid[k] - observed_target)) + self.cond_mean) - conjugate_arg = self.cond_precision.dot(cond_mean_grid) - - val, _, _ = solver(conjugate_arg, - self.cond_precision, - self.observed_soln, - self.linear_part, - self.offset, - **self.solve_args) - - ref_hat.append(-val - (conjugate_arg.T.dot(self.cond_cov).dot(conjugate_arg) / 2.)) - - return np.asarray(ref_hat) - - def _pivots(self, - mean_parameter, - alternatives=None): - - TS = self.target_spec - - if not hasattr(self, "_families"): - self._construct_density() # generic - self._construct_families() # specific to the method - precs, S, r, _ = self.conditional_spec - - if alternatives is None: - alternatives = ['twosided'] * self.ntarget - - pivot = [] - - for m in range(self.ntarget): - - family = self._families[m] - var_target = 1. 
/ (precs[m][0, 0]) - - mean = S[m].dot(mean_parameter[m].reshape((1,))) + r[m] - # construction of pivot from families follows `selectinf.learning.core` - - _cdf = family.cdf((mean[0] - TS.observed_target[m]) / var_target, x=TS.observed_target[m]) - - if alternatives[m] == 'twosided': - pivot.append(2 * min(_cdf, 1 - _cdf)) - elif alternatives[m] == 'greater': - pivot.append(1 - _cdf) - elif alternatives[m] == 'less': - pivot.append(_cdf) - else: - raise ValueError('alternative should be in ["twosided", "less", "greater"]') - return pivot # , self._log_ref - - def _intervals(self, - level=0.9): - - TS = self.target_spec - - if not hasattr(self, "_families"): - self._construct_density() # generic - self._construct_families() # specific to the method - - precs, S, r, _ = self.conditional_spec - - lower, upper = [], [] - - for m in range(self.ntarget): - # construction of intervals from families follows `selectinf.learning.core` - family = self._families[m] - observed_target = TS.observed_target[m] - unbiased_est = (observed_target - r[m][0]) * (1./(S[m][0,0])) - - _l, _u = family.equal_tailed_interval(observed_target, - alpha=1 - level) - l = _l * (1./(S[m][0,0])) - u = _u * (1./(S[m][0,0])) - - var_target = 1. / (precs[m][0, 0]) - - lower.append(l * var_target + unbiased_est) - upper.append(u * var_target + unbiased_est) - - return np.asarray(lower), np.asarray(upper) - - ### Private method - - def _construct_density(self): - """ - What is this method doing? - """ - - TS = self.target_spec - QS = self.query_spec - - precs = [] - S = [] - r = [] - T = [] - - p = TS.regress_target_score.shape[1] - - for m in range(self.ntarget): - observed_target_uni = (TS.observed_target[m]).reshape((1,)) - cov_target_uni = (np.diag(TS.cov_target)[m]).reshape((1, 1)) - regress_target_score_uni = TS.regress_target_score[m, :].reshape((1, p)) - - U1, U2, U3, U4, U5 = target_query_Interactspec(QS, - regress_target_score_uni, - cov_target_uni) - - prec_target = 1. / cov_target_uni - - # JT: what is _T? - _T = QS.cond_cov.dot(U5.T) - - prec_target_nosel = prec_target + U2 - U3 - - _P = -(U1.T.dot(QS.M1.dot(QS.observed_score)) + U2.dot(observed_target_uni)) - - bias_target = cov_target_uni.dot( - U1.T.dot(-U4.dot(observed_target_uni) + QS.M1.dot(QS.opt_linear.dot(QS.cond_mean))) - _P) - - _r = np.linalg.inv(prec_target_nosel).dot(prec_target.dot(bias_target)) - _S = np.linalg.inv(prec_target_nosel).dot(prec_target) - - S.append(_S) - r.append(_r) - precs.append(prec_target_nosel) - T.append(_T) - - self.conditional_spec = ConditionalSpec(np.array(precs), - np.array(S), - np.array(r), - np.array(T) # what is T here? 
- ) - - return self.conditional_spec + QS = query_spec + prec_target = np.linalg.inv(cov_target) + U1 = regress_target_score.T.dot(prec_target) + U2 = U1.T.dot(QS.M2.dot(U1)) + U3 = U1.T.dot(QS.M3.dot(U1)) + U5 = U1.T.dot(QS.M4) + U4 = QS.M4.dot(QS.cond_cov).dot(U5.T) + return U1, U2, U3, U4, U5 diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index dbc7711da..ebc8cbd26 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -4,7 +4,7 @@ from scipy.stats import norm as ndist from ..distributions.discrete_family import discrete_family -from .base import grid_inference +from .approx_reference import grid_inference class exact_grid_inference(grid_inference): diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index 6f71819d0..e37023a9b 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -789,27 +789,33 @@ def _setup_implied_gaussian(self, cond_mean = regress_opt.dot(self.observed_score_state + observed_subgrad) ## probably missing a dispersion in the denominator + # this might be too big -- use a linear_transform instead prod_score_prec_unnorm = np.identity(self.nfeature) / (dispersion * ratio) ## probably missing a multiplicative factor of ratio cov_rand = self._unscaled_cov_score * (dispersion * ratio) M1 = prod_score_prec_unnorm * dispersion + M4 = M1.dot(opt_linear) M2 = M1.dot(cov_rand).dot(M1.T) - M3 = M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T) + M3 = M4.dot(cond_cov).dot(M4.T) # would be nice to not store these? self.M1 = M1 self.M2 = M2 self.M3 = M3 - + self.M4 = M4 + self.M5 = M1.dot(self.observed_score_state + observed_subgrad) + return (cond_mean, cond_cov, cond_precision, M1, M2, - M3) + M3, + self.M4, + self.M5) def _solve_randomized_problem(self, # optional binary vector diff --git a/selectinf/randomized/posterior_inference.py b/selectinf/randomized/posterior_inference.py index 256a5ae78..1dd16572f 100644 --- a/selectinf/randomized/posterior_inference.py +++ b/selectinf/randomized/posterior_inference.py @@ -8,7 +8,7 @@ from ..algorithms.barrier_affine import solve_barrier_affine_py from .selective_MLE import mle_inference -from ..base import target_query_Interactspec +from .base import target_query_Interactspec class PosteriorAtt(typing.NamedTuple): @@ -132,19 +132,19 @@ def _get_marginal_parameters(self): QS = self.query_spec TS = self.target_spec - U1, U2, U3, U4, U5 = target_query_Interactspec(QS, - TS.regress_target_score, - TS.cov_target) + U1, U2, U3, U4, U5 = self._form_interaction_pieces(QS, + TS.regress_target_score, + TS.cov_target) prec_target = np.linalg.inv(TS.cov_target) cond_precision = np.linalg.inv(QS.cond_cov) prec_target_nosel = prec_target + U2 - U3 - _P = -(U1.T.dot(QS.M1.dot(QS.observed_score)) + U2.dot(TS.observed_target)) + _P = -(U1.T.dot(QS.M5) + U2.dot(TS.observed_target)) bias_target = TS.cov_target.dot(U1.T.dot(-U4.dot(TS.observed_target) + - QS.M1.dot(QS.opt_linear.dot(QS.cond_mean))) - _P) + QS.M4.dot(QS.cond_mean)) - _P) ###set parameters for the marginal distribution of optimization variables @@ -165,6 +165,14 @@ def _get_marginal_parameters(self): S, prec_target_nosel) + def _form_interaction_pieces(self, + QS, + regress_target_score, + cov_target): + + return target_query_Interactspec(QS, + regress_target_score, + cov_target) ### sampling methods def langevin_sampler(selective_posterior, @@ -283,17 +291,3 @@ def __next__(self): return self.state -def 
target_query_Interactspec(query_spec, - regress_target_score, - cov_target): - - QS = query_spec - prec_target = np.linalg.inv(cov_target) - - U1 = regress_target_score.T.dot(prec_target) - U2 = U1.T.dot(QS.M2.dot(U1)) - U3 = U1.T.dot(QS.M3.dot(U1)) - U4 = QS.M1.dot(QS.opt_linear).dot(QS.cond_cov).dot(QS.opt_linear.T.dot(QS.M1.T.dot(U1))) - U5 = U1.T.dot(QS.M1.dot(QS.opt_linear)) - - return U1, U2, U3, U4, U5 diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index f67ba3ec1..32c86d0a0 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -25,10 +25,11 @@ class QuerySpec(NamedTuple): # score / randomization relationship - M1 : np.ndarray M2 : np.ndarray M3 : np.ndarray - + M4 : np.ndarray + M5 : np.ndarray + # observed values observed_opt_state : np.ndarray @@ -74,9 +75,10 @@ def specification(self): opt_linear=self.opt_linear, linear_part=self.affine_con.linear_part, offset=self.affine_con.offset, - M1=self.M1, M2=self.M2, M3=self.M3, + M4=self.M4, + M5=self.M5, observed_opt_state=self.observed_opt_state, observed_score_state=self.observed_score_state, observed_subgrad=self.observed_subgrad, @@ -136,12 +138,9 @@ def _setup_sampler(self, (cond_mean, cond_cov, - cond_precision, - M1, - M2, - M3) = self._setup_implied_gaussian(opt_linear, + cond_precision) = self._setup_implied_gaussian(opt_linear, observed_subgrad, - dispersion=dispersion) + dispersion=dispersion)[:3] self.cond_mean, self.cond_cov = cond_mean, cond_cov @@ -181,18 +180,23 @@ def _setup_implied_gaussian(self, M1 = prod_score_prec_unnorm * dispersion M2 = M1.dot(cov_rand).dot(M1.T) - M3 = M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T) + M4 = M1.dot(opt_linear) + M3 = M4.dot(cond_cov).dot(M4.T) self.M1 = M1 self.M2 = M2 self.M3 = M3 - + self.M4 = M4 + self.M5 = M1.dot(self.observed_score_state + observed_subgrad) + return (cond_mean, cond_cov, cond_precision, M1, M2, - M3) + M3, + self.M4, + self.M5) def inference(self, target_spec, diff --git a/selectinf/randomized/selective_MLE.py b/selectinf/randomized/selective_MLE.py index 0fff47de6..cc7aed4a2 100644 --- a/selectinf/randomized/selective_MLE.py +++ b/selectinf/randomized/selective_MLE.py @@ -2,9 +2,10 @@ import numpy as np, pandas as pd from scipy.stats import norm as ndist -from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C from ..algorithms.barrier_affine import solve_barrier_affine_py -from ..base import target_query_Interactspec + +from .selective_MLE_utils import solve_barrier_affine as solve_barrier_affine_C +from .base import target_query_Interactspec class mle_inference(object): @@ -25,18 +26,18 @@ def solve_estimating_eqn(self, QS = self.query_spec TS = self.target_spec - U1, U2, U3, U4, U5= target_query_Interactspec(QS, - TS.regress_target_score, - TS.cov_target) + U1, U2, U3, U4, U5 = self._form_interaction_pieces(QS, + TS.regress_target_score, + TS.cov_target) prec_target = np.linalg.inv(TS.cov_target) prec_target_nosel = prec_target + U2 - U3 - _P = -(U1.T.dot(QS.M1.dot(QS.observed_score)) + U2.dot(TS.observed_target)) + _P = -(U1.T.dot(QS.M5) + U2.dot(TS.observed_target)) bias_target = TS.cov_target.dot(U1.T.dot(-U4.dot(TS.observed_target) - + QS.M1.dot(QS.opt_linear.dot(QS.cond_mean))) - _P) + + QS.M4.dot(QS.cond_mean)) - _P) cond_precision = np.linalg.inv(QS.cond_cov) conjugate_arg = cond_precision.dot(QS.cond_mean) @@ -54,7 +55,7 @@ def solve_estimating_eqn(self, **self.solve_args) final_estimator = TS.cov_target.dot(prec_target_nosel).dot(TS.observed_target) \ - 
+ TS.regress_target_score.dot(QS.M1.dot(QS.opt_linear)).dot(QS.cond_mean - soln) \ + + TS.regress_target_score.dot(QS.M4).dot(QS.cond_mean - soln) \ - bias_target observed_info_natural = prec_target_nosel + U3 - U5.dot(hess.dot(U5.T)) @@ -101,17 +102,14 @@ def solve_estimating_eqn(self, return result, observed_info_mean, log_ref -def target_query_Interactspec(query_spec, - regress_target_score, - cov_target): + # Private - QS = query_spec - prec_target = np.linalg.inv(cov_target) + def _form_interaction_pieces(self, + QS, + regress_target_score, + cov_target): - U1 = regress_target_score.T.dot(prec_target) - U2 = U1.T.dot(QS.M2.dot(U1)) - U3 = U1.T.dot(QS.M3.dot(U1)) - U4 = QS.M1.dot(QS.opt_linear).dot(QS.cond_cov).dot(QS.opt_linear.T.dot(QS.M1.T.dot(U1))) - U5 = U1.T.dot(QS.M1.dot(QS.opt_linear)) + return target_query_Interactspec(QS, + regress_target_score, + cov_target) - return U1, U2, U3, U4, U5 diff --git a/selectinf/randomized/slope.py b/selectinf/randomized/slope.py index 3015c8259..c8c53b9bf 100644 --- a/selectinf/randomized/slope.py +++ b/selectinf/randomized/slope.py @@ -331,20 +331,25 @@ def _setup_implied_gaussian(self, M1 = prod_score_prec_unnorm * dispersion M2 = M1.dot(cov_rand).dot(M1.T) - M3 = M1.dot(opt_linear.dot(cond_cov).dot(opt_linear.T)).dot(M1.T) + M4 = M1.dot(opt_linear) + M3 = M4.dot(cond_cov).dot(M4.T) # would be nice to not store these? self.M1 = M1 self.M2 = M2 self.M3 = M3 + self.M4 = M4 + self.M5 = M1.dot(self.observed_score_state + observed_subgrad) return (cond_mean, cond_cov, cond_precision, M1, M2, - M3) + M3, + self.M4, + self.M5) def _solve_randomized_problem(self, # optional binary vector From a425869b1cf301ece6731c991b6777db4a64e1a8 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 13 Dec 2022 16:22:32 -0800 Subject: [PATCH 178/187] bootstrap lasso version --- selectinf/randomized/approx_reference.py | 3 +- .../randomized/approx_reference_grouplasso.py | 4 +- selectinf/randomized/exact_reference.py | 4 +- selectinf/randomized/lasso.py | 83 +++++++++++++++++++ selectinf/randomized/query.py | 6 +- 5 files changed, 92 insertions(+), 8 deletions(-) diff --git a/selectinf/randomized/approx_reference.py b/selectinf/randomized/approx_reference.py index 6feaca6db..81f907e13 100644 --- a/selectinf/randomized/approx_reference.py +++ b/selectinf/randomized/approx_reference.py @@ -343,7 +343,8 @@ def _approx_log_reference(self, # cond_mean is "something" times D # Gamma is cov_target_score.T.dot(prec_target) - cond_mean_grid = (linear_coef.dot(np.atleast_1d(grid[k] - observed_target)) + QS.cond_mean) + cond_mean_grid = (linear_coef.dot(np.atleast_1d(grid[k] - observed_target)) + + QS.cond_mean) conjugate_arg = cond_precision.dot(cond_mean_grid) val, _, _ = solver(conjugate_arg, diff --git a/selectinf/randomized/approx_reference_grouplasso.py b/selectinf/randomized/approx_reference_grouplasso.py index 5d90e981b..acd9bf811 100644 --- a/selectinf/randomized/approx_reference_grouplasso.py +++ b/selectinf/randomized/approx_reference_grouplasso.py @@ -546,8 +546,8 @@ def log_reference(self, eta = self.prec_opt.dot(self.regress_opt.dot(cov_target_score.T)) - implied_mean = np.asscalar(eta.T.dot(cond_mean_grid)) - implied_cov = np.asscalar(eta.T.dot(self.cond_cov).dot(eta)) + implied_mean = (eta.T.dot(cond_mean_grid)).item() + implied_cov = (eta.T.dot(self.cond_cov).dot(eta)).item() implied_prec = 1./implied_cov _A = self.cond_cov.dot(eta) * implied_prec diff --git a/selectinf/randomized/exact_reference.py b/selectinf/randomized/exact_reference.py index 
ebc8cbd26..209c40c97 100644 --- a/selectinf/randomized/exact_reference.py +++ b/selectinf/randomized/exact_reference.py @@ -40,8 +40,8 @@ def log_reference(self, eta = cond_precision.dot(linear_coef).dot(cov_target) - implied_mean = np.asscalar(eta.T.dot(cond_mean_grid)) - implied_cov = np.asscalar(eta.T.dot(QS.cond_cov).dot(eta)) + implied_mean = (eta.T.dot(cond_mean_grid)).item() + implied_cov = (eta.T.dot(QS.cond_cov).dot(eta)).item() implied_prec = 1./implied_cov _A = QS.cond_cov.dot(eta) * implied_prec diff --git a/selectinf/randomized/lasso.py b/selectinf/randomized/lasso.py index e37023a9b..0c87a0524 100644 --- a/selectinf/randomized/lasso.py +++ b/selectinf/randomized/lasso.py @@ -264,6 +264,89 @@ def _solve_randomized_problem(self, return observed_soln, observed_subgrad + @staticmethod + def fromsample(samples, + feature_weights, + proportion_select=0.5, + estimator=None, + covariance=None): + r""" + Squared-error LASSO with feature weights. + Objective function is (before randomization) + + .. math:: + + \beta \mapsto \frac{1}{2} (\beta-\hat{\beta})'\hat{\Sigma}^{-1}(\beta-\hat{\beta}) + \sum_{i=1}^p \lambda_i |\beta_i| + + where $\lambda$ is `feature_weights`, $\hat{\beta}$` is the row mean + of `samples` and $\hat{\Sigma}$ is its sample covariance. + + Parameters + ---------- + + samples : ndarray + Shape (B,p) -- the sample data matrix (e.g. bootstrap samples) + + feature_weights: [float, sequence] + Penalty weights. An intercept, or other unpenalized + features are handled by setting those entries of + `feature_weights` to 0. If `feature_weights` is + a float, then all parameters are penalized equally. + + quadratic : `regreg.identity_quadratic.identity_quadratic` (optional) + An optional quadratic term to be added to the objective. + Can also be a linear term by setting quadratic + coefficient to 0. + + ridge_term : float + How big a ridge term to add? + + randomizer_scale : float + Scale for IID components of randomizer. + + Returns + ------- + + L : `selection.randomized.lasso.lasso` + + """ + + samples = np.asarray(samples) + B, p = samples.shape + + if estimator is None: + estimator = samples.mean(0) + if covariance is None: + covariance = np.cov(samples.T) + + U, D, V = np.linalg.svd(covariance) + + sqrt_prec = U / np.sqrt(D)[None,:] + sqrt_prec = sqrt_prec.dot(U.T) + prec = sqrt_prec.dot(sqrt_prec.T) + np.testing.assert_allclose(prec, np.linalg.inv(covariance)) + Y = prec.dot(estimator) + + loglike = rr.glm.gaussian(sqrt_prec, + Y, + coef=1., + quadratic=None) + + # proportion should be used somewhere here... + + multiplier = 1 / proportion_select - 1 + randomizer = randomization.gaussian(prec * multiplier) + + idx = np.random.choice(B, 1)[0] + perturb = (samples[idx] - estimator) * np.sqrt(multiplier) + return (lasso(loglike, + np.asarray(feature_weights), + 0, + randomizer, + perturb=perturb), + perturb) + + @staticmethod def gaussian(X, Y, diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 32c86d0a0..d9a2a83cb 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -16,7 +16,7 @@ class QuerySpec(NamedTuple): # how S enters into E[o|S,u] - opt_linear : np.ndarray + opt_linear : np.ndarray # not sure if needed -- absorbed into M4,M5? 
# constraints @@ -139,8 +139,8 @@ def _setup_sampler(self, (cond_mean, cond_cov, cond_precision) = self._setup_implied_gaussian(opt_linear, - observed_subgrad, - dispersion=dispersion)[:3] + observed_subgrad, + dispersion=dispersion)[:3] self.cond_mean, self.cond_cov = cond_mean, cond_cov From 74369baf55e63ae62826308b2530bc9f4a8d7238 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 25 Apr 2023 16:30:22 -0700 Subject: [PATCH 179/187] updating docs --- .readthedocs.yml | 7 +++++-- doc-requirements.txt => doc/requirements.txt | 7 ++++--- doc/source/conf.py | 21 +++++++++++++++----- 3 files changed, 25 insertions(+), 10 deletions(-) rename doc-requirements.txt => doc/requirements.txt (72%) diff --git a/.readthedocs.yml b/.readthedocs.yml index bbfd45f45..8418b6d1e 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -18,12 +18,15 @@ sphinx: #formats: all # Optionally set the version of Python and requirements required to build your docs + python: - version: 3.6 + version: 3.9 install: - requirements: requirements.txt - - requirements: doc-requirements.txt + - requirements: doc/requirements.txt - method: setuptools path: . +submodules: + include: all \ No newline at end of file diff --git a/doc-requirements.txt b/doc/requirements.txt similarity index 72% rename from doc-requirements.txt rename to doc/requirements.txt index ab7ed399c..9833fbac5 100644 --- a/doc-requirements.txt +++ b/doc/requirements.txt @@ -1,6 +1,6 @@ # Requirements for building docs # Check these dependencies against doc/conf.py --r dev-requirements.txt +-r ../dev-requirements.txt sphinx>=1.4 numpydoc matplotlib @@ -8,8 +8,9 @@ texext nb2plots seaborn statsmodels -tensorflow +#tensorflow keras nbsphinx jupytext -sphinx_rtd_theme +sphinx-book-theme +myst_nb diff --git a/doc/source/conf.py b/doc/source/conf.py index addf6895c..5ab98cea8 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -44,7 +44,7 @@ 'sphinx_rtd_theme', 'texext.math_dollar', 'numpydoc', - 'nbsphinx' + 'myst_nb' ] # Current version (as of 11/2010) of numpydoc is only compatible with sphinx > @@ -118,16 +118,27 @@ # must exist either in Sphinx' static/ path, or in one of the custom paths # given in html_static_path. +# -- Options for HTML output + +html_theme = "sphinx_book_theme" html_theme_options = { - 'logo_only': True + "repository_url": "https://github.com/jonathan-taylor/selectinf.git", + "use_repository_button": True, +} +html_title = "Introduction to Statistical Learning (Python)" +html_logo = "logo.png" + +source_suffix = { + '.rst': 'restructuredtext', + '.ipynb': 'myst-nb', + '.myst': 'myst-nb', } -html_theme_path = ["../.."] -html_logo = "_static/logo.png" + html_show_sourcelink = True # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -html_title = 'Selection Documentation' +html_title = 'Selection Inference Documentation' # The name of an image file (within the static path) to place at the top of # the sidebar. 
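
The `M4`/`M5` bookkeeping introduced in the patches above is pure caching: `M4 = M1.dot(opt_linear)` and `M5 = M1.dot(observed_score_state + observed_subgrad)`, so the rewritten `target_query_Interactspec` should reproduce the `U4`/`U5` of the deleted version. A minimal sketch of that check, using random stand-ins for the query matrices (only the formulas are quoted from the diffs; the dimensions and values below are made up):

```python
# Sketch: check that the U4/U5 expressions written in terms of M4 agree with
# the older expressions written in terms of M1 and opt_linear.  All matrices
# are random stand-ins; only the formulas come from the patch.
import numpy as np

rng = np.random.default_rng(1)
p, q, k = 6, 4, 3                          # score dim, # opt variables, # targets

M1 = rng.standard_normal((p, p))
opt_linear = rng.standard_normal((p, q))
A = rng.standard_normal((q, q))
cond_cov = A.dot(A.T)                      # positive definite stand-in
U1 = rng.standard_normal((p, k))           # plays the role of regress_target_score.T @ prec_target

M4 = M1.dot(opt_linear)

# old definitions (deleted from base.py / posterior_inference.py / selective_MLE.py)
U5_old = U1.T.dot(M1.dot(opt_linear))
U4_old = M1.dot(opt_linear).dot(cond_cov).dot(opt_linear.T.dot(M1.T.dot(U1)))

# new definitions (added in randomized/base.py)
U5_new = U1.T.dot(M4)
U4_new = M4.dot(cond_cov).dot(U5_new.T)

assert np.allclose(U5_old, U5_new)
assert np.allclose(U4_old, U4_new)
```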
From 4aee7ad1a06ecbdf9275e0ee776a21537de1b222 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 25 Apr 2023 16:31:23 -0700 Subject: [PATCH 180/187] update requirements, remove some np.float --- requirements.txt | 1 + selectinf/distributions/discrete_family.py | 6 +++--- selectinf/sampling/truncnorm.pyx | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3ab08e8a6..efe900f71 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,4 @@ regreg # keras # tensorflow traitlets +scikit-learn diff --git a/selectinf/distributions/discrete_family.py b/selectinf/distributions/discrete_family.py index 6bdf10f55..7b96476db 100644 --- a/selectinf/distributions/discrete_family.py +++ b/selectinf/distributions/discrete_family.py @@ -25,7 +25,7 @@ def crit_func(test_statistic, left_cut, right_cut): Parameters ---------- - test_statistic : np.float + test_statistic : float Observed value of test statistic. left_cut : (float, float) @@ -37,7 +37,7 @@ def crit_func(test_statistic, left_cut, right_cut): Returns ------- - decision : np.float + decision : float """ CL, gammaL = left_cut @@ -80,7 +80,7 @@ def __init__(self, sufficient_stat, weights, theta=0.): The weights are normalized to sum to 1. """ - xw = np.array(sorted(zip(sufficient_stat, weights)), np.float) + xw = np.array(sorted(zip(sufficient_stat, weights)), float) self._x = xw[:,0] self._w = xw[:,1] self._lw = np.log(xw[:,1]) diff --git a/selectinf/sampling/truncnorm.pyx b/selectinf/sampling/truncnorm.pyx index a9d415a1e..04cb2bbe8 100644 --- a/selectinf/sampling/truncnorm.pyx +++ b/selectinf/sampling/truncnorm.pyx @@ -15,9 +15,9 @@ This module has a code to sample from a truncated normal distribution specified by a set of affine constraints. 
""" -DTYPE_float = np.float +DTYPE_float = float ctypedef cnp.float_t DTYPE_float_t -DTYPE_int = np.int +DTYPE_int = int ctypedef cnp.int_t DTYPE_int_t ctypedef cnp.intp_t DTYPE_intp_t From 14f022d5967c7cd57db4222320f07174c345aafc Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 25 Apr 2023 16:33:00 -0700 Subject: [PATCH 181/187] update python version for readthedocs --- .readthedocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 8418b6d1e..96ad0c8eb 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -20,7 +20,7 @@ sphinx: # Optionally set the version of Python and requirements required to build your docs python: - version: 3.9 + version: 3.8 install: - requirements: requirements.txt - requirements: doc/requirements.txt From 5552303f83aa939f1932d9a8c09ded107eea0a0e Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 25 Apr 2023 16:36:14 -0700 Subject: [PATCH 182/187] adding regreg as a submodule --- .gitmodules | 3 +++ regreg | 1 + 2 files changed, 4 insertions(+) create mode 160000 regreg diff --git a/.gitmodules b/.gitmodules index 134b4cb57..9f883b6df 100644 --- a/.gitmodules +++ b/.gitmodules @@ -5,3 +5,6 @@ path = C-software url = https://github.com/selective-inference/C-software.git +[submodule "regreg"] + path = regreg + url = https://github.com/jonathan-taylor/regreg.git diff --git a/regreg b/regreg new file mode 160000 index 000000000..1e411d1c8 --- /dev/null +++ b/regreg @@ -0,0 +1 @@ +Subproject commit 1e411d1c8edfae9d96c7247b19af2b7a7094f345 From 57da65973a31bf02658e9908c0441c5cd0690466 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 25 Apr 2023 16:36:41 -0700 Subject: [PATCH 183/187] trying to build regreg first --- .readthedocs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.readthedocs.yml b/.readthedocs.yml index 96ad0c8eb..bec99bda9 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -22,6 +22,8 @@ sphinx: python: version: 3.8 install: + - method: setuptools + path: regreg - requirements: requirements.txt - requirements: doc/requirements.txt - method: setuptools From f503c5cc0cc867de04d54fcc2e47972480b77168 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 25 Apr 2023 16:38:06 -0700 Subject: [PATCH 184/187] trying path of regreg --- .readthedocs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index bec99bda9..28ab7b116 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -22,8 +22,8 @@ sphinx: python: version: 3.8 install: - - method: setuptools - path: regreg + - method: pip + path: ./regreg - requirements: requirements.txt - requirements: doc/requirements.txt - method: setuptools From 6d2260dcd452d35c480cda03c08bc7ca5ddd8176 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 25 Apr 2023 16:39:40 -0700 Subject: [PATCH 185/187] trying again --- .readthedocs.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 28ab7b116..96ad0c8eb 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -22,8 +22,6 @@ sphinx: python: version: 3.8 install: - - method: pip - path: ./regreg - requirements: requirements.txt - requirements: doc/requirements.txt - method: setuptools From 456dcb803197b1dbf8175d6884257401785d16cb Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 25 Apr 2023 16:42:53 -0700 Subject: [PATCH 186/187] using URL in requirements file --- .gitmodules | 3 --- regreg | 1 - requirements.txt | 2 +- 3 files changed, 1 insertion(+), 5 
deletions(-) delete mode 160000 regreg diff --git a/.gitmodules b/.gitmodules index 9f883b6df..134b4cb57 100644 --- a/.gitmodules +++ b/.gitmodules @@ -5,6 +5,3 @@ path = C-software url = https://github.com/selective-inference/C-software.git -[submodule "regreg"] - path = regreg - url = https://github.com/jonathan-taylor/regreg.git diff --git a/regreg b/regreg deleted file mode 160000 index 1e411d1c8..000000000 --- a/regreg +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 1e411d1c8edfae9d96c7247b19af2b7a7094f345 diff --git a/requirements.txt b/requirements.txt index efe900f71..c08d325af 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ pandas mpmath pyinter sklearn -regreg +git+https://github.com/jonathan-taylor/regreg # keras # tensorflow traitlets From e448111f8241f92e8a67abfe38b6f9b802e83062 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 25 Apr 2023 16:46:17 -0700 Subject: [PATCH 187/187] trying again --- .readthedocs.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.readthedocs.yml b/.readthedocs.yml index 96ad0c8eb..cb5b32965 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -22,6 +22,9 @@ sphinx: python: version: 3.8 install: + - requirements: https://raw.githubusercontent.com/jonathan-taylor/regreg/master/requirements.txt + - method: pip + path: https://github.com/jonathan-taylor/regreg.git - requirements: requirements.txt - requirements: doc/requirements.txt - method: setuptools
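
A usage note for the `fromsample` constructor added in the "bootstrap lasso version" commit above: it builds a randomized LASSO directly from replicates of an estimator, defaulting to their sample mean and covariance. A minimal sketch under assumed workflow (the bootstrap loop, the penalty value, and the eventual call to `fit()` are illustrative assumptions, not part of the patch):

```python
# Sketch only: construct a randomized LASSO from bootstrap replicates of an
# OLS estimator.  The resampling loop and the post-construction fit() step are
# assumed workflow; fromsample's signature and its (lasso, perturb) return
# value are taken from the diff above.
import numpy as np
from selectinf.randomized.lasso import lasso

rng = np.random.default_rng(0)
n, p, B = 200, 10, 500
X = rng.standard_normal((n, p))
y = 0.5 * X[:, 0] + rng.standard_normal(n)

# bootstrap replicates of the OLS estimator (any estimator could be used here)
samples = np.zeros((B, p))
for b in range(B):
    idx = rng.integers(0, n, size=n)
    samples[b] = np.linalg.pinv(X[idx]).dot(y[idx])

# feature_weights passed as a vector, one weight per coordinate
L, perturb = lasso.fromsample(samples,
                              feature_weights=0.1 * np.ones(p),
                              proportion_select=0.5)

# From here the returned object is an ordinary randomized LASSO instance,
# e.g. signs = L.fit() followed by selective inference on the active set
# (assumed downstream workflow, not shown in the diff).
```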