From 5e23cd83df25f00f434eac2175cc8bac8ff5de21 Mon Sep 17 00:00:00 2001
From: Christian Glusa <caglusa@sandia.gov>
Date: Wed, 28 Feb 2024 15:43:47 -0700
Subject: [PATCH] doc+container updates

---
 .flake8                                       |   2 +
 .github/workflows/build.yml                   |  20 +-
 .github/workflows/container.yml               |   6 +-
 .github/workflows/documentation-ci.yml        |  69 +++++
 .github/workflows/documentation.yml           |  16 +-
 Dockerfile                                    |   9 +-
 Makefile                                      |   4 +-
 README.container.rst                          |  10 +-
 README.rst                                    |  39 +--
 .../LinearOperator_{SCALAR}.pxi               |  69 ++++-
 base/PyNucleus_base/utilsFem.py               |   2 +
 compose.yaml                                  |  38 ++-
 docs/Makefile                                 |  14 -
 docs/conf.py                                  |  28 +-
 docs/example1.py                              | 150 -----------
 docs/example1.rst                             | 139 ----------
 docs/example2.py                              | 146 ----------
 docs/example2.rst                             | 126 ---------
 docs/example3.py                              | 109 --------
 docs/example3.rst                             |  82 ------
 docs/features.rst                             |  37 ---
 docs/index.rst                                |   7 +-
 docs/installation.rst                         |  50 ++--
 drivers/runNonlocal.py                        |   2 +-
 examples/README.rst                           |   2 +
 examples/example1.py                          | 123 ---------
 examples/example2.py                          | 124 ---------
 examples/example_InfHorizonDirichlet.py       |  99 +++++++
 examples/example_Neumann.py                   | 106 ++++++++
 examples/example_nonlocal.py                  | 249 ++++++++++++++++++
 ...3.py => example_operator_interpolation.py} | 100 +++++--
 examples/example_pde.py                       | 232 ++++++++++++++++
 metisCy/PyNucleus_metisCy/__init__.py         |   1 +
 nl/PyNucleus_nl/clusterMethodCy.pyx           | 145 +++++-----
 nl/PyNucleus_nl/panelTypes.pxi                |  17 +-
 setup.py                                      |   3 +
 ...ixFormatH2--interactionellipse(0.5,1.0,0.) |  10 +
 ...ixFormatH2--interactionellipse(0.5,1.0,0.) |  10 +
 ...ixFormatH2--interactionellipse(0.5,1.0,0.) |  10 +
 ...utedH2Bcast--buildDistributedH2--no-write4 |   8 +
 ...utedH2Bcast--buildDistributedH2--no-write4 |   8 +
 ...utedH2Bcast--buildDistributedH2--no-write4 |   8 +
 ...utedH2Bcast--buildDistributedH2--no-write4 |   8 +
 43 files changed, 1191 insertions(+), 1246 deletions(-)
 create mode 100644 .github/workflows/documentation-ci.yml
 delete mode 100644 docs/Makefile
 delete mode 100755 docs/example1.py
 delete mode 100644 docs/example1.rst
 delete mode 100755 docs/example2.py
 delete mode 100644 docs/example2.rst
 delete mode 100755 docs/example3.py
 delete mode 100644 docs/example3.rst
 delete mode 100644 docs/features.rst
 create mode 100644 examples/README.rst
 delete mode 100644 examples/example1.py
 delete mode 100644 examples/example2.py
 create mode 100755 examples/example_InfHorizonDirichlet.py
 create mode 100755 examples/example_Neumann.py
 create mode 100755 examples/example_nonlocal.py
 rename examples/{example3.py => example_operator_interpolation.py} (53%)
 mode change 100644 => 100755
 create mode 100755 examples/example_pde.py
 create mode 100644 tests/cache_runNonlocal.py--domainsquare--kernelTypeconstant--problemquadratic(1.0,0.5,0.3)--solvercg-mg--matrixFormatH2--interactionellipse(0.5,1.0,0.)
 create mode 100644 tests/cache_runNonlocal.py--domainsquare--kernelTypefractional--problemquadratic(1.0,0.5,0.3)--solvercg-mg--matrixFormatH2--interactionellipse(0.5,1.0,0.)
 create mode 100644 tests/cache_runNonlocal.py--domainsquare--kernelTypeinverseDistance--problemquadratic(1.0,0.5,0.3)--solvercg-mg--matrixFormatH2--interactionellipse(0.5,1.0,0.)
 create mode 100644 tests/cache_testDistOp.py--domaininterval--sconst(0.25)--horizon0.01--horizonToMeshSize100.0--buildSparse--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--no-write4
 create mode 100644 tests/cache_testDistOp.py--domaininterval--sconst(0.75)--horizon0.01--horizonToMeshSize100.0--buildSparse--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--no-write4
 create mode 100644 tests/cache_testDistOp.py--domainsquare--sconst(0.25)--horizon1.0--horizonToMeshSize20.0--buildSparse--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--no-write4
 create mode 100644 tests/cache_testDistOp.py--domainsquare--sconst(0.75)--horizon1.0--horizonToMeshSize20.0--buildSparse--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--no-write4

diff --git a/.flake8 b/.flake8
index be74fd90..3b133ada 100644
--- a/.flake8
+++ b/.flake8
@@ -2,6 +2,8 @@
 extend-ignore =
     # E402: module level import not at top of file
     E402,
+    # E741 ambiguous variable name
+    E741,
     # H301: one import per line
     H301,
     # H306: imports not in alphabetical order
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 511c0d84..bfb684a0 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -44,17 +44,17 @@ jobs:
     steps:
     - name: Check out repo
       if: always()
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     - name: Pull ccache cache
       if: always()
       id: ccache-restore
-      uses: actions/cache/restore@v3
+      uses: actions/cache/restore@v4
       with:
         path: /home/runner/.cache/ccache
         key: ccache-${{ env.BUILD_IDENTIFIER }}
 
-    - uses: actions/setup-python@v4
+    - uses: actions/setup-python@v5
       if: always()
       with:
         python-version: ${{ matrix.py-version }}
@@ -89,7 +89,7 @@ jobs:
 
     - name: Push ccache cache
       if: always()
-      uses: actions/cache/save@v3
+      uses: actions/cache/save@v4
       with:
         path: /home/runner/.cache/ccache
         key: ccache-${{ env.BUILD_IDENTIFIER }}
@@ -115,7 +115,7 @@ jobs:
         mv cython-lint.xml cython-lint-${{ env.BUILD_IDENTIFIER }}.xml
 
     - name: Archive results
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       if: always()
       with:
         name: Results (${{ env.BUILD_PRETTY_IDENTIFIER }})
@@ -154,12 +154,12 @@ jobs:
 
     steps:
     - name: Check out repo
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     - name: Pull ccache cache
       if: always()
       id: ccache-restore
-      uses: actions/cache/restore@v3
+      uses: actions/cache/restore@v4
       with:
         path: /Users/runner/Library/Caches/ccache
         key: ccache-${{ runner.os }}-${{ matrix.py-version }}
@@ -167,7 +167,7 @@ jobs:
     - name: Setup GNU Fortran
       uses: fortran-lang/setup-fortran@v1
 
-    - uses: actions/setup-python@v4
+    - uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.py-version }}
 
@@ -203,7 +203,7 @@ jobs:
 
     - name: Push ccache cache
       if: always()
-      uses: actions/cache/save@v3
+      uses: actions/cache/save@v4
       with:
         path: /Users/runner/Library/Caches/ccache
         key: ccache-${{ runner.os }}-${{ matrix.py-version }}
@@ -229,7 +229,7 @@ jobs:
         mv cython-lint.xml cython-lint-${{ runner.os }}-${{ matrix.py-version }}.xml
 
     - name: Archive results
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       if: always()
       with:
         name: Results ${{ github.job }}
diff --git a/.github/workflows/container.yml b/.github/workflows/container.yml
index 5314567e..ab0aa17a 100644
--- a/.github/workflows/container.yml
+++ b/.github/workflows/container.yml
@@ -3,6 +3,8 @@ name: Container
 on:
   push:
     branches: [ "master" ]
+  pull_request:
+    branches: [ "master" ]
   workflow_dispatch:
 
 env:
@@ -21,7 +23,7 @@ jobs:
     steps:
     - name: Check out
       if: always()
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
       with:
         fetch-depth: 0
 
@@ -44,6 +46,7 @@ jobs:
         podman run -e MPIEXEC_FLAGS="--allow-run-as-root --oversubscribe" --rm ${{ steps.build_image.outputs.image }}:${{ github.sha }} python3 -m pytest --junit-xml=test-results.xml tests/
 
     - name: Push To GHCR
+      if: github.event_name == 'push'
       uses: redhat-actions/push-to-registry@v2
       id: push
       with:
@@ -56,5 +59,6 @@ jobs:
           --disable-content-trust
 
     - name: Echo outputs
+      if: github.event_name == 'push'
       run: |
         echo "${{ toJSON(steps.push.outputs) }}"
diff --git a/.github/workflows/documentation-ci.yml b/.github/workflows/documentation-ci.yml
new file mode 100644
index 00000000..247e4ebf
--- /dev/null
+++ b/.github/workflows/documentation-ci.yml
@@ -0,0 +1,69 @@
+name: Documentation CI
+
+on:
+  pull_request:
+    branches: [ "master" ]
+
+permissions:
+  contents: read
+  id-token: write
+
+jobs:
+
+  linux:
+    runs-on: ubuntu-latest
+    timeout-minutes: 180
+    env:
+      MPIEXEC_FLAGS: "--allow-run-as-root --oversubscribe"
+      PYNUCLEUS_BUILD_PARALLELISM: 2
+
+    steps:
+    - name: Check out repo
+      uses: actions/checkout@v4
+
+    - name: Pull ccache cache
+      id: ccache-restore
+      uses: actions/cache/restore@v4
+      with:
+        path: /home/runner/.cache/ccache
+        key: ccache
+
+    - uses: actions/setup-python@v5
+      with:
+        python-version: '3.11'
+
+    - name: Install Ubuntu packages
+      run: |
+        sudo apt-get update
+        sudo apt-get install mpi-default-bin mpi-default-dev libmetis-dev libparmetis-dev libsuitesparse-dev ccache
+
+    - name: Install Python dependencies
+      run: make prereq && make prereq-extra && python -m pip install wheel
+
+    - name: Install
+      run: make dev
+
+    - name: Remove ccache cache
+      if: ${{ steps.ccache-restore.outputs.cache-hit }}
+      shell: bash
+      env:
+        GH_TOKEN: ${{ github.token }}
+      run: |
+        gh extension install actions/gh-actions-cache
+        gh actions-cache delete ccache --confirm
+      continue-on-error: true
+
+    - name: Push ccache cache
+      uses: actions/cache/save@v4
+      with:
+        path: /home/runner/.cache/ccache
+        key: ccache
+
+    - name: Build documentation
+      run: |
+        make docs
+
+    - name: Upload artifact
+      uses: actions/upload-artifact@v4
+      with:
+        path: 'docs/build'
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index cd3e5767..15a83d9b 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -1,4 +1,3 @@
-
 name: Documentation
 
 on:
@@ -26,16 +25,16 @@ jobs:
 
     steps:
     - name: Check out repo
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     - name: Pull ccache cache
       id: ccache-restore
-      uses: actions/cache/restore@v3
+      uses: actions/cache/restore@v4
       with:
         path: /home/runner/.cache/ccache
         key: ccache
 
-    - uses: actions/setup-python@v4
+    - uses: actions/setup-python@v5
       with:
         python-version: '3.11'
 
@@ -61,7 +60,7 @@ jobs:
       continue-on-error: true
 
     - name: Push ccache cache
-      uses: actions/cache/save@v3
+      uses: actions/cache/save@v4
       with:
         path: /home/runner/.cache/ccache
         key: ccache
@@ -69,16 +68,15 @@ jobs:
     - name: Build documentation
       run: |
         make docs
-        cat docs/example1_stepMesh.py
 
     - name: Setup Pages
-      uses: actions/configure-pages@v3
+      uses: actions/configure-pages@v4
 
     - name: Upload artifact
-      uses: actions/upload-pages-artifact@v1
+      uses: actions/upload-pages-artifact@v3
       with:
         path: 'docs/build'
 
     - name: Deploy to GitHub Pages
       id: deployment
-      uses: actions/deploy-pages@v2
+      uses: actions/deploy-pages@v4
diff --git a/Dockerfile b/Dockerfile
index bedbb330..6fa9e3ed 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -25,6 +25,7 @@ RUN sed -i 's/Components: main/Components: main contrib non-free/' /etc/apt/sour
         libmetis-dev libparmetis-dev \
         texlive texlive-extra-utils texlive-latex-extra ttf-staypuft dvipng cm-super \
         jupyter-notebook \
+        emacs-nox vim \
   --no-install-recommends \
   && rm -rf /var/lib/apt/lists/*
 
@@ -36,12 +37,10 @@ WORKDIR /pynucleus
 RUN make prereq PIP_FLAGS=--no-cache-dir && \
     make prereq-extra PIP_FLAGS=--no-cache-dir && \
     make install && \
+    make docs && \
     python -m pip install --no-cache-dir ipykernel && \
     rm -rf build packageTools/build base/build metisCy/build fem/build multilevelSolver/build nl/build
 
-RUN echo "alias ls='ls --color=auto -FN'" >> /root/.bashrc \
-    && echo 'set completion-ignore-case On' >> /root/.inputrc
-
 # allow running MPI as root in the container
 # bind MPI ranks to hwthreads
 ENV OMPI_MCA_hwloc_base_binding_policy=hwthread \
@@ -53,4 +52,6 @@ RUN python -m ipykernel install --name=PyNucleus
 
 COPY README.container.rst /README.container.rst
 # hadolint ignore=SC2016
-RUN echo '[ ! -z "$TERM" -a -r /README.container.rst ] && cat /README.container.rst' >> /etc/bash.bashrc
+RUN echo '[ ! -z "$TERM" -a -r /README.container.rst ] && printf "\e[32m" && cat /README.container.rst && printf "\e[0m"' >> /etc/bash.bashrc
+
+WORKDIR /root
diff --git a/Makefile b/Makefile
index 3a73925f..296a6b19 100644
--- a/Makefile
+++ b/Makefile
@@ -115,8 +115,8 @@ clean_package :
 
 .PHONY: docs
 docs :
-	cd docs && make
 	$(PYTHON) -m sphinx -b html docs docs/build
+	find docs/build/_downloads -name "*.ipynb" | xargs -I {} cp {} examples
 
 clean_docs :
 	cd docs; rm -rf build
@@ -169,7 +169,7 @@ prereq:
 	$(PYTHON) -m pip install $(PIP_FLAGS) $(PIP_INSTALL_FLAGS) scikit-sparse
 
 prereq-extra:
-	$(PYTHON) -m pip install $(PIP_FLAGS) pytest pytest-html pytest-xdist Sphinx sphinxcontrib-programoutput flake8 flake8-junit-report cython-lint
+	$(PYTHON) -m pip install $(PIP_FLAGS) pytest pytest-html pytest-xdist Sphinx sphinxcontrib-programoutput sphinx-gallery sphinx-rtd-theme flake8 flake8-junit-report cython-lint
 
 flake8:
 	$(PYTHON) -m flake8 --output-file=flake8.txt --exit-zero drivers examples packageTools base metisCy fem multilevelSolver nl tests
diff --git a/README.container.rst b/README.container.rst
index 7dd78449..aa88c13b 100644
--- a/README.container.rst
+++ b/README.container.rst
@@ -1,6 +1,12 @@
 
 This is a container image for PyNucleus.
 
-The drivers and examples for PyNucleus can be found in /pynucleus/drivers and /pynucleus/examples
+The directory from which the container was launched on the host system is mapped to /root.
+PyNucleus is installed at /pynucleus.
+A copy of the drivers and examples for PyNucleus can be found in /root/drivers and /root/examples.
+The Jupyter notebook interface is available at https://localhost:8889 on the host.
+A quick way to check that everything works is to run
 
-The directory from which the container was launched on the host system is mapped to /user
+  /root/drivers/runFractional.py
+
+This should print some information about the solution of a fractional Laplacian problem and show several plots.
diff --git a/README.rst b/README.rst
index 94ff2aa8..f86fc654 100644
--- a/README.rst
+++ b/README.rst
@@ -73,10 +73,14 @@ and open ``docs/build/index.html`` in your browser.
 Possible ways to install and use PyNucleus
 ==================================
 
+There are several ways to install and run PyNucleus:
+
 * container image
 * Spack installation
 * manual installation
 
+The easiest way to get up and running is probably the container image.
+
 
 Container image
 ----------------
@@ -84,44 +88,47 @@ Container image
 The simplest way to use PyNucleus is to pull a container image from the GitHub Container Registry.
 This requires an installation of either
 
-* podman (https://podman.io/) and podman-compose (https://github.com/containers/podman-compose) or
-* Docker (https://www.docker.com/) and Docker Compose (https://docs.docker.com/compose/install/).
+* `podman <https://podman.io/>`_ and `podman-compose <https://github.com/containers/podman-compose?tab=readme-ov-file#installation>`_ or
+* `Docker <https://www.docker.com/>`_ and `Docker Compose <https://docs.docker.com/compose/install/>`_.
 
 For many Linux distributions these can be installed from the package repositories.
-In what follows we will assume that we are using podman.
+In what follows we will assume that we are using `podman`.
 All commands for Docker should be identical up to the substitution of `podman` with `docker`.
 
-For example, on Ubuntu podman can be installed with
+For example, on Ubuntu podman and podman-compose can be installed with
 
 .. code-block:: shell
 
    sudo apt-get install podman podman-compose
 
-Instructions for other platforms can be found here: https://podman.io/docs/installation
+Instructions for other platforms can be found `here <https://podman.io/docs/installation>`_.
+
+Once podman is installed, we download a copy of `compose.yaml <https://github.com/sandialabs/PyNucleus/blob/master/compose.yaml>`_ and save it to an empty directory.
+
+.. warning::
+   Please do not copy this file to your home directory and launch the container from there.
+   The container keeps its state in the directory where it is launched from.
 
-Once podman is installed, we can download a copy of https://github.com/sandialabs/PyNucleus/blob/master/compose.yaml and save it to an empty directory.
 In that directory we then run
 
 .. code-block:: shell
 
-   podman compose run pynucleus
+   podman-compose run pynucleus
+
+podman will download a container image for PyNucleus and then launch a shell in the container.
+
+.. note::
+   The download of the image will only happen once, but it could be several GB in size.
 
-This launches a shell on the container with PyNucleus.
 A simple way to test if things work is to run
 
 .. code-block:: shell
 
    drivers/runFractional.py
 
-This should print some information about the solution of a fractional Laplacian problem and open up several plots.
-
-For development using PyNucleus it can be useful to launch a Jupyter notebook server:
-
-.. code-block:: shell
-
-   podman compose up pynucleus-jupyter
+This should print some information about the solution of a fractional Laplacian problem and show several plots.
 
-and then open the Jupyter notebook interface at https://localhost:8889
+For development using PyNucleus there is the Jupyter notebook interface that is available while the container is running at https://localhost:8889 on the host system.
 
 
 Spack install
diff --git a/base/PyNucleus_base/LinearOperator_{SCALAR}.pxi b/base/PyNucleus_base/LinearOperator_{SCALAR}.pxi
index 7a46414d..7fe29652 100644
--- a/base/PyNucleus_base/LinearOperator_{SCALAR}.pxi
+++ b/base/PyNucleus_base/LinearOperator_{SCALAR}.pxi
@@ -97,6 +97,7 @@ cdef class {SCALAR_label}LinearOperator:
         cdef:
             np.ndarray[{SCALAR}_t, ndim=1] y
             {SCALAR}_t[::1] x_mv
+            {SCALAR_label}TimeStepperLinearOperator tsOp
         try:
             x_mv = x
             y = np.zeros((self.num_rows), dtype={SCALAR})
@@ -107,6 +108,9 @@ cdef class {SCALAR_label}LinearOperator:
                 return {SCALAR_label}Product_Linear_Operator(self, x)
             elif isinstance(self, {SCALAR_label}LinearOperator) and hasattr(x, 'ndim') and x.ndim == 2:
                 return self.dotMV(x)
+            elif isinstance(self, {SCALAR_label}TimeStepperLinearOperator) and isinstance(x, (float, int, {SCALAR})):
+                tsOp = self
+                return {SCALAR_label}TimeStepperLinearOperator(self, tsOp.M, tsOp.S, tsOp.facS*x, tsOp.facM*x)
             elif isinstance(self, {SCALAR_label}LinearOperator) and isinstance(x, (float, int, {SCALAR})):
                 return {SCALAR_label}Multiply_Linear_Operator(self, x)
             elif isinstance(x, {SCALAR_label}LinearOperator) and isinstance(self, (float, int, {SCALAR})):
@@ -339,6 +343,35 @@ cdef class {SCALAR_label}TimeStepperLinearOperator({SCALAR_label}LinearOperator)
             assign3(y, y, 1.0, self.z, self.facM)
         return 0
 
+    cdef INDEX_t matvecTrans(self,
+                             {SCALAR}_t[::1] x,
+                             {SCALAR}_t[::1] y) except -1:
+        if self.facS != 0.:
+            self.S.matvecTrans(x, y)
+            if self.facS != 1.0:
+                scaleScalar(y, self.facS)
+        if self.facM == 1.0:
+            self.M.matvecTrans_no_overwrite(x, y)
+        else:
+            self.M.matvecTrans(x, self.z)
+            assign3(y, y, 1.0, self.z, self.facM)
+        return 0
+
+    cdef INDEX_t matvecTrans_no_overwrite(self,
+                                          {SCALAR}_t[::1] x,
+                                          {SCALAR}_t[::1] y) except -1:
+        if self.facS == 1.0:
+            self.S.matvecTrans_no_overwrite(x, y)
+        elif self.facS != 0.:
+            self.S.matvecTrans(x, self.z)
+            assign3(y, y, 1.0, self.z, self.facS)
+        if self.facM == 1.0:
+            self.M.matvecTrans_no_overwrite(x, y)
+        elif self.facM != 0.:
+            self.M.matvecTrans(x, self.z)
+            assign3(y, y, 1.0, self.z, self.facM)
+        return 0
+
     def get_diagonal(self):
         return (self.facM*np.array(self.M.diagonal, copy=False) +
                 self.facS*np.array(self.S.diagonal, copy=False))
@@ -413,6 +446,22 @@ cdef class {SCALAR_label}Multiply_Linear_Operator({SCALAR_label}LinearOperator):
             scaleScalar(y, self.factor)
         return 0
 
+    cdef INDEX_t matvecTrans(self,
+                             {SCALAR}_t[::1] x,
+                             {SCALAR}_t[::1] y) except -1:
+        self.A.matvecTrans(x, y)
+        scaleScalar(y, self.factor)
+        return 0
+
+    cdef INDEX_t matvecTrans_no_overwrite(self,
+                                          {SCALAR}_t[::1] x,
+                                          {SCALAR}_t[::1] y) except -1:
+        if self.factor != 0.:
+            scaleScalar(y, 1./self.factor)
+            self.A.matvecTrans_no_overwrite(x, y)
+            scaleScalar(y, self.factor)
+        return 0
+
     def isSparse(self):
         return self.A.isSparse()
 
@@ -472,17 +521,31 @@ cdef class {SCALAR_label}Product_Linear_Operator({SCALAR_label}LinearOperator):
     cdef INDEX_t matvec(self,
                         {SCALAR}_t[::1] x,
                         {SCALAR}_t[::1] y) except -1:
-        self.B(x, self.temporaryMemory)
-        self.A(self.temporaryMemory, y)
+        self.B.matvec(x, self.temporaryMemory)
+        self.A.matvec(self.temporaryMemory, y)
         return 0
 
     cdef INDEX_t matvec_no_overwrite(self,
                                      {SCALAR}_t[::1] x,
                                      {SCALAR}_t[::1] y) except -1:
-        self.B(x, self.temporaryMemory)
+        self.B.matvec(x, self.temporaryMemory)
         self.A.matvec_no_overwrite(self.temporaryMemory, y)
         return 0
 
+    cdef INDEX_t matvecTrans(self,
+                             {SCALAR}_t[::1] x,
+                             {SCALAR}_t[::1] y) except -1:
+        self.A.matvecTrans(x, self.temporaryMemory)
+        self.B.matvecTrans(self.temporaryMemory, y)
+        return 0
+
+    cdef INDEX_t matvecTrans_no_overwrite(self,
+                                          {SCALAR}_t[::1] x,
+                                          {SCALAR}_t[::1] y) except -1:
+        self.A.matvecTrans(x, self.temporaryMemory)
+        self.B.matvecTrans_no_overwrite(self.temporaryMemory, y)
+        return 0
+
     cdef void _residual(self,
                         {SCALAR}_t[::1] x,
                         {SCALAR}_t[::1] rhs,
diff --git a/base/PyNucleus_base/utilsFem.py b/base/PyNucleus_base/utilsFem.py
index c6d1b720..61c80b67 100644
--- a/base/PyNucleus_base/utilsFem.py
+++ b/base/PyNucleus_base/utilsFem.py
@@ -1376,6 +1376,8 @@ def runDriver(path, py, python=None, timeout=600, ranks=None, cacheDir='',
             cache += str(ranks)
             runOutput += str(ranks)
         py += ['--test', '--testCache={}'.format(cache)]
+        if 'OVERWRITE_CACHE' in os.environ:
+            overwriteCache = True
         if overwriteCache:
             py += ['--overwriteCache']
     else:
diff --git a/compose.yaml b/compose.yaml
index 995e3abe..dc004c50 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -1,9 +1,8 @@
 version: 3
 
 services:
-
   # Launch with:
-  # docker compose run pynucleus
+  # podman-compose run pynucleus
   pynucleus:
     image: ghcr.io/sandialabs/pynucleus:latest
     build: .
@@ -16,30 +15,23 @@ services:
       - HTTP_PROXY=${HTTP_PROXY}
       - HTTPS_PROXY=${HTTPS_PROXY}
     volumes:
-      # The current directory on host gets mapped to /pynucleus/user in the container
-      - $PWD:/pynucleus/user
+      # The current directory on host gets mapped to /pynucleus/root in the container
+      - $PWD:/root
       # map files to container to allow GUI windows
       - /tmp/.X11-unix:/tmp/.X11-unix
       - $XAUTHORITY:/root/.Xauthority
-    network_mode: host
-    hostname: pynucleus-container
-
-  # Launch with:
-  # docker compose up
-  # Then open localhost:8889 in your browser
-  pynucleus-jupyter:
-    image: ghcr.io/sandialabs/pynucleus:latest
-    build: .
-    command: jupyter notebook --port=8888 --no-browser --ip=0.0.0.0 --allow-root --NotebookApp.token='' --NotebookApp.password='' --notebook-dir=/notebooks --KernelSpecManager.ensure_native_kernel=False --KernelSpecManager.allowed_kernelspecs=pynucleus
-    environment:
-      # expose host proxies
-      - http_proxy=${http_proxy}
-      - https_proxy=${https_proxy}
-      - HTTP_PROXY=${HTTP_PROXY}
-      - HTTPS_PROXY=${HTTPS_PROXY}
-    volumes:
-      # The notebook subdirectory on host gets mapped to /notebooks in the container
-      - $PWD/notebooks:/notebooks
     ports:
       # Expose a Jupyter notebook server from the container
       - 8889:8888
+    network_mode: host
+    hostname: pynucleus-container
+    command: >
+      sh -c "
+             mkdir -p /root/notebooks &&
+             mkdir -p /root/drivers &&
+             cp --update=none /pynucleus/examples/* /root/examples &&
+             cp --update=none /pynucleus/drivers/* /root/drivers &&
+             if [ ! -f /root/.bashrc ]; then echo \"alias ls='ls --color=auto -FN'\" >> /root/.bashrc ; fi &&
+             if [ ! -f /root/.inputrc ]; then echo \"set completion-ignore-case On\" >> /root/.inputrc ; fi &&
+             jupyter notebook --port=8888 --no-browser --ip=0.0.0.0 --allow-root --NotebookApp.token='' --NotebookApp.password='' --notebook-dir=/root/ --KernelSpecManager.ensure_native_kernel=False --KernelSpecManager.allowed_kernelspecs=pynucleus > /dev/null 2>&1 &
+             /bin/bash"
diff --git a/docs/Makefile b/docs/Makefile
deleted file mode 100644
index 5494e5b7..00000000
--- a/docs/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-PYTHON ?= python3
-
-
-example1:
-	$(PYTHON) example1.py --export example1_stepMesh.py --finalTarget mesh
-	$(PYTHON) example1.py --export example1_stepDoFMap.py --finalTarget dofmap
-	$(PYTHON) example1.py --export example1_stepFunctions.py --finalTarget functions
-	$(PYTHON) example1.py --export example1_stepSolvers.py --finalTarget solvers
-	$(PYTHON) example1.py --export example1_stepErrors.py --finalTarget innerNorm
-
-	$(PYTHON) example2.py --export example2_stepKernelFracInf.py --finalTarget kernelFracInf
-	$(PYTHON) example2.py --export example2_stepMeshFracInf.py --finalTarget meshFracInf
-	$(PYTHON) example2.py --export example2_stepSolveFracInf.py --finalTarget solveFracInf
-	$(PYTHON) example2.py --export example2_stepFiniteHorizon.py --finalTarget finiteHorizon
diff --git a/docs/conf.py b/docs/conf.py
index e26e5f92..dcb9aba6 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -38,7 +38,8 @@
     'sphinx.ext.autodoc',
     'sphinx.ext.viewcode',
     'sphinxcontrib.programoutput',
-    'matplotlib.sphinxext.plot_directive'
+    'matplotlib.sphinxext.plot_directive',
+    'sphinx_gallery.gen_gallery'
 ]
 
 autodoc_default_options = {
@@ -49,6 +50,26 @@
     'special-members': '__init__,__call__'
 }
 
+
+from sphinx_gallery.sorting import _SortKey
+
+examples_filenames_in_order = ['example_pde.py',
+                               'example_nonlocal.py',
+                               'example_InfHorizonDirichlet.py',
+                               'example_Neumann.py',
+                               'example_operator_interpolation.py']
+
+class ExplicitExampleOrder(_SortKey):
+    def __call__(self, filename):
+        return examples_filenames_in_order.index(filename)
+
+
+sphinx_gallery_conf = {
+    'examples_dirs': ['../examples'],
+    'filename_pattern': '/example',
+    'within_subsection_order': ExplicitExampleOrder
+}
+
 # Add any paths that contain templates here, relative to this directory.
 # templates_path = ['_templates']
 
@@ -63,10 +84,11 @@
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
-html_theme = 'bizstyle'
+html_theme = 'sphinx_rtd_theme'
 
 html_theme_options = {
-    'sidebarwidth': '400px'
+    'sidebarwidth': '400px',
+    'collapse_navigation': False,
 }
 
 
diff --git a/docs/example1.py b/docs/example1.py
deleted file mode 100755
index 85c73819..00000000
--- a/docs/example1.py
+++ /dev/null
@@ -1,150 +0,0 @@
-#!/usr/bin/env python3
-###################################################################################
-# Copyright 2021 National Technology & Engineering Solutions of Sandia,           #
-# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the           #
-# U.S. Government retains certain rights in this software.                        #
-# If you want to use this code, please refer to the README.rst and LICENSE files. #
-###################################################################################
-
-from PyNucleus.packageTools.sphinxTools import codeRegionManager
-
-mgr = codeRegionManager()
-
-with mgr.add('imports'):
-    import matplotlib.pyplot as plt
-    from numpy import sqrt
-
-with mgr.add('mesh'):
-    ######################################################################
-    # Get a mesh and refine it
-
-    from PyNucleus import meshFactory
-
-with mgr.add('mesh', onlyIfFinal=True):
-    # show available options
-    meshFactory.print()
-
-with mgr.add('mesh'):
-    mesh = meshFactory('square', ax=0., ay=0., bx=1., by=1.)
-    for _ in range(3):
-        mesh = mesh.refine()
-with mgr.add('mesh', onlyIfFinal=True):
-    print('Mesh:', mesh)
-    plt.figure().gca().set_title('Mesh')
-    mesh.plot()
-
-with mgr.add('dofmap'):
-    ######################################################################
-    # Construct a finite element space
-    from PyNucleus import dofmapFactory
-
-with mgr.add('dofmap', onlyIfFinal=True):
-    # show available options
-    dofmapFactory.print()
-
-with mgr.add('dofmap'):
-    # We use piecewise linears
-    dm = dofmapFactory('P1', mesh)
-with mgr.add('dofmap', onlyIfFinal=True):
-    print('DoFMap:', dm)
-
-    plt.figure().gca().set_title('DoFMap')
-    dm.plot()
-
-with mgr.add('functions'):
-    ######################################################################
-    # Construct some simple functions
-    from PyNucleus import functionFactory
-
-with mgr.add('functions', onlyIfFinal=True):
-    # show available options
-    functionFactory.print()
-
-with mgr.add('functions'):
-    # functions defined via Python lambdas
-    rhs_1 = functionFactory('Lambda', lambda x: 2*x[0]*(1-x[0]) + 2*x[1]*(1-x[1]))
-    exact_solution_1 = functionFactory('Lambda', lambda x: x[0]*(1-x[0])*x[1]*(1-x[1]))
-
-    # Functions defined via Cython implementations -> faster evaluation
-    rhs_2 = functionFactory('rhsFunSin2D')
-    exact_solution_2 = functionFactory('solSin2D')
-
-    # assemble right-hand side vectors and interpolate the exact solutions
-    b1 = dm.assembleRHS(rhs_1)
-    u_interp_1 = dm.interpolate(exact_solution_1)
-
-with mgr.add('functions', onlyIfFinal=True):
-    print('Linear system RHS:', b1)
-    print('Interpolated solution:', u_interp_1)
-
-with mgr.add('functions'):
-    b2 = dm.assembleRHS(rhs_2)
-    u_interp_2 = dm.interpolate(exact_solution_2)
-
-with mgr.add('functions', onlyIfFinal=True):
-    plt.figure().gca().set_title('Interpolated solution')
-    u_interp_1.plot()
-
-with mgr.add('matrices'):
-    ######################################################################
-    # Assemble mass and Laplacian stiffness matrices
-    mass = dm.assembleMass()
-    laplacian = dm.assembleStiffness()
-with mgr.add('matrices', onlyIfFinal=True):
-    print('Linear system matrix:', laplacian)
-
-with mgr.add('solvers'):
-    ######################################################################
-    # Construct solvers
-    from PyNucleus import solverFactory
-
-with mgr.add('solvers', onlyIfFinal=True):
-    # show available options
-    solverFactory.print()
-
-with mgr.add('solvers'):
-    solver_direct = solverFactory('lu', A=laplacian)
-    solver_direct.setup()
-with mgr.add('solvers', onlyIfFinal=True):
-    print('Direct solver:', solver_direct)
-
-with mgr.add('solvers'):
-    solver_krylov = solverFactory('cg', A=laplacian)
-    solver_krylov.setup()
-    solver_krylov.maxIter = 100
-    solver_krylov.tolerance = 1e-8
-with mgr.add('solvers', onlyIfFinal=True):
-    print('Krylov solver:', solver_krylov)
-
-with mgr.add('solvers'):
-    u1 = dm.zeros()
-    solver_direct(b1, u1)
-
-    u2 = dm.zeros()
-    numIter = solver_krylov(b2, u2)
-with mgr.add('solvers', onlyIfFinal=True):
-    print('Number of iterations:', numIter)
-
-    plt.figure().gca().set_title('Error')
-    (u_interp_1-u1).plot(flat=True)
-
-with mgr.add('innerNorm', onlyIfFinal=True):
-    ######################################################################
-    # Inner products and norms
-    print('Residual norm 1st solve: ', (b1-laplacian*u1).norm())
-    print('Residual norm 2nd solve: ', (b2-laplacian*u2).norm())
-
-with mgr.add('innerNorm'):
-    # Compute errors
-    H10_error_1 = sqrt(b1.inner(u_interp_1-u1))
-    L2_error_1 = sqrt((u_interp_1-u1).inner(mass*(u_interp_1-u1)))
-    H10_error_2 = sqrt(b2.inner(u_interp_2-u2))
-    L2_error_2 = sqrt((u_interp_2-u2).inner(mass*(u_interp_2-u2)))
-
-with mgr.add('innerNorm', onlyIfFinal=True):
-    print('1st problem - H10:', H10_error_1, 'L2:', L2_error_1)
-    print('2nd problem - H10:', H10_error_2, 'L2:', L2_error_2)
-
-
-with mgr.add('final'):
-    plt.show()
diff --git a/docs/example1.rst b/docs/example1.rst
deleted file mode 100644
index f7349544..00000000
--- a/docs/example1.rst
+++ /dev/null
@@ -1,139 +0,0 @@
-
-Example 1 - A simple PDE problem
-================================
-
-In this first example, we will construct a finite element discretization of a classical PDE problem and solve it.
-The full code of this example can be found in `examples/example1.py <https://github.com/sandialabs/PyNucleus/blob/master/examples/example1.py>`_.
-
-Factories
----------
-
-The creation of different groups of objects, such as finite element spaces or meshes, use factories.
-The available classes that a factory provides can be displayed by calling the ``print()`` method of the factory.
-An object is built by passing the name of the desired class and additional parameters to the factory.
-If this sounds vague now, don't worry, the examples below will make it clear.
-
-Meshes
-------
-
-The first object we need to create is a mesh to support the finite element discretization.
-We start by construction a mesh for a square domain :math:`\Omega=[0, 1] \times [0, 1]` and refining it uniformly three times:
-
-.. literalinclude:: ../examples/example1.py
-   :start-after: Get a mesh
-   :end-before: #################
-   :lineno-match:
-
-The output of the above code snippet is given below.
-In particular, we see what other meshes we could have constructed using the ``meshFactory``, apart from 'square', and what parameters we can pass to the factory,
-We also see that we created a 2d mesh with 289 vertices and 512 cells.
-
-.. program-output:: python3 example1.py --finalTarget mesh
-
-.. plot:: example1_stepMesh.py
-
-Many PyNucleus objects have a ``plot`` method, similar to the mesh that we just created.
-
-DoFMaps
--------
-
-In the next step, we create a finite element space on the mesh.
-By default, we assume a Dirichlet condition on the entire boundary of the domain.
-We build a piecewise linear finite element space.
-
-.. literalinclude:: ../examples/example1.py
-   :start-after: Construct a finite element space
-   :end-before: #################
-   :lineno-match:
-
-.. program-output:: python3 example1.py --finalTarget dofmap
-
-.. plot:: example1_stepDoFMap.py
-
-Functions and vectors
----------------------
-
-Functions can either be defined in Python, or in Cython.
-The advantage of the latter is that their code is compiled, which speeds up evaluation significantly.
-A couple of compiled functions are already available via the ``functionFactory``.
-A generic Python function can be used via the ``Lambda`` function class.
-
-We will be solving the problem
-
-.. math::
-
-   -\Delta u &= f & \text{ in } \Omega, \\
-   u &= 0 & \text{ on } \partial \Omega,
-
-for two different forcing functions :math:`f`.
-
-We assemble the right-hand side
-
-.. math::
-
-   \int_\Omega f v
-
-of the linear system by calling the ``assembleRHS`` method of the DoFMap object, and interpolate the exact solutions into the finite element space.
-
-
-.. literalinclude:: ../examples/example1.py
-   :start-after: Construct some simple functions
-   :end-before: #################
-   :lineno-match:
-
-.. program-output:: python3 example1.py --finalTarget functions
-
-.. plot:: example1_stepFunctions.py
-
-Matrices
---------
-
-We assemble two matrices, the mass matrix
-
-.. math::
-
-   \int_\Omega u v
-
-and the stiffness matrix associated with the Laplacian
-
-.. math::
-
-   \int_\Omega \nabla u \cdot \nabla v
-
-.. literalinclude:: ../examples/example1.py
-   :start-after: Assemble mass
-   :end-before: #######
-   :lineno-match:
-
-.. program-output:: python3 example1.py --finalTarget matrices
-
-Solvers
--------
-
-Now that we have assembled our linear system, we want to solve it.
-We choose to solve one system using an LU solver, and the other one using a CG solver.
-
-.. literalinclude:: ../examples/example1.py
-   :start-after: Construct solvers
-   :end-before: #################
-   :lineno-match:
-
-.. program-output:: python3 example1.py --finalTarget solvers
-
-.. plot:: example1_stepSolvers.py
-
-Norms and inner products
-------------------------
-
-Finally, we want to check that we actually solved the system by computing residual errors.
-We also compute errors in :math:`H^1_0` and :math:`L^2` norms.
-
-.. literalinclude:: ../examples/example1.py
-   :start-after: Inner products
-   :end-before: plt.show
-   :lineno-match:
-
-.. program-output:: python3 example1.py --finalTarget innerNorm
-
-This concludes our first example.
-Next, we turn to nonlocal equations.
diff --git a/docs/example2.py b/docs/example2.py
deleted file mode 100755
index f7d98a40..00000000
--- a/docs/example2.py
+++ /dev/null
@@ -1,146 +0,0 @@
-#!/usr/bin/env python3
-###################################################################################
-# Copyright 2021 National Technology & Engineering Solutions of Sandia,           #
-# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the           #
-# U.S. Government retains certain rights in this software.                        #
-# If you want to use this code, please refer to the README.rst and LICENSE files. #
-###################################################################################
-
-from PyNucleus.packageTools.sphinxTools import codeRegionManager
-
-mgr = codeRegionManager()
-
-with mgr.add('imports'):
-    import matplotlib.pyplot as plt
-    from time import time
-
-with mgr.add('kernelFracInf'):
-    ######################################################################
-    # Get a fractional kernel
-    from PyNucleus import kernelFactory
-
-with mgr.add('kernelFracInf', onlyIfFinal=True):
-    # show available options
-    kernelFactory.print()
-
-with mgr.add('kernelFracInf'):
-    from numpy import inf
-    kernelFracInf = kernelFactory('fractional', dim=2, s=0.75, horizon=inf)
-
-with mgr.add('kernelFracInf', onlyIfFinal=True):
-    print(kernelFracInf)
-    plt.figure().gca().set_title('Fractional kernel')
-    kernelFracInf.plot()
-
-
-with mgr.add('meshFracInf'):
-    ######################################################################
-    # Generate an appropriate mesh
-    from PyNucleus import nonlocalMeshFactory, HOMOGENEOUS_DIRICHLET
-
-    # Get a mesh that is appropriate for the problem, i.e. with the required interaction domain.
-    meshFracInf, _ = nonlocalMeshFactory('disc', kernel=kernelFracInf, boundaryCondition=HOMOGENEOUS_DIRICHLET, hTarget=0.15)
-
-with mgr.add('meshFracInf', onlyIfFinal=True):
-    print(meshFracInf)
-    plt.figure().gca().set_title('Mesh for fractional kernel')
-    meshFracInf.plot()
-
-with mgr.add('assemblyFracInf'):
-    ######################################################################
-    # Assemble the operator
-    from PyNucleus import dofmapFactory, functionFactory
-
-    dmFracInf = dofmapFactory('P1', meshFracInf)
-
-    rhs = functionFactory('constant', 1.)
-    exact_solution = functionFactory('solFractional', dim=2, s=0.75)
-
-    b = dmFracInf.assembleRHS(rhs)
-    u_exact = dmFracInf.interpolate(exact_solution)
-    u = dmFracInf.zeros()
-
-    # Assemble the operator in dense format.
-    start = time()
-    A_fracInf = dmFracInf.assembleNonlocal(kernelFracInf, matrixFormat='dense')
-with mgr.add('assemblyFracInf', onlyIfFinal=True):
-    print('Dense assembly took {}s'.format(time()-start))
-
-with mgr.add('assemblyFracInf'):
-    start = time()
-    A_fracInf_h2 = dmFracInf.assembleNonlocal(kernelFracInf, matrixFormat='h2')
-with mgr.add('assemblyFracInf', onlyIfFinal=True):
-    print('Hierarchical assembly took {}s'.format(time()-start))
-
-    print(A_fracInf)
-    print(A_fracInf_h2)
-
-with mgr.add('solveFracInf'):
-    ######################################################################
-    # Solve the linear system
-    from PyNucleus import solverFactory
-    from numpy import sqrt
-
-    solver = solverFactory('lu', A=A_fracInf, setup=True)
-    solver(b, u)
-
-    Hs_err = sqrt(abs(b.inner(u-u_exact)))
-
-with mgr.add('solveFracInf', onlyIfFinal=True):
-    print('Hs error: {}'.format(Hs_err))
-    plt.figure().gca().set_title('Numerical solution, fractional kernel')
-    u.plot()
-
-with mgr.add('finiteHorizon'):
-    ######################################################################
-    # Solve a problem with finite horizon
-    kernelConst = kernelFactory('constant', dim=2, horizon=0.2)
-
-with mgr.add('finiteHorizon', onlyIfFinal=True):
-    print(kernelConst)
-    plt.figure().gca().set_title('Constant kernel')
-    kernelConst.plot()
-
-with mgr.add('finiteHorizon'):
-    from PyNucleus import DIRICHLET
-
-    meshConst, nIConst = nonlocalMeshFactory('square', kernel=kernelConst, boundaryCondition=DIRICHLET, hTarget=0.18)
-
-with mgr.add('finiteHorizon', onlyIfFinal=True):
-    print(meshConst)
-    plt.figure().gca().set_title('Mesh for constant kernel')
-    meshConst.plot()
-
-with mgr.add('finiteHorizon'):
-    dmConst = dofmapFactory('P1', meshConst, nIConst['domain'])
-    dmConstInteraction = dmConst.getComplementDoFMap()
-
-    A_const = dmConst.assembleNonlocal(kernelConst, matrixFormat='sparse')
-    B_const = dmConst.assembleNonlocal(kernelConst, dm2=dmConstInteraction, matrixFormat='sparse')
-
-    g = functionFactory('Lambda', lambda x: -(x[0]**2 + x[1]**2)/4)
-    g_interp = dmConstInteraction.interpolate(g)
-
-    b = dmConst.assembleRHS(rhs)-(B_const*g_interp)
-    u = dmConst.zeros()
-
-    solver = solverFactory('cg', A=A_const, setup=True)
-    solver.maxIter = 1000
-    solver.tolerance = 1e-8
-
-    solver(b, u)
-
-    u_global = u.augmentWithBoundaryData(g_interp)
-
-    plt.figure().gca().set_title('Numerical solution, constant kernel')
-    u_global.plot()
-
-    plt.figure().gca().set_title('Analytic solution, constant kernel')
-    u_global.dm.interpolate(g).plot()
-
-with mgr.add('finiteHorizon', onlyIfFinal=True):
-    print(A_const)
-
-with mgr.add('final'):
-    ######################################################################
-    plt.show()
diff --git a/docs/example2.rst b/docs/example2.rst
deleted file mode 100644
index 7d8cd90d..00000000
--- a/docs/example2.rst
+++ /dev/null
@@ -1,126 +0,0 @@
-
-Example 2 - Nonlocal problems
-=============================
-
-I this second example, we will assemble and solve several nonlocal equations.
-The full code of this example can be found in `examples/example2.py <https://github.com/sandialabs/PyNucleus/blob/master/examples/example2.py>`_.
-
-PyNucleus can assemble operators of the form
-
-.. math::
-
-   \mathcal{L}u(x) = \int_{\mathbb{R}^d} [u(y)-u(x)] \gamma(x, y) dy.
-
-The kernel :math:`\gamma` is of the form
-
-.. math::
-
-   \gamma(x,y) = \phi(x, y) |x-y|^{-\beta(x,y)} \chi_{V_\delta(x)}(y).
-
-Here, :math:`\phi` is a positive function, and :math:`\chi` is the indicator function.
-:math:`0<\delta\le\infty` is called the horizon and determines the size of the kernel support :math:`V_\delta(x) \subset \mathbb{R}^d`.
-The singularity :math:`\beta` of the kernel depends on the family of kernels:
-
-- fractional type: :math:`\beta(x,y)=d+2s(x,y)`, where :math:`d` is the spatial dimension and :math:`s(x,y)` is the fractional order.
-- constant type :math:`\beta(x,y)=0`
-- peridynamic type :math:`\beta(x,y)=1`
-
-At present, the only implemented interaction regions are balls in the 2-norm:
-
-.. math::
-
-   V_{\delta}^{(2)}(x) = \{y \in \mathbb{R}^d | ||x-y||_2<\delta\}.
-
-
-A fractional kernel
--------------------
-
-We start off by creating a fractional kernel with infinite horizon and constant fractional order :math:`s=0.75`.
-
-.. literalinclude:: ../examples/example2.py
-   :start-after: Get a fractional kernel
-   :end-before: #################
-   :lineno-match:
-
-.. program-output:: python3 example2.py --finalTarget kernelFracInf
-
-.. plot:: example2_stepKernelFracInf.py
-
-By default, kernels are normalized. This can be disabled by passing `normalized=False`.
-
-
-Nonlocal assembly
------------------
-
-We will be solving the problem
-
-.. math::
-
-   -\mathcal{L} u &= f && \text{ in } \Omega=B(0,1)\subset\mathbb{R}^2, \\
-   u &= 0 && \text{ in } \mathbb{R}^2 \setminus \Omega,
-
-for constant forcing function :math:`f=1`.
-
-First, we generate a mesh.
-Instead of the `meshFactory` used in the previous example, we now use the `nonlocalMeshFactory`.
-The advantage is that this factory can generate meshes with appropriate interaction domains.
-For this particular example, the factory will not generate any interaction domain, since the homogeneous Dirichlet condition on :math:`\mathbb{R}^2\setminus\Omega` can be enforced via a boundary integral.
-
-.. literalinclude:: ../examples/example2.py
-   :start-after: Generate an appropriate mesh
-   :end-before: #################
-   :lineno-match:
-
-.. program-output:: python3 example2.py --finalTarget meshFracInf
-
-.. plot:: example2_stepMeshFracInf.py
-
-Next, we obtain a piecewise linear, continuous DoFMap on the mesh, assemble the RHS and interpolate the known analytic solution.
-We assemble the nonlocal operator by passing the kernel to the `assembleNonlocal` method of the DoFMap object.
-The optional parameter `matrixFormat` determines what kind of linear operator is assembled.
-We time the assembly of the operator as a dense matrix, and as a hierarchical matrix, and inspect the resulting objects.
-
-.. literalinclude:: ../examples/example2.py
-   :start-after: Assemble the operator
-   :end-before: #################
-   :lineno-match:
-
-.. program-output:: python3 example2.py --finalTarget assemblyFracInf
-
-It can be observed that both assembly routines take roughly the same amount of time.
-The reason for this is that the operator itself has quite small dimensions.
-For larger number of unknowns, we expect the hierarchical assembly scale like :math:`\mathcal{O}(N \log^{2d} N)`, whereas the dense assembly will scale at best like :math:`\mathcal{O}(N^2)`.
-
-Similar to the local PDE example, we can then solve the resulting linear equation and compute the error in energy norm.
-
-.. literalinclude:: ../examples/example2.py
-   :start-after: Solve the linear system
-   :end-before: #################
-   :lineno-match:
-
-.. program-output:: python3 example2.py --finalTarget solveFracInf
-
-.. plot:: example2_stepSolveFracInf.py
-
-
-A finite horizon case
----------------------
-
-Next, we solve a nonlocal Poisson problem involving a constant kernel with finite horizon.
-We will choose :math:`\gamma(x,y) \sim \chi_{V_{\delta}^{(2)}(x)}(y)` for :math:`\delta=0.2`, and solve
-
-.. math::
-
-   -\mathcal{L} u &= f && \text{ in } \Omega=[0,1]^2, \\
-   u &= -(x_1^2 + x_2^2)/4 && \text{ in } \mathcal{I},
-
-where :math:`\mathcal{I}:=\{y\in\mathbb{R}^2\setminus\Omega | \exists x\in\Omega: \gamma(x,y)\neq 0\}` is the interaction domain.
-
-.. literalinclude:: ../examples/example2.py
-   :start-after: Solve a problem with finite horizon
-   :end-before: #################
-   :lineno-match:
-
-.. program-output:: python3 example2.py --finalTarget finiteHorizon
-
-.. plot:: example2_stepFiniteHorizon.py
diff --git a/docs/example3.py b/docs/example3.py
deleted file mode 100755
index d96d0962..00000000
--- a/docs/example3.py
+++ /dev/null
@@ -1,109 +0,0 @@
-#!/usr/bin/env python3
-###################################################################################
-# Copyright 2021 National Technology & Engineering Solutions of Sandia,           #
-# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the           #
-# U.S. Government retains certain rights in this software.                        #
-# If you want to use this code, please refer to the README.rst and LICENSE files. #
-###################################################################################
-
-from PyNucleus.packageTools.sphinxTools import codeRegionManager
-
-mgr = codeRegionManager()
-
-with mgr.add('preamble'):
-    ######################################################################
-    # preamble
-    import logging
-    from PyNucleus_base.utilsFem import TimerManager
-    from PyNucleus import meshFactory, dofmapFactory, kernelFactory, functionFactory, solverFactory
-    from PyNucleus_nl.operatorInterpolation import admissibleSet
-    import h5py
-    from PyNucleus_base.linear_operators import LinearOperator
-    from PyNucleus_fem.DoFMaps import DoFMap
-
-    fmt = '{message}'
-    logging.basicConfig(level=logging.INFO,
-                        format=fmt,
-                        style='{',
-                        datefmt="%Y-%m-%d %H:%M:%S")
-    logger = logging.getLogger('__main__')
-    timer = TimerManager(logger)
-
-with mgr.add('setup'):
-    # Set up a mesh and a dofmap on it.
-    mesh = meshFactory('interval', hTarget=2e-3, a=-1, b=1)
-    dm = dofmapFactory('P1', mesh)
-
-    # Construct a RHS vector and a vector for the solution.
-    f = functionFactory('constant', 1.)
-    b = dm.assembleRHS(f)
-    u = dm.zeros()
-
-    # construct a fractional kernel with fractional order in S = [0.05, 0.95]
-    s = admissibleSet([0.05, 0.95])
-    kernel = kernelFactory('fractional', s=s, dim=mesh.dim)
-
-with mgr.add('operator'):
-    ######################################################################
-    # The operator is set up to be constructed on-demand.
-    # We partition the interval S into several sub-interval and construct a Chebyshev interpolant on each sub-interval.
-    # Therefore this operation is fast.
-    with timer('operator creation'):
-        A = dm.assembleNonlocal(kernel, matrixFormat='H2')
-
-with mgr.add('firstSolve'):
-    ######################################################################
-    # Set s = 0.75.
-    A.set(0.75)
-
-    # Let's solve a system.
-    # This triggers the assembly of the operators for the matrices at the interpolation nodes of the interval that contains s.
-    # The required matrices are constructed on-demand and then stay in memory.
-    with timer('solve 1 (slow)'):
-        solver = solverFactory('cg-jacobi', A=A, setup=True)
-        solver.maxIter = 1000
-        numIter = solver(b, u)
-    logger.info('Solved problem for s={} in {} iterations (residual norm {})'.format(A.get(), numIter, solver.residuals[-1]))
-
-with mgr.add('secondSolve'):
-    ######################################################################
-    # Let's solve a second sytem for a closeby value of s.
-    # This should be faster since we no longer need to assemble any matrices.
-    with timer('solve 2 (fast)'):
-        A.set(0.76)
-        solver = solverFactory('cg-jacobi', A=A, setup=True)
-        solver.maxIter = 1000
-        numIter = solver(b, u)
-    logger.info('Solved problem for s={} in {} iterations (residual norm {})'.format(A.get(), numIter, solver.residuals[-1]))
-
-with mgr.add('saveToFile'):
-    ######################################################################
-    # We can save the operator and the DoFMap to a file.
-    # This will trigger the assembly of all matrices.
-    with timer('save operator'):
-        h5_file = h5py.File('test.hdf5', 'w')
-        A.HDF5write(h5_file.create_group('A'))
-        dm.HDF5write(h5_file.create_group('dm'))
-        h5_file.close()
-
-with mgr.add('readFromFile'):
-    ######################################################################
-    # Now we can read them back in.
-    with timer('load operator'):
-        h5_file = h5py.File('test.hdf5', 'r')
-        A_2 = LinearOperator.HDF5read(h5_file['A'])
-        dm_2 = DoFMap.HDF5read(h5_file['dm'])
-        h5_file.close()
-
-with mgr.add('thirdSolve'):
-    # Set up and solve a system with the operator we loaded.
-    f_2 = functionFactory('constant', 2.)
-    b_2 = dm_2.assembleRHS(f_2)
-    u_2 = dm_2.zeros()
-    with timer('solve 3 (fast)'):
-        A_2.set(0.8)
-        solver = solverFactory('cg-jacobi', A=A_2, setup=True)
-        solver.maxIter = 1000
-        numIter = solver(b_2, u_2)
-    logger.info('Solved problem for s={} in {} iterations (residual norm {})'.format(A_2.get(), numIter, solver.residuals[-1]))
-    ######################################################################
diff --git a/docs/example3.rst b/docs/example3.rst
deleted file mode 100644
index e2a5f230..00000000
--- a/docs/example3.rst
+++ /dev/null
@@ -1,82 +0,0 @@
-
-Example 3 - Operator interpolation
-==================================
-
-This example demostrates the construction of a family of fractional
-Laplacians parametrized by the fractional order using operator
-interpolation. This can reduce the cost compared to assembling a new
-matrix for each value.
-
-The fractional Laplacian
-
-.. math::
-
-  (-\Delta)^{s} \text{ for } s \in [s_{\min}, s_{\max}] \subset (0, 1)
-
-is approximated by
-
-.. math::
-
-   (-\Delta)^{s} \approx \sum_{m=0}^{M} \Theta_{k,m}(s) (-\Delta)^{s_{k,m}} \text{ for } s \in \mathcal{S}_{k}
-
-for a sequence of intervals :math:`\mathcal{S}_{k}` that cover :math:`[s_{\min}, s_{\max}]` and scalar coefficients :math:`\Theta_{k,m}(s)`.
-The number of intervals and interpolation nodes is picked so that the interpolation error is dominated by the discretization error.
-
-The following example can be found at `examples/example3.py <https://github.com/sandialabs/PyNucleus/blob/master/examples/example3.py>`_.
-
-We set up a mesh, a dofmap and a fractional kernel.
-Instead of specifying a single value for the fractional order, we allow a range of values :math:`[s_{\min}, s_{\max}]=[0.05, 0.95]`.
-
-.. literalinclude:: ../examples/example3.py
-   :start-after: preamble
-   :end-before: #################
-   :lineno-match:
-
-Next, we call the assembly of a nonlocal operator as before.
-Since the operator for a particular value of the fractional order will be constructed on demand, this operation is fast.
-
-.. literalinclude:: ../examples/example3.py
-   :start-after: The operator is set up to be constructed on-demand
-   :end-before: #################
-   :lineno-match:
-
-.. program-output:: python3 example3.py --finalTarget operator
-
-Next, we choose the value of the fractional order. This needs to be within the range that we specified earlier.
-We then solve a linear system involving the operator.
-
-.. literalinclude:: ../examples/example3.py
-   :start-after: Set s = 0.75
-   :end-before: #################
-   :lineno-match:
-
-.. program-output:: python3 example3.py --finalTarget firstSolve
-
-This solve is relatively slow, as it involves the assembly of the nonlocal operators that are needed for the interpolation.
-We select a different value for the fractional order that is close to the first.
-Solving a linear system with this value is faster as we have already assembled the operator needed for the interpolation.
-
-.. literalinclude:: ../examples/example3.py
-   :start-after: This should be faster
-   :end-before: #################
-   :lineno-match:
-
-.. program-output:: python3 example3.py --finalTarget secondSolve
-
-Next, we save the operator to file.
-This first triggers the assembly of all operators nescessary to represent every value in :math:`s\in[0.05,0.95]`.
-
-.. literalinclude:: ../examples/example3.py
-   :start-after: We can save the operator
-   :end-before: #################
-   :lineno-match:
-
-.. program-output:: python3 example3.py --finalTarget saveToFile
-
-Next, we read the operator back in and solve another linear system.
-
-.. literalinclude:: ../examples/example3.py
-   :start-after: Now we can read them
-   :lineno-match:
-
-.. program-output:: python3 example3.py --finalTarget thirdSolve
diff --git a/docs/features.rst b/docs/features.rst
deleted file mode 100644
index ad222014..00000000
--- a/docs/features.rst
+++ /dev/null
@@ -1,37 +0,0 @@
-
-Features
-========
-
-* Simplical meshes in 1D, 2D, 3D
-
-* Dense and sparse (CSR, symmetric CSR, hierarchical) matrix formats
-
-* Finite Elements:
-
-  * continuous P1, P2, P3 spaces,
-  * discontinuous P0 space
-
-* Solvers/preconditioners:
-
-  * LU,
-  * Cholesky,
-  * incomplete LU & Cholesky,
-  * CG,
-  * BiCGStab,
-  * GMRES,
-  * geometric multigrid
-
-* Assembly of local operators
-
-* Nonlocal kernels:
-
-  * Finite and infinite horizon
-  * Interaction domains defined with respect to different norms
-  * Singularities: fractional, peridynamic, indicator kernel
-  * spatially variable kernels
-
-* Nonlocal assembly (1D and 2D) into dense, sparse and hierarchical matrices
-
-* Distributed computing using MPI
-
-* Partitioning using METIS / ParMETIS
diff --git a/docs/index.rst b/docs/index.rst
index 79a83cd1..cd132e06 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -64,15 +64,10 @@ Getting started
    installation
 
 
-Examples
---------
-
 .. toctree::
    :maxdepth: 2
 
-   example1
-   example2
-   example3
+   auto_examples/index
 
 Drivers
 -------
diff --git a/docs/installation.rst b/docs/installation.rst
index 8a5d37f3..160327c3 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -1,60 +1,68 @@
 
-Possible ways to install and use PyNucleus
-==========================================
+Installation
+============
+
+There are several ways to install and run PyNucleus:
 
 * container image
 * Spack installation
 * manual installation
 
+The easiest way to get up and running is probably the container image.
 
-Container image
-----------------
+Pre-built container image
+-------------------------
 
 The simplest way to use PyNucleus is to pull a container image from the GitHub Container Registry.
 This requires an installation of either
 
-* podman (https://podman.io/) and podman-compose (https://github.com/containers/podman-compose) or
-* Docker (https://www.docker.com/) and Docker Compose (https://docs.docker.com/compose/install/).
+* `podman <https://podman.io/>`_ and `podman-compose <https://github.com/containers/podman-compose?tab=readme-ov-file#installation>`_ or
+* `Docker <https://www.docker.com/>`_ and `Docker Compose <https://docs.docker.com/compose/install/>`_.
 
 For many Linux distributions these can be installed from the package repositories.
-In what follows we will assume that we are using podman.
+In what follows we will assume that we are using `podman`.
 All commands for Docker should be identical up to the substitution of `podman` with `docker`.
 
-For example, on Ubuntu podman can be installed with
+For example, on Ubuntu podman and podman-compose can be installed with
 
 .. code-block:: shell
 
    sudo apt-get install podman podman-compose
 
-Instructions for other platforms can be found here: https://podman.io/docs/installation
+Instructions for other platforms can be found `here <https://podman.io/docs/installation>`_.
+
+Once podman is installed, we download a copy of `compose.yaml <https://github.com/sandialabs/PyNucleus/blob/master/compose.yaml>`_ and save it to an empty directory.
+
+.. warning::
+   Please do not copy this file to your home directory and launch the container from there.
+   The container keeps its state in the directory where it is launched from.
 
-Once podman is installed, we can download a copy of https://github.com/sandialabs/PyNucleus/blob/master/compose.yaml and save it to an empty directory.
 In that directory we then run
 
 .. code-block:: shell
 
-   podman compose run pynucleus
+   podman-compose run pynucleus
+
+podman will download a container image for PyNucleus and then launch a shell in the container.
+
+.. note::
+   The download of the image will only happen once, but it could be several GB in size.
 
-This launches a shell on the container with PyNucleus.
 A simple way to test if things work is to run
 
 .. code-block:: shell
 
    drivers/runFractional.py
 
-This should print some information about the solution of a fractional Laplacian problem and open up several plots.
-
-For development using PyNucleus it can be useful to launch a Jupyter notebook server:
-
-.. code-block:: shell
+This should print some information about the solution of a fractional Laplacian problem and show several plots.
 
-   podman compose up pynucleus-jupyter
+For development using PyNucleus there is the Jupyter notebook interface that is available while the container is running at https://localhost:8889 on the host system.
 
-and then open the Jupyter notebook interface at https://localhost:8889
 
+Spack installation
+------------------
 
-Spack install
--------------
+This installation compiles PyNucleus and all its dependencies from scratch.
 
 In order to install Spack itself, follow the instructions at https://github.com/spack/spack.
 
diff --git a/drivers/runNonlocal.py b/drivers/runNonlocal.py
index 7286dcf7..97122936 100755
--- a/drivers/runNonlocal.py
+++ b/drivers/runNonlocal.py
@@ -7,7 +7,7 @@
 ###################################################################################
 
 from mpi4py import MPI
-from PyNucleus import driver, NEUMANN
+from PyNucleus import driver
 from PyNucleus.nl import (nonlocalPoissonProblem,
                           discretizedNonlocalProblem)
 
diff --git a/examples/README.rst b/examples/README.rst
new file mode 100644
index 00000000..347c2770
--- /dev/null
+++ b/examples/README.rst
@@ -0,0 +1,2 @@
+Examples
+--------
diff --git a/examples/example1.py b/examples/example1.py
deleted file mode 100644
index 2f13dcc9..00000000
--- a/examples/example1.py
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/env python3
-###################################################################################
-# Copyright 2021 National Technology & Engineering Solutions of Sandia,           #
-# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the           #
-# U.S. Government retains certain rights in this software.                        #
-# If you want to use this code, please refer to the README.rst and LICENSE files. #
-###################################################################################
-
-import matplotlib.pyplot as plt
-from numpy import sqrt
-
-######################################################################
-# Get a mesh and refine it
-
-from PyNucleus import meshFactory
-
-# show available options
-meshFactory.print()
-
-mesh = meshFactory('square', ax=0., ay=0., bx=1., by=1.)
-for _ in range(3):
-    mesh = mesh.refine()
-
-print('Mesh:', mesh)
-plt.figure().gca().set_title('Mesh')
-mesh.plot()
-
-######################################################################
-# Construct a finite element space
-from PyNucleus import dofmapFactory
-
-# show available options
-dofmapFactory.print()
-
-# We use piecewise linears
-dm = dofmapFactory('P1', mesh)
-
-print('DoFMap:', dm)
-
-plt.figure().gca().set_title('DoFMap')
-dm.plot()
-
-######################################################################
-# Construct some simple functions
-from PyNucleus import functionFactory
-
-# show available options
-functionFactory.print()
-
-# functions defined via Python lambdas
-rhs_1 = functionFactory('Lambda', lambda x: 2*x[0]*(1-x[0]) + 2*x[1]*(1-x[1]))
-exact_solution_1 = functionFactory('Lambda', lambda x: x[0]*(1-x[0])*x[1]*(1-x[1]))
-
-# Functions defined via Cython implementations -> faster evaluation
-rhs_2 = functionFactory('rhsFunSin2D')
-exact_solution_2 = functionFactory('solSin2D')
-
-# assemble right-hand side vectors and interpolate the exact solutions
-b1 = dm.assembleRHS(rhs_1)
-u_interp_1 = dm.interpolate(exact_solution_1)
-
-print('Linear system RHS:', b1)
-print('Interpolated solution:', u_interp_1)
-
-b2 = dm.assembleRHS(rhs_2)
-u_interp_2 = dm.interpolate(exact_solution_2)
-
-plt.figure().gca().set_title('Interpolated solution')
-u_interp_1.plot()
-
-######################################################################
-# Assemble mass and Laplacian stiffness matrices
-mass = dm.assembleMass()
-laplacian = dm.assembleStiffness()
-
-print('Linear system matrix:', laplacian)
-
-######################################################################
-# Construct solvers
-from PyNucleus import solverFactory
-
-# show available options
-solverFactory.print()
-
-solver_direct = solverFactory('lu', A=laplacian)
-solver_direct.setup()
-
-print('Direct solver:', solver_direct)
-
-solver_krylov = solverFactory('cg', A=laplacian)
-solver_krylov.setup()
-solver_krylov.maxIter = 100
-solver_krylov.tolerance = 1e-8
-
-print('Krylov solver:', solver_krylov)
-
-u1 = dm.zeros()
-solver_direct(b1, u1)
-
-u2 = dm.zeros()
-numIter = solver_krylov(b2, u2)
-
-print('Number of iterations:', numIter)
-
-plt.figure().gca().set_title('Error')
-(u_interp_1-u1).plot(flat=True)
-
-######################################################################
-# Inner products and norms
-print('Residual norm 1st solve: ', (b1-laplacian*u1).norm())
-print('Residual norm 2nd solve: ', (b2-laplacian*u2).norm())
-
-# Compute errors
-H10_error_1 = sqrt(b1.inner(u_interp_1-u1))
-L2_error_1 = sqrt((u_interp_1-u1).inner(mass*(u_interp_1-u1)))
-H10_error_2 = sqrt(b2.inner(u_interp_2-u2))
-L2_error_2 = sqrt((u_interp_2-u2).inner(mass*(u_interp_2-u2)))
-
-print('1st problem - H10:', H10_error_1, 'L2:', L2_error_1)
-print('2nd problem - H10:', H10_error_2, 'L2:', L2_error_2)
-
-plt.show()
-
diff --git a/examples/example2.py b/examples/example2.py
deleted file mode 100644
index 72673ca0..00000000
--- a/examples/example2.py
+++ /dev/null
@@ -1,124 +0,0 @@
-#!/usr/bin/env python3
-###################################################################################
-# Copyright 2021 National Technology & Engineering Solutions of Sandia,           #
-# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the           #
-# U.S. Government retains certain rights in this software.                        #
-# If you want to use this code, please refer to the README.rst and LICENSE files. #
-###################################################################################
-
-import matplotlib.pyplot as plt
-from time import time
-
-######################################################################
-# Get a fractional kernel
-from PyNucleus import kernelFactory
-
-# show available options
-kernelFactory.print()
-
-from numpy import inf
-kernelFracInf = kernelFactory('fractional', dim=2, s=0.75, horizon=inf)
-
-print(kernelFracInf)
-plt.figure().gca().set_title('Fractional kernel')
-kernelFracInf.plot()
-
-######################################################################
-# Generate an appropriate mesh
-from PyNucleus import nonlocalMeshFactory, HOMOGENEOUS_DIRICHLET
-
-# Get a mesh that is appropriate for the problem, i.e. with the required interaction domain.
-meshFracInf, _ = nonlocalMeshFactory('disc', kernel=kernelFracInf, boundaryCondition=HOMOGENEOUS_DIRICHLET, hTarget=0.15)
-
-print(meshFracInf)
-plt.figure().gca().set_title('Mesh for fractional kernel')
-meshFracInf.plot()
-
-######################################################################
-# Assemble the operator
-from PyNucleus import dofmapFactory, functionFactory
-
-dmFracInf = dofmapFactory('P1', meshFracInf)
-
-rhs = functionFactory('constant', 1.)
-exact_solution = functionFactory('solFractional', dim=2, s=0.75)
-
-b = dmFracInf.assembleRHS(rhs)
-u_exact = dmFracInf.interpolate(exact_solution)
-u = dmFracInf.zeros()
-
-# Assemble the operator in dense format.
-start = time()
-A_fracInf = dmFracInf.assembleNonlocal(kernelFracInf, matrixFormat='dense')
-
-print('Dense assembly took {}s'.format(time()-start))
-
-start = time()
-A_fracInf_h2 = dmFracInf.assembleNonlocal(kernelFracInf, matrixFormat='h2')
-
-print('Hierarchical assembly took {}s'.format(time()-start))
-
-print(A_fracInf)
-print(A_fracInf_h2)
-
-######################################################################
-# Solve the linear system
-from PyNucleus import solverFactory
-from numpy import sqrt
-
-solver = solverFactory('lu', A=A_fracInf, setup=True)
-solver(b, u)
-
-Hs_err = sqrt(abs(b.inner(u-u_exact)))
-
-print('Hs error: {}'.format(Hs_err))
-plt.figure().gca().set_title('Numerical solution, fractional kernel')
-u.plot()
-
-######################################################################
-# Solve a problem with finite horizon
-kernelConst = kernelFactory('constant', dim=2, horizon=0.2)
-
-print(kernelConst)
-plt.figure().gca().set_title('Constant kernel')
-kernelConst.plot()
-
-from PyNucleus import DIRICHLET
-
-meshConst, nIConst = nonlocalMeshFactory('square', kernel=kernelConst, boundaryCondition=DIRICHLET, hTarget=0.18)
-
-print(meshConst)
-plt.figure().gca().set_title('Mesh for constant kernel')
-meshConst.plot()
-
-dmConst = dofmapFactory('P1', meshConst, nIConst['domain'])
-dmConstInteraction = dmConst.getComplementDoFMap()
-
-A_const = dmConst.assembleNonlocal(kernelConst, matrixFormat='sparse')
-B_const = dmConst.assembleNonlocal(kernelConst, dm2=dmConstInteraction, matrixFormat='sparse')
-
-g = functionFactory('Lambda', lambda x: -(x[0]**2 + x[1]**2)/4)
-g_interp = dmConstInteraction.interpolate(g)
-
-b = dmConst.assembleRHS(rhs)-(B_const*g_interp)
-u = dmConst.zeros()
-
-solver = solverFactory('cg', A=A_const, setup=True)
-solver.maxIter = 1000
-solver.tolerance = 1e-8
-
-solver(b, u)
-
-u_global = u.augmentWithBoundaryData(g_interp)
-
-plt.figure().gca().set_title('Numerical solution, constant kernel')
-u_global.plot()
-
-plt.figure().gca().set_title('Analytic solution, constant kernel')
-u_global.dm.interpolate(g).plot()
-
-print(A_const)
-
-######################################################################
-plt.show()
-
diff --git a/examples/example_InfHorizonDirichlet.py b/examples/example_InfHorizonDirichlet.py
new file mode 100755
index 00000000..0d4c052c
--- /dev/null
+++ b/examples/example_InfHorizonDirichlet.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,           #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the           #
+# U.S. Government retains certain rights in this software.                        #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+"""
+Dirichlet condition for infinite horizon kernel
+==================================================================================
+"""
+
+# %%
+# The following example can be found at `examples/example_InfHorizonDirichlet.py <https://github.com/sandialabs/PyNucleus/blob/master/examples/example_InfHorizonDirichlet.py>`_ in the PyNucleus repository.
+#
+# We want to solve a problem with infinite horizon fractional kernel
+#
+# .. math::
+#
+#    \gamma(x,y) = \frac{c}{|x-y|^{d+2s}},
+#
+# where :math:`c` is the usual normalization constant, and imposes an inhomogeneous Dirichlet volume condition.
+# We will have to make some assumption on the volume condition in order to make it computationally tractable.
+# Here we assume that it is zero at some distance away from the domain.
+#
+# .. math::
+#
+#    (-\Delta)^s u &= f &&~in~ \Omega:=B_{1/2}(0),\\
+#    u &= g  &&~in~ \Omega_\mathcal{I}:=B_{1}(0)\setminus B_{1/2}(0), \\
+#    u &= 0  &&~in~ \mathbb{R}^d \setminus B_{1}(0),
+#
+# where :math:`f\equiv 1` and :math:`$g` is chosen to match the known exact solution :math:`u(x)=C(1-|x|^2)_+^s` with some constant :math:`C`.
+
+import numpy as np
+import matplotlib.pyplot as plt
+from PyNucleus import meshFactory, dofmapFactory, kernelFactory, functionFactory, solverFactory
+
+# %%
+# Construct a mesh for :math:`\Omega\cup \Omega_\mathcal{I}` and set up degree of freedom maps on :math:`\Omega` and :math:`\Omega_\mathcal{I}`.
+
+radius = 1.0
+mesh = meshFactory('disc', n=8, radius=radius)
+for _ in range(4):
+    mesh = mesh.refine()
+
+# %%
+# Get an indicator function for :math:`\Omega`.
+# Subtract 1e-6 to avoid roundoff errors for nodes that are exactly on :math:`|x| = 0.5`.
+OmegaIndicator = functionFactory('radialIndicator', 0.5*radius-1e-6)
+dm = dofmapFactory('P1', mesh, OmegaIndicator)
+dmBC = dm.getComplementDoFMap()
+
+plt.figure()
+dm.plot(printDoFIndices=False)
+plt.figure()
+dmBC.plot(printDoFIndices=False)
+
+# %%
+# Set up the kernel, rhs and known solution.
+
+s = 0.75
+kernel = kernelFactory('fractional', dim=mesh.dim, s=s, horizon=np.inf)
+rhs = functionFactory('constant', 1.)
+uex = functionFactory('solFractional', dim=mesh.dim, s=s, radius=radius)
+
+# %%
+# Assemble the linear system
+#
+# .. math::
+#
+#   A_{\Omega,\Omega} u_{\Omega} + A_{\Omega,\Omega_\mathcal{I}} u_{\Omega_\mathcal{I}} &= f \\
+#                    u_{\Omega_\mathcal{I}} &= g
+#
+# Set up a solver for :math:`A_{\Omega,\Omega}`.
+
+A_OmegaOmega = dm.assembleNonlocal(kernel, matrixFormat='H2')
+A_OmegaOmegaI = dm.assembleNonlocal(kernel, dm2=dmBC)
+f = dm.assembleRHS(rhs)
+g = g = dmBC.interpolate(uex)
+solver = solverFactory('lu', A=A_OmegaOmega, setup=True)
+
+# %%
+# Compute FE solution and error between FE solution and interpolation of analytic expression
+
+u_Omega = dm.zeros()
+solver(f-A_OmegaOmegaI*g, u_Omega)
+u = u_Omega.augmentWithBoundaryData(g)
+err = u.dm.interpolate(uex) - u
+
+# %%
+# Plot FE solution and error
+
+plt.figure()
+plt.title('FE solution')
+u.plot(flat=True)
+plt.figure()
+plt.title('error')
+err.plot(flat=True)
diff --git a/examples/example_Neumann.py b/examples/example_Neumann.py
new file mode 100755
index 00000000..569bffa3
--- /dev/null
+++ b/examples/example_Neumann.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,           #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the           #
+# U.S. Government retains certain rights in this software.                        #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+"""
+Neumann condition for finite horizon kernel
+==============================================================================
+"""
+
+# %%
+# The following example can be found at `examples/example_Neumann.py <https://github.com/sandialabs/PyNucleus/blob/master/examples/example_Neumann.py>`_ in the PyNucleus repository.
+#
+# .. math::
+#
+#    \gamma(x,y) = c(\delta) \chi_{B_\delta(x)}(y),
+#
+# where :math:`c(\delta)` is the usual normalization constant.
+#
+# .. math::
+#
+#    \int_{\Omega\cup\Omega_\mathcal{I}} (u(x)-u(y))\gamma(x,y)  &= f(x) &&~in~ \Omega:=B_{1}(0),\\
+#    \int_{\Omega\cup\Omega_\mathcal{I}} (u(x)-u(y))\gamma(x,y)  &= g(x)  &&~in~ \Omega_\mathcal{I}:=B_{1+\delta}(0)\setminus B_{1}(0),
+#
+# where :math:`f\equiv 2` and
+#
+# .. math::
+#
+#    g(x)=c \left[ |x| \left((1+\delta-|x|)^2-\delta^2\right) + \frac{1}{3} \left((1+\delta-|x|)^3+\delta^3\right)\right].
+#
+# The exact solution is :math:`u(x)=C-x^2` where :math:`C` is an arbitrary constant.
+
+import numpy as np
+import matplotlib.pyplot as plt
+from PyNucleus import (nonlocalMeshFactory, dofmapFactory, kernelFactory,
+                       functionFactory, solverFactory, NEUMANN, NO_BOUNDARY)
+from PyNucleus_base.linear_operators import Dense_LinearOperator
+
+# %%
+# Set up kernel, load $f$, the analytic solution and the flux data :math:`g`.
+
+kernel = kernelFactory('constant', dim=1, horizon=0.4)
+load = functionFactory('constant', 2.)
+analyticSolution = functionFactory('Lambda', lambda x: -x[0]**2)
+
+def fluxFun(x):
+    horizon = kernel.horizonValue
+    dist = 1+horizon-abs(x[0])
+    assert dist >= 0
+    return 2*kernel.scalingValue * (abs(x[0]) * (dist**2-horizon**2) + 1./3. * (dist**3+horizon**3))
+
+flux = functionFactory('Lambda', fluxFun)
+
+# %%
+# Construct a degree of freedom map for the entire mesh
+
+mesh, nI = nonlocalMeshFactory('interval', kernel=kernel, boundaryCondition=NEUMANN)
+for _ in range(3):
+    mesh = mesh.refine()
+
+dm = dofmapFactory('P1', mesh, NO_BOUNDARY)
+dm
+
+# %%
+# The second return value of the nonlocal mesh factory contains indicator functions:
+
+dm.interpolate(nI['domain']).plot(label='domain')
+dm.interpolate(nI['boundary']).plot(label='local boundary')
+dm.interpolate(nI['interaction']).plot(label='interaction')
+plt.legend()
+
+# %%
+# Assemble the RHS vector by using the load on the domain :math:`\Omega` and the flux function on the interaction domain :math:`\Omega_\mathcal{I}`
+
+A = dm.assembleNonlocal(kernel)
+b = dm.assembleRHS(load*nI['domain'] + flux*(nI['interaction']+nI['boundary']))
+
+# %%
+# Solve the linear system. Since it is singular (shifts by a constant form the nullspace) we augment the system and then project out the zero mode.
+
+u = dm.zeros()
+
+# %%
+# Augment the system
+correction = Dense_LinearOperator(np.ones(A.shape))
+solver = solverFactory('lu', A=A+correction, setup=True)
+solver(b, u)
+
+# %%
+# project out the nullspace component
+const = dm.ones()
+u = u - (u.inner(const)/const.inner(const))*const
+
+# %%
+# Interpolate the exact solution and project out zero mode as well.
+
+uex = dm.interpolate(analyticSolution)
+uex = uex - (uex.inner(const)/const.inner(const))*const
+
+# %%
+u.plot(label='numerical', marker='x')
+uex.plot(label='analytic')
+plt.legend()
diff --git a/examples/example_nonlocal.py b/examples/example_nonlocal.py
new file mode 100755
index 00000000..48616867
--- /dev/null
+++ b/examples/example_nonlocal.py
@@ -0,0 +1,249 @@
+#!/usr/bin/env python3
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,           #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the           #
+# U.S. Government retains certain rights in this software.                        #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+"""
+Nonlocal problems
+=================
+"""
+
+# %%
+# I this second example, we will assemble and solve several nonlocal equations.
+# The full code of this example can be found in `examples/example_nonlocal.py <https://github.com/sandialabs/PyNucleus/blob/master/examples/example_nonlocal.py>`_ in the PyNucleus repository.
+#
+# PyNucleus can assemble operators of the form
+#
+# .. math::
+#
+#    \mathcal{L}u(x) = \operatorname{p.v.} \int_{\mathbb{R}^d} [u(y)-u(x)] \gamma(x, y) dy.
+#
+# The kernel :math:`\gamma` is of the form
+#
+# .. math::
+#
+#    \gamma(x,y) = \frac{\phi(x, y)}{|x-y|^{\beta(x,y)}} \chi_{\mathcal{N}(x)}(y).
+#
+# Here, :math:`\phi` is a positive function, and :math:`\chi` is the indicator function.
+# The interaction neighborhood :math:`\mathcal{N}(x) \subset \mathbb{R}^d` is often given as parametrized by the so-called horizon :math:`0<\delta\le\infty`:
+#
+# .. math::
+#
+#    \mathcal{N}(x) = \{y \in \mathbb{R}^d \text{ such that } ||x-y||_p<\delta\}.
+#
+# where :math:`p \in \{1,2,\infty\}`. Other types of neighborhoods are also possible.
+#
+# The singularity :math:`\beta` of the kernel crucially influences properties of the kernel:
+#
+# - fractional type: :math:`\beta(x,y)=d+2s(x,y)`, where :math:`d` is the spatial dimension and :math:`s(x,y)` is the fractional order.
+# - constant type :math:`\beta(x,y)=0`
+# - peridynamic type :math:`\beta(x,y)=1`
+#
+#
+# A fractional kernel
+# -------------------
+#
+# We start off by creating a fractional kernel with infinite horizon and constant fractional order :math:`s=0.75`.
+
+import matplotlib.pyplot as plt
+from time import time
+from PyNucleus import kernelFactory
+kernelFactory.print()
+
+# %%
+
+from numpy import inf
+
+kernelFracInf = kernelFactory('fractional', dim=2, s=0.75, horizon=inf)
+print(kernelFracInf)
+
+# %%
+
+plt.figure().gca().set_title('Fractional kernel')
+kernelFracInf.plot()
+
+# %%
+# By default, kernels are normalized so that their local limits recover the classical Laplacian. This can be disabled by passing ``normalized=False``.
+#
+#
+# Nonlocal assembly
+# -----------------
+#
+# We will be solving the problem
+#
+#  .. math::
+#
+#     -\mathcal{L} u &= f && \text{ in } \Omega=B(0,1)\subset\mathbb{R}^2, \\
+#     u &= 0 && \text{ in } \mathbb{R}^2 \setminus \Omega,
+#
+# for constant forcing function :math:`f=1`.
+#
+# First, we generate a mesh.
+# Instead of the ``meshFactory`` used in the previous example, we now use the ``nonlocalMeshFactory``.
+# The advantage is that this factory can generate meshes with appropriate interaction domains.
+# For this particular example, the factory will not generate any interaction domain, since the homogeneous Dirichlet condition on :math:`\mathbb{R}^2\setminus\Omega` can be enforced via a boundary integral.
+
+from PyNucleus import nonlocalMeshFactory, HOMOGENEOUS_DIRICHLET
+
+# Get a mesh that is appropriate for the problem, i.e. with the required interaction domain.
+meshFracInf, _ = nonlocalMeshFactory('disc', kernel=kernelFracInf, boundaryCondition=HOMOGENEOUS_DIRICHLET, hTarget=0.15)
+
+print(meshFracInf)
+
+# %%
+plt.figure().gca().set_title('Mesh for fractional kernel')
+meshFracInf.plot()
+
+# %%
+# Next, we obtain a piecewise linear, continuous DoFMap on the mesh, assemble the RHS and interpolate the known analytic solution.
+# We assemble the nonlocal operator by passing the kernel to the `assembleNonlocal` method of the DoFMap object.
+# The optional parameter `matrixFormat` determines what kind of linear operator is assembled.
+# We time the assembly of the operator as a dense matrix, and as a hierarchical matrix, and inspect the resulting objects.
+
+from PyNucleus import dofmapFactory, functionFactory
+
+dmFracInf = dofmapFactory('P1', meshFracInf)
+
+rhs = functionFactory('constant', 1.)
+exact_solution = functionFactory('solFractional', dim=2, s=0.75)
+
+b = dmFracInf.assembleRHS(rhs)
+u_exact = dmFracInf.interpolate(exact_solution)
+u = dmFracInf.zeros()
+
+# %%
+# We assemble the operator in dense format.
+start = time()
+A_fracInf = dmFracInf.assembleNonlocal(kernelFracInf, matrixFormat='dense')
+
+print('Dense assembly took {}s'.format(time()-start))
+print(A_fracInf)
+print("Memory size: {} KB".format(A_fracInf.getMemorySize() >> 10))
+
+# %%
+# Then we assemble the operator in hierarchical matrix format.
+
+start = time()
+A_fracInf_h2 = dmFracInf.assembleNonlocal(kernelFracInf, matrixFormat='h2')
+
+print('Hierarchical assembly took {}s'.format(time()-start))
+print(A_fracInf_h2)
+print("Memory size: {} KB".format(A_fracInf_h2.getMemorySize() >> 10))
+
+# %%
+# It can be observed that both assembly routines take roughly the same amount of time.
+# The reason for this is that the operator itself has quite small dimensions.
+# For larger number of unknowns, we expect the hierarchical assembly scale like :math:`\mathcal{O}(N \log^{2d} N)`, whereas the dense assembly will scale at best like :math:`\mathcal{O}(N^2)`.
+# The memory usage of the hierarchical matrix is slightly better and scales similar to the complexity of assembly.
+#
+# Similar to the local PDE example, we can then solve the resulting linear equation and compute the error in energy norm.
+
+from PyNucleus import solverFactory
+from numpy import sqrt
+
+solver = solverFactory('lu', A=A_fracInf, setup=True)
+solver(b, u)
+
+Hs_err = sqrt(abs(b.inner(u-u_exact)))
+
+print('Hs error: {}'.format(Hs_err))
+
+# %%
+
+plt.figure().gca().set_title('Numerical solution, fractional kernel')
+u.plot()
+
+
+# %%
+# A finite horizon case with Dirichlet volume condition
+# -----------------------------------------------------
+#
+# Next, we solve a nonlocal Poisson problem involving a constant kernel with finite horizon.
+# We will choose :math:`\gamma(x,y) \sim \chi_{\mathcal{N}(x)}(y)` for a neighborhood defined by the 2-norm and horizon :math:`\delta=0.2`, and solve
+#
+# .. math::
+#
+#    -\mathcal{L} u &= f && \text{ in } \Omega=[0,1]^2, \\
+#    u &= -(x_1^2 + x_2^2)/4 && \text{ in } \mathcal{I},
+#
+# where the interaction domain is given by
+#
+# .. math::
+#
+#    \mathcal{I} := \{y\in\mathbb{R}^2\setminus\Omega \text{ such that } \exists x\in\Omega: \gamma(x,y)\neq 0\}.
+#
+
+kernelConst = kernelFactory('constant', dim=2, horizon=0.2)
+print(kernelConst)
+
+# %%
+
+plt.figure().gca().set_title('Constant kernel')
+kernelConst.plot()
+
+# %%
+
+from PyNucleus import DIRICHLET
+
+meshConst, nIConst = nonlocalMeshFactory('square', kernel=kernelConst, boundaryCondition=DIRICHLET, hTarget=0.18)
+
+print(meshConst)
+print(nIConst)
+
+# %%
+# The dictionary ``nIConst`` contains several indicator functions that we will use to distinguish between interior and interaction domain.
+
+plt.figure().gca().set_title('Mesh for constant kernel')
+meshConst.plot()
+
+# %%
+# We can observe that the ``nonlocalMeshFactory`` generated a mesh that includes the interaction domain.
+#
+# We set up 2 DoFMaps this time, one for the unknown interior degrees of freedom and one for the Dirichlet volume condition.
+
+dmConst = dofmapFactory('P1', meshConst, nIConst['domain'])
+dmConstInteraction = dmConst.getComplementDoFMap()
+
+print(dmConst)
+print(dmConstInteraction)
+
+# %%
+# We can see that they are complementary to each other.
+# Next, we assemble two matrices.
+
+A_const = dmConst.assembleNonlocal(kernelConst, matrixFormat='sparse')
+B_const = dmConst.assembleNonlocal(kernelConst, dm2=dmConstInteraction, matrixFormat='sparse')
+
+g = functionFactory('Lambda', lambda x: -(x[0]**2 + x[1]**2)/4)
+g_interp = dmConstInteraction.interpolate(g)
+
+b = dmConst.assembleRHS(rhs)-(B_const*g_interp)
+u = dmConst.zeros()
+
+solver = solverFactory('cg', A=A_const, setup=True)
+solver.maxIter = 1000
+solver.tolerance = 1e-8
+
+solver(b, u)
+
+u_global = u.augmentWithBoundaryData(g_interp)
+
+plt.figure().gca().set_title('Numerical solution, constant kernel')
+u_global.plot()
+
+# %%
+
+plt.figure().gca().set_title('Analytic solution, constant kernel')
+u_global.dm.interpolate(g).plot()
+
+# %%
+
+print(A_const)
+
+# %%
+
+plt.show()
+# sphinx_gallery_thumbnail_number = 3
diff --git a/examples/example3.py b/examples/example_operator_interpolation.py
old mode 100644
new mode 100755
similarity index 53%
rename from examples/example3.py
rename to examples/example_operator_interpolation.py
index 58d706e2..3a8a1b3c
--- a/examples/example3.py
+++ b/examples/example_operator_interpolation.py
@@ -6,15 +6,38 @@
 # If you want to use this code, please refer to the README.rst and LICENSE files. #
 ###################################################################################
 
-######################################################################
-# preamble
+"""
+Operator interpolation
+======================
+"""
+
+# %%
+# This example demostrates the construction of a family of fractional
+# Laplacians parametrized by the fractional order using operator
+# interpolation. This can reduce the cost compared to assembling a new
+# matrix for each value.
+#
+# The fractional Laplacian
+#
+# .. math::
+#
+#   (-\Delta)^{s} \text{ for } s \in [s_{\min}, s_{\max}] \subset (0, 1)
+#
+# is approximated by
+#
+# .. math::
+#
+#    (-\Delta)^{s} \approx \sum_{m=0}^{M} \Theta_{k,m}(s) (-\Delta)^{s_{k,m}} \text{ for } s \in \mathcal{S}_{k}
+#
+# for a sequence of intervals :math:`\mathcal{S}_{k}` that cover :math:`[s_{\min}, s_{\max}]` and scalar coefficients :math:`\Theta_{k,m}(s)`.
+# The number of intervals and interpolation nodes is picked so that the interpolation error is dominated by the discretization error.
+#
+# The following example can be found at `examples/example_operator_interpolation.py <https://github.com/sandialabs/PyNucleus/blob/master/examples/example_operator_interpolation.py>`_ in the PyNucleus repository.
+
 import logging
 from PyNucleus_base.utilsFem import TimerManager
-from PyNucleus import meshFactory, dofmapFactory, kernelFactory, functionFactory, solverFactory
-from PyNucleus_nl.operatorInterpolation import admissibleSet
-import h5py
-from PyNucleus_base.linear_operators import LinearOperator
-from PyNucleus_fem.DoFMaps import DoFMap
+import numpy as np
+import matplotlib.pyplot as plt
 
 fmt = '{message}'
 logging.basicConfig(level=logging.INFO,
@@ -24,6 +47,13 @@
 logger = logging.getLogger('__main__')
 timer = TimerManager(logger)
 
+# %%
+# We set up a mesh, a dofmap and a fractional kernel.
+# Instead of specifying a single value for the fractional order, we allow a range of values :math:`[s_{\min}, s_{\max}]=[0.05, 0.95]`.
+
+from PyNucleus import meshFactory, dofmapFactory, kernelFactory, functionFactory, solverFactory
+from PyNucleus_nl.operatorInterpolation import admissibleSet
+
 # Set up a mesh and a dofmap on it.
 mesh = meshFactory('interval', hTarget=2e-3, a=-1, b=1)
 dm = dofmapFactory('P1', mesh)
@@ -37,17 +67,20 @@
 s = admissibleSet([0.05, 0.95])
 kernel = kernelFactory('fractional', s=s, dim=mesh.dim)
 
-######################################################################
+# %%
+# Next, we call the assembly of a nonlocal operator as before.
 # The operator is set up to be constructed on-demand.
 # We partition the interval S into several sub-interval and construct a Chebyshev interpolant on each sub-interval.
 # Therefore this operation is fast.
 with timer('operator creation'):
     A = dm.assembleNonlocal(kernel, matrixFormat='H2')
 
-######################################################################
-# Set s = 0.75.
+# %%
+# Next, we choose the value of the fractional order. This needs to be within the range that we specified earlier.
+# We set s = 0.75.
 A.set(0.75)
 
+# %%
 # Let's solve a system.
 # This triggers the assembly of the operators for the matrices at the interpolation nodes of the interval that contains s.
 # The required matrices are constructed on-demand and then stay in memory.
@@ -57,9 +90,11 @@
     numIter = solver(b, u)
 logger.info('Solved problem for s={} in {} iterations (residual norm {})'.format(A.get(), numIter, solver.residuals[-1]))
 
-######################################################################
-# Let's solve a second sytem for a closeby value of s.
-# This should be faster since we no longer need to assemble any matrices.
+# %%
+# This solve is relatively slow, as it involves the assembly of the nonlocal operators that are needed for the interpolation.
+# We select a different value for the fractional order that is close to the first.
+# Solving a linear system with this value is faster as we have already assembled the operator needed for the interpolation.
+
 with timer('solve 2 (fast)'):
     A.set(0.76)
     solver = solverFactory('cg-jacobi', A=A, setup=True)
@@ -67,31 +102,44 @@
     numIter = solver(b, u)
 logger.info('Solved problem for s={} in {} iterations (residual norm {})'.format(A.get(), numIter, solver.residuals[-1]))
 
-######################################################################
-# We can save the operator and the DoFMap to a file.
-# This will trigger the assembly of all matrices.
+# %%
+# Next, we save the operator to file.
+# This first triggers the assembly of all operators nescessary to represent every value in :math:`s\in[0.05,0.95]`.
+
+import h5py
+
 with timer('save operator'):
     h5_file = h5py.File('test.hdf5', 'w')
     A.HDF5write(h5_file.create_group('A'))
     dm.HDF5write(h5_file.create_group('dm'))
     h5_file.close()
 
-######################################################################
-# Now we can read them back in.
+# %%
+# Next, we read the operator back in.
+
+from PyNucleus_base.linear_operators import LinearOperator
+from PyNucleus_fem.DoFMaps import DoFMap
+
 with timer('load operator'):
     h5_file = h5py.File('test.hdf5', 'r')
     A_2 = LinearOperator.HDF5read(h5_file['A'])
     dm_2 = DoFMap.HDF5read(h5_file['dm'])
     h5_file.close()
 
-# Set up and solve a system with the operator we loaded.
+# %%
+# Finally, we set up and solve a series of linear systems with the operator we loaded.
 f_2 = functionFactory('constant', 2.)
 b_2 = dm_2.assembleRHS(f_2)
-u_2 = dm_2.zeros()
-with timer('solve 3 (fast)'):
-    A_2.set(0.8)
-    solver = solverFactory('cg-jacobi', A=A_2, setup=True)
-    solver.maxIter = 1000
-    numIter = solver(b_2, u_2)
-logger.info('Solved problem for s={} in {} iterations (residual norm {})'.format(A_2.get(), numIter, solver.residuals[-1]))
+for sVal in np.linspace(0.1, 0.9, 9):
+    with timer('solve 3 (fast)'):
+        u_2 = dm_2.zeros()
+        A_2.set(sVal)
+        solver = solverFactory('cg-jacobi', A=A_2, setup=True)
+        solver.maxIter = 1000
+        numIter = solver(b_2, u_2)
+    logger.info('Solved problem for s={:.1} in {} iterations (residual norm {})'.format(A_2.get(), numIter, solver.residuals[-1]))
+    u_2.plot(label='s={:.1}'.format(sVal))
+plt.legend()
 
+# %%
+plt.show()
diff --git a/examples/example_pde.py b/examples/example_pde.py
new file mode 100755
index 00000000..c1870af5
--- /dev/null
+++ b/examples/example_pde.py
@@ -0,0 +1,232 @@
+#!/usr/bin/env python3
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,           #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the           #
+# U.S. Government retains certain rights in this software.                        #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+"""
+A simple PDE problem
+================================
+"""
+
+# %%
+# In this first example, we will construct a finite element discretization of a classical PDE problem and solve it.
+# The full code of this example can be found in `examples/example_pde.py <https://github.com/sandialabs/PyNucleus/blob/master/examples/example_pde.py>`_ in the PyNucleus repository.
+#
+# Factories
+# ---------
+#
+# The creation of different groups of objects, such as finite element spaces or meshes, use factories.
+# The available classes that a factory provides can be displayed by calling the ``print()`` method of the factory.
+# An object is built by passing the name of the desired class and additional parameters to the factory.
+# If this sounds vague now, don't worry, the examples below will make it clear.
+#
+# Meshes
+# ------
+#
+# The first object we need to create is a mesh to support the finite element discretization.
+# The output of the code snippet is given below.
+
+import matplotlib.pyplot as plt
+from PyNucleus import meshFactory
+meshFactory.print()
+
+# %%
+# We see what the different meshes that the ``meshFactory`` can construct and the default values for associated parameters.
+# We choose to construct a mesh for a square domain :math:`\Omega=[0, 1] \times [0, 1]` and refining it uniformly three times:
+
+mesh = meshFactory('square', ax=0., ay=0., bx=1., by=1.)
+for _ in range(3):
+    mesh = mesh.refine()
+print('Mesh:', mesh)
+
+# %%
+# We have created a 2d mesh with 289 vertices and 512 cells.
+# The mesh (as well as many other objects) can be plotted:
+
+plt.figure().gca().set_title('Mesh')
+mesh.plot()
+
+# %%
+# Many PyNucleus objects have a ``plot`` method, similar to the mesh that we just created.
+#
+# DoFMaps
+# -------
+#
+# In the next step, we create a finite element space on the mesh.
+# By default, we assume a Dirichlet condition on the entire boundary of the domain.
+# We build a piecewise linear finite element space.
+
+from PyNucleus import dofmapFactory
+dofmapFactory.print()
+
+# %%
+# We use a piecewise linear continuous finite element space.
+dm = dofmapFactory('P1c', mesh)
+print('DoFMap:', dm)
+
+# %%
+# By default all degrees of freedom on the boundary of the domain are considered to be "boundary dofs".
+# The ``dofmapFactory`` take an optional second argument that allows to pass an indicator function to control what is considered a boundary dof.
+#
+# Next, we plot the locations of the degrees of freedom on the mesh.
+plt.figure().gca().set_title('DoFMap')
+dm.plot()
+
+# %%
+# Functions and vectors
+# ---------------------
+#
+# We will be solving the problem
+#
+# .. math::
+#
+#    -\Delta u &= f & \text{ in } \Omega, \\
+#    u &= 0 & \text{ on } \partial \Omega,
+#
+# or, more precisely, its weak form
+#
+# .. math::
+#
+#    &\text{Find } u \in H^1(\Omega) \text{ such that}\\
+#    &\int_\Omega \nabla u \cdot \nabla v = \int_\Omega fv  \qquad\forall v \in H^1_0(\Omega).
+#
+# Functions are created using the ``functionFactory``.
+from PyNucleus import functionFactory
+functionFactory.print()
+
+# %%
+# We will consider two different forcing functions :math:`f`.
+# Pointwise evalutated functions such as :math:`f` can either be defined in Python, or in Cython.
+# A generic Python function can be used via the ``Lambda`` function class.
+rhs_1 = functionFactory('Lambda', lambda x: 2*x[0]*(1-x[0]) + 2*x[1]*(1-x[1]))
+exact_solution_1 = functionFactory('Lambda', lambda x: x[0]*(1-x[0])*x[1]*(1-x[1]))
+
+# %%
+# The advantage of Cython functions is that their code is compiled, which speeds up evaluation significantly.
+# A couple of compiled functions are already available via the ``functionFactory``.
+rhs_2 = functionFactory('rhsFunSin2D')
+exact_solution_2 = functionFactory('solSin2D')
+
+# %%
+# We assemble the right-hand side
+#
+# .. math::
+#
+#    \int_\Omega f v
+#
+# of the linear system by calling the ``assembleRHS`` method of the DoFMap object, and interpolate the exact solutions into the finite element space.
+
+b1 = dm.assembleRHS(rhs_1)
+u_interp_1 = dm.interpolate(exact_solution_1)
+
+print('Linear system RHS:', b1)
+print('Interpolated solution:', u_interp_1)
+
+b2 = dm.assembleRHS(rhs_2)
+u_interp_2 = dm.interpolate(exact_solution_2)
+
+# %%
+# We plot the interpolated exact solution.
+plt.figure().gca().set_title('Interpolated solution')
+u_interp_1.plot()
+
+# %%
+# Matrices
+# --------
+#
+# We assemble the stiffness matrix associated with the Laplacian
+#
+# .. math::
+#
+#    \int_\Omega \nabla u \cdot \nabla v
+
+laplacian = dm.assembleStiffness()
+
+print('Linear system matrix:', laplacian)
+
+# %%
+# Solvers
+# -------
+#
+# Now that we have assembled our linear system, we want to solve it.
+
+from PyNucleus import solverFactory
+solverFactory.print()
+
+# %%
+# We choose to set up an LU direct solver.
+
+solver_direct = solverFactory('lu', A=laplacian)
+solver_direct.setup()
+print('Direct solver:', solver_direct)
+
+# %%
+# We also set up an iterative solver. Note that we need to specify some additional parameters.
+
+solver_krylov = solverFactory('cg', A=laplacian)
+solver_krylov.setup()
+solver_krylov.maxIter = 100
+solver_krylov.tolerance = 1e-8
+print('Krylov solver:', solver_krylov)
+
+# %%
+# We allocate a zero vector for the solution of the linear system and solve the equation using the first right-hand side.
+
+u1 = dm.zeros()
+solver_direct(b1, u1)
+
+# %%
+# We use the interative solver for the second right-hand side.
+
+u2 = dm.zeros()
+numIter = solver_krylov(b2, u2)
+
+print('Number of iterations:', numIter)
+
+# %%
+# We plot the difference between one of the numerical solutions we just computed and the interpolant of the known analytic solution.
+plt.figure().gca().set_title('Error')
+(u_interp_1-u1).plot(flat=True)
+
+
+# %%
+# Norms and inner products
+# ------------------------
+#
+# Finally, we want to check that we actually solved the system by computing :math:`\ell^2` residual errors.
+
+print('Residual error 1st solve: ', (b1-laplacian*u1).norm())
+print('Residual error 2nd solve: ', (b2-laplacian*u2).norm())
+
+# %%
+# We observe that the first solution is accurate up to machine epsilon, and that the second solution satisfies the tolerance that we specified earlier.
+#
+# We also compute errors in :math:`H^1_0` and :math:`L^2` norms.
+# In order to compute the :math:`L^2` error we need to assemble the mass matrix
+#
+# .. math::
+#
+#    \int_\Omega u v.
+#
+# For the :math:`H^1_0` error we can reuse the stiffness matrix that we assembled earlier.
+
+mass = dm.assembleMass()
+
+from numpy import sqrt
+
+H10_error_1 = sqrt(b1.inner(u_interp_1-u1))
+L2_error_1 = sqrt((u_interp_1-u1).inner(mass*(u_interp_1-u1)))
+H10_error_2 = sqrt(b2.inner(u_interp_2-u2))
+L2_error_2 = sqrt((u_interp_2-u2).inner(mass*(u_interp_2-u2)))
+
+print('1st problem - H10:', H10_error_1, 'L2:', L2_error_1)
+print('2nd problem - H10:', H10_error_2, 'L2:', L2_error_2)
+
+# %%
+# This concludes our first example.
+# Next, we turn to nonlocal equations.
+
+plt.show()
diff --git a/metisCy/PyNucleus_metisCy/__init__.py b/metisCy/PyNucleus_metisCy/__init__.py
index 0e8232af..06f8701a 100644
--- a/metisCy/PyNucleus_metisCy/__init__.py
+++ b/metisCy/PyNucleus_metisCy/__init__.py
@@ -9,6 +9,7 @@
 A Cython interface to METIS and ParMETIS.
 
 http://glaros.dtc.umn.edu/gkhome/metis/metis/overview
+
 http://glaros.dtc.umn.edu/gkhome/metis/parmetis/overview
 
 """
diff --git a/nl/PyNucleus_nl/clusterMethodCy.pyx b/nl/PyNucleus_nl/clusterMethodCy.pyx
index 02bb77ce..7da014f6 100644
--- a/nl/PyNucleus_nl/clusterMethodCy.pyx
+++ b/nl/PyNucleus_nl/clusterMethodCy.pyx
@@ -1519,14 +1519,16 @@ cdef class tree_node:
 
     def HDF5writeNew(self, node):
         cdef:
-            INDEX_t c = -1
+            INDEX_t c = -1, valueSize = -1
             tree_node n
             indexSetIterator it = arrayIndexSetIterator()
             INDEX_t dim = self.box.shape[0], i, j
         numNodes = self.get_max_id()+1
+        interpolationOrders = uninitialized((numNodes), dtype=INDEX)
         indptrChildren = uninitialized((numNodes+1), dtype=INDEX)
         boxes = uninitialized((numNodes, dim, 2), dtype=REAL)
         for n in self.get_tree_nodes():
+            interpolationOrders[n.id] = n.interpolation_order
             indptrChildren[n.id+1] = len(n.children)
             for i in range(dim):
                 for j in range(2):
@@ -1547,18 +1549,20 @@ cdef class tree_node:
         node.create_dataset('boxes', data=boxes, compression=COMPRESSION)
         del children
 
-        M = self.children[0].transferOperator.shape[0]
-        transferOperators = uninitialized((numNodes, M, M), dtype=REAL)
+        node.create_dataset('interpolationOrders', data=interpolationOrders, compression=COMPRESSION)
+
+        trOps = node.create_group('transferOperators')
         for n in self.get_tree_nodes():
             try:
                 if n.transferOperator.shape[0] > 0:
-                    transferOperators[n.id, :, :] = n.transferOperator
-                else:
-                    transferOperators[n.id, :, :] = 0.0
+                    trOps.create_dataset(str(n.id), data=np.array(n.transferOperator), compression=COMPRESSION)
             except:
                 pass
-        node.create_dataset('transferOperators', data=transferOperators,
-                            compression=COMPRESSION)
+            if n.coefficientsDown is not None:
+                valueSize = 1
+            elif n.coefficientsDownVec is not None:
+                valueSize = n.coefficientsDownVec.shape[1]
+        node.attrs['valueSize'] = valueSize
 
         indptrDofs = uninitialized((numNodes+1), dtype=INDEX)
         for n in self.get_tree_nodes():
@@ -1606,30 +1610,11 @@ cdef class tree_node:
         cells.HDF5write(node['cells'])
         del cells
 
-        noCoefficients = next(self.leaves()).value.shape[0]
-        values = uninitialized((maxDof+1, noCoefficients, M), dtype=REAL)
-        mapping = {}
-        k = 0
+        values = node.create_group('values')
         for n in self.leaves():
-            mapping[n.id] = k, k+n.value.shape[1]
-            k += n.value.shape[1]
-            values[mapping[n.id][0]:mapping[n.id][1], :, :] = np.swapaxes(n.value, 0, 1)
-        node.create_group('mapping')
-        keys = uninitialized((len(mapping)), dtype=INDEX)
-        vals = uninitialized((len(mapping), 2), dtype=INDEX)
-        k = 0
-        for i in mapping:
-            keys[k] = i
-            vals[k][0] = mapping[i][0]
-            vals[k][1] = mapping[i][1]
-            k += 1
-        node['mapping'].create_dataset('keys', data=keys, compression=COMPRESSION)
-        node['mapping'].create_dataset('vals', data=vals, compression=COMPRESSION)
-        node.create_dataset('values', data=values,
-                            compression=COMPRESSION)
-
+            values.create_dataset(str(n.id), data=np.array(n.value), compression=COMPRESSION)
         node.attrs['dim'] = self.dim
-        node.attrs['M'] = M
+        writeRefinementParams(node.create_group('refinementParams'), self.refParams)
 
     @staticmethod
     def HDF5readNew(node):
@@ -1637,36 +1622,50 @@ cdef class tree_node:
             list nodes
             LinearOperator children
             REAL_t[:, :, ::1] boxes
-            INDEX_t M
             tree_node n
+            INDEX_t M, dim
             INDEX_t k, start, end, my_id
             dict mapping
+            INDEX_t[::1] interpolationOrders
             INDEX_t[::1] keys
             INDEX_t[:, ::1] vals
             indexSet cluster_dofs
             LinearOperator dofs
+            refinementParams refParams
+            INDEX_t valueSize
         children = LinearOperator.HDF5read(node['children'])
         nodes = [0]*children.shape[0]
-        M = node.attrs['M']
 
-        transferOperators = np.array(node['transferOperators'], dtype=REAL)
+        valueSize = node.attrs['valueSize']
+        refParams = readRefinementParams(node['refinementParams'])
+        interpolationOrders = np.array(node['interpolationOrders'], dtype=INDEX)
+        trOps = node['transferOperators']
         boxes = np.array(node['boxes'], dtype=REAL)
-        tree = readNode(nodes, 0, None, boxes, children, M, transferOperators)
+        dim = node.attrs['dim']
+        tree = readNode(nodes, 0, None, children)
         dofs = LinearOperator.HDF5read(node['dofs'])
         cells = LinearOperator.HDF5read(node['cells'])
-        keys = np.array(node['mapping']['keys'], dtype=INDEX)
-        vals = np.array(node['mapping']['vals'], dtype=INDEX)
-        mapping = {}
-        for k in range(keys.shape[0]):
-            mapping[keys[k]] = k
-        values = np.array(node['values'], dtype=REAL)
+        for n in tree.get_tree_nodes():
+            n.refParams = refParams
+            n.box = uninitialized((boxes.shape[1],
+                                   boxes.shape[2]),
+                                  dtype=REAL)
+            for i in range(boxes.shape[1]):
+                for j in range(boxes.shape[2]):
+                    n.box[i, j] = boxes[n.id, i, j]
+            n.interpolation_order = interpolationOrders[n.id]
+            M = n.interpolation_order**dim
+            n.coefficientsUp = uninitialized((M), dtype=REAL)
+            if valueSize == 1:
+                n.coefficientsDown = uninitialized((M), dtype=REAL)
+            else:
+                n.coefficientsDownVec = uninitialized((M, valueSize), dtype=REAL)
+            if str(n.id) in trOps:
+                n.transferOperator = np.array(trOps[str(n.id)], dtype=REAL)
         for n in tree.leaves():
             n._dofs = arrayIndexSet(dofs.indices[dofs.indptr[n.id]:dofs.indptr[n.id+1]], sorted=True)
             n._cells = arrayIndexSet(cells.indices[cells.indptr[n.id]:cells.indptr[n.id+1]], sorted=True)
-            my_id = mapping[n.id]
-            start = vals[my_id, 0]
-            end = vals[my_id, 1]
-            n.value = np.ascontiguousarray(np.swapaxes(np.array(values[start:end, :, :], dtype=REAL), 0, 1))
+            n.value = np.array(node['values'][str(n.id)])
         # setDoFsFromChildren(tree)
         return tree, nodes
 
@@ -1868,32 +1867,56 @@ cdef class tree_node:
         return s
 
 
-cdef tree_node readNode(list nodes, INDEX_t myId, parent, REAL_t[:, :, ::1] boxes, LinearOperator children, INDEX_t M, REAL_t[:, :, ::1] transferOperators):
+cdef tree_node readNode(list nodes, INDEX_t myId, parent, LinearOperator children):
     cdef:
         indexSet bA = arrayIndexSet()
-        tree_node n = tree_node(parent, bA, boxes)
+        REAL_t[:, :, ::1] fake_boxes = np.empty((0, 0, 0), dtype=REAL)
+        REAL_t[::1] fake_hVector = np.empty((0), dtype=REAL)
+        refinementParams refParams
+        tree_node n = tree_node(parent, bA, fake_boxes, fake_hVector, refParams)
         INDEX_t i, j
     n.id = myId
     nodes[myId] = n
-    n.transferOperator = uninitialized((transferOperators.shape[1],
-                                        transferOperators.shape[2]),
-                                       dtype=REAL)
-    n.box = uninitialized((boxes.shape[1],
-                           boxes.shape[2]),
-                          dtype=REAL)
-    for i in range(transferOperators.shape[1]):
-        for j in range(transferOperators.shape[2]):
-            n.transferOperator[i, j] = transferOperators[myId, i, j]
-    for i in range(boxes.shape[1]):
-        for j in range(boxes.shape[2]):
-            n.box[i, j] = boxes[myId, i, j]
-    n.coefficientsUp = uninitialized((M), dtype=REAL)
-    n.coefficientsDown = uninitialized((M), dtype=REAL)
     for i in range(children.indptr[myId], children.indptr[myId+1]):
-        n.children.append(readNode(nodes, children.indices[i], n, boxes, children, M, transferOperators))
+        n.children.append(readNode(nodes, children.indices[i], n, children))
     return n
 
 
+cdef void writeRefinementParams(node, refinementParams refParams):
+    node.attrs['maxLevels'] = refParams.maxLevels
+    node.attrs['maxLevelsMixed'] = refParams.maxLevelsMixed
+    node.attrs['minSize'] = refParams.minSize
+    node.attrs['minMixedSize'] = refParams.minMixedSize
+    node.attrs['refType'] = refParams.refType
+    node.attrs['splitEveryDim'] = refParams.splitEveryDim
+    node.attrs['eta'] = refParams.eta
+    node.attrs['farFieldInteractionSize'] = refParams.farFieldInteractionSize
+    node.attrs['interpolation_order'] = refParams.interpolation_order
+    node.attrs['attemptRefinement'] = refParams.attemptRefinement
+    node.attrs['targetOrder'] = refParams.targetOrder
+    node.attrs['meshDiam'] = refParams.meshDiam
+    node.attrs['maxSingularity'] = refParams.maxSingularity
+
+
+cdef readRefinementParams(node):
+    cdef:
+        refinementParams refParams
+    refParams.maxLevels = node.attrs['maxLevels']
+    refParams.maxLevelsMixed = node.attrs['maxLevelsMixed']
+    refParams.minSize = node.attrs['minSize']
+    refParams.minMixedSize = node.attrs['minMixedSize']
+    refParams.refType = node.attrs['refType']
+    refParams.splitEveryDim = node.attrs['splitEveryDim']
+    refParams.eta = node.attrs['eta']
+    refParams.farFieldInteractionSize = node.attrs['farFieldInteractionSize']
+    refParams.interpolation_order = node.attrs['interpolation_order']
+    refParams.attemptRefinement = node.attrs['attemptRefinement']
+    refParams.targetOrder = node.attrs['targetOrder']
+    refParams.meshDiam = node.attrs['meshDiam']
+    refParams.maxSingularity = node.attrs['maxSingularity']
+    return refParams
+
+
 cdef indexSet setDoFsFromChildren(tree_node n):
     if n.isLeaf:
         return n.dofs
diff --git a/nl/PyNucleus_nl/panelTypes.pxi b/nl/PyNucleus_nl/panelTypes.pxi
index d4c85b23..a9ec5fd5 100644
--- a/nl/PyNucleus_nl/panelTypes.pxi
+++ b/nl/PyNucleus_nl/panelTypes.pxi
@@ -5,11 +5,12 @@
 # If you want to use this code, please refer to the README.rst and LICENSE files. #
 ###################################################################################
 
-DEF DISTANT = 0
-DEF COMMON_VERTEX = -1
-DEF COMMON_EDGE = -2
-DEF COMMON_FACE = -3
-DEF COMMON_VOLUME = -4
-DEF SEPARATED = -5
-DEF IGNORED = -6
-DEF ON_HORIZON = -7
+cdef enum elementPositionType:
+    DISTANT = 0
+    COMMON_VERTEX = -1
+    COMMON_EDGE = -2
+    COMMON_FACE = -3
+    COMMON_VOLUME = -4
+    SEPARATED = -5
+    IGNORED = -6
+    ON_HORIZON = -7
diff --git a/setup.py b/setup.py
index 9f3d213b..cd825305 100644
--- a/setup.py
+++ b/setup.py
@@ -55,4 +55,7 @@
       license_files=['LICENSE'],
       python_requires='>=3.10',
       install_requires=requirements,
+      extras_require={'testing': ['pytest', 'pytest-html', 'pytest-xdist'],
+                      'docs': ['Sphinx', 'sphinxcontrib-programoutput', 'sphinx-gallery', 'sphinx-rtd-theme'],
+                      'dev': ['flake8', 'flake8-junit-report', 'cython-lint']},
       zip_safe=False)
diff --git a/tests/cache_runNonlocal.py--domainsquare--kernelTypeconstant--problemquadratic(1.0,0.5,0.3)--solvercg-mg--matrixFormatH2--interactionellipse(0.5,1.0,0.) b/tests/cache_runNonlocal.py--domainsquare--kernelTypeconstant--problemquadratic(1.0,0.5,0.3)--solvercg-mg--matrixFormatH2--interactionellipse(0.5,1.0,0.)
new file mode 100644
index 00000000..7096d6f2
--- /dev/null
+++ b/tests/cache_runNonlocal.py--domainsquare--kernelTypeconstant--problemquadratic(1.0,0.5,0.3)--solvercg-mg--matrixFormatH2--interactionellipse(0.5,1.0,0.)
@@ -0,0 +1,10 @@
+Timers: {}
+errors:
+  L2 error interpolated: 0.001886040034810939
+  Linf error interpolated: 0.0038417175129086267
+  relative interpolated L2 error: 0.001052394803584935
+  relative interpolated Linf error: 0.0018954333673722038
+meshes: {}
+results: {}
+sysInfo: {}
+vectors: {}
diff --git a/tests/cache_runNonlocal.py--domainsquare--kernelTypefractional--problemquadratic(1.0,0.5,0.3)--solvercg-mg--matrixFormatH2--interactionellipse(0.5,1.0,0.) b/tests/cache_runNonlocal.py--domainsquare--kernelTypefractional--problemquadratic(1.0,0.5,0.3)--solvercg-mg--matrixFormatH2--interactionellipse(0.5,1.0,0.)
new file mode 100644
index 00000000..c607af8f
--- /dev/null
+++ b/tests/cache_runNonlocal.py--domainsquare--kernelTypefractional--problemquadratic(1.0,0.5,0.3)--solvercg-mg--matrixFormatH2--interactionellipse(0.5,1.0,0.)
@@ -0,0 +1,10 @@
+Timers: {}
+errors:
+  L2 error interpolated: 0.007487819634085953
+  Linf error interpolated: 0.006736897902834271
+  relative interpolated L2 error: 0.0041781416765541935
+  relative interpolated Linf error: 0.0033238625783143607
+meshes: {}
+results: {}
+sysInfo: {}
+vectors: {}
diff --git a/tests/cache_runNonlocal.py--domainsquare--kernelTypeinverseDistance--problemquadratic(1.0,0.5,0.3)--solvercg-mg--matrixFormatH2--interactionellipse(0.5,1.0,0.) b/tests/cache_runNonlocal.py--domainsquare--kernelTypeinverseDistance--problemquadratic(1.0,0.5,0.3)--solvercg-mg--matrixFormatH2--interactionellipse(0.5,1.0,0.)
new file mode 100644
index 00000000..1e448f39
--- /dev/null
+++ b/tests/cache_runNonlocal.py--domainsquare--kernelTypeinverseDistance--problemquadratic(1.0,0.5,0.3)--solvercg-mg--matrixFormatH2--interactionellipse(0.5,1.0,0.)
@@ -0,0 +1,10 @@
+Timers: {}
+errors:
+  L2 error interpolated: 0.005142478951544218
+  Linf error interpolated: 0.004238822312545576
+  relative interpolated L2 error: 0.002869460894923444
+  relative interpolated Linf error: 0.0020913576343299938
+meshes: {}
+results: {}
+sysInfo: {}
+vectors: {}
diff --git a/tests/cache_testDistOp.py--domaininterval--sconst(0.25)--horizon0.01--horizonToMeshSize100.0--buildSparse--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--no-write4 b/tests/cache_testDistOp.py--domaininterval--sconst(0.25)--horizon0.01--horizonToMeshSize100.0--buildSparse--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--no-write4
new file mode 100644
index 00000000..3117dac9
--- /dev/null
+++ b/tests/cache_testDistOp.py--domaininterval--sconst(0.25)--horizon0.01--horizonToMeshSize100.0--buildSparse--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--no-write4
@@ -0,0 +1,8 @@
+Timers: {}
+TimersH2: {}
+info: {}
+matvec errors:
+  '|(A_sparse - A_distributed_bcast) * x|': 6.19687293232095e-09
+  '|(A_sparse - A_distributed_halo) * x|': 6.196872932320949e-09
+stats: {}
+sysInfo: {}
diff --git a/tests/cache_testDistOp.py--domaininterval--sconst(0.75)--horizon0.01--horizonToMeshSize100.0--buildSparse--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--no-write4 b/tests/cache_testDistOp.py--domaininterval--sconst(0.75)--horizon0.01--horizonToMeshSize100.0--buildSparse--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--no-write4
new file mode 100644
index 00000000..019bb0e1
--- /dev/null
+++ b/tests/cache_testDistOp.py--domaininterval--sconst(0.75)--horizon0.01--horizonToMeshSize100.0--buildSparse--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--no-write4
@@ -0,0 +1,8 @@
+Timers: {}
+TimersH2: {}
+info: {}
+matvec errors:
+  '|(A_sparse - A_distributed_bcast) * x|': 4.192348333051432e-07
+  '|(A_sparse - A_distributed_halo) * x|': 4.1923483330514297e-07
+stats: {}
+sysInfo: {}
diff --git a/tests/cache_testDistOp.py--domainsquare--sconst(0.25)--horizon1.0--horizonToMeshSize20.0--buildSparse--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--no-write4 b/tests/cache_testDistOp.py--domainsquare--sconst(0.25)--horizon1.0--horizonToMeshSize20.0--buildSparse--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--no-write4
new file mode 100644
index 00000000..cad3ce37
--- /dev/null
+++ b/tests/cache_testDistOp.py--domainsquare--sconst(0.25)--horizon1.0--horizonToMeshSize20.0--buildSparse--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--no-write4
@@ -0,0 +1,8 @@
+Timers: {}
+TimersH2: {}
+info: {}
+matvec errors:
+  '|(A_sparse - A_distributed_bcast) * x|': 3.7565878890201824e-05
+  '|(A_sparse - A_distributed_halo) * x|': 3.756587889020183e-05
+stats: {}
+sysInfo: {}
diff --git a/tests/cache_testDistOp.py--domainsquare--sconst(0.75)--horizon1.0--horizonToMeshSize20.0--buildSparse--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--no-write4 b/tests/cache_testDistOp.py--domainsquare--sconst(0.75)--horizon1.0--horizonToMeshSize20.0--buildSparse--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--no-write4
new file mode 100644
index 00000000..2c5d7f74
--- /dev/null
+++ b/tests/cache_testDistOp.py--domainsquare--sconst(0.75)--horizon1.0--horizonToMeshSize20.0--buildSparse--buildH2Reduced--buildDistributedH2Bcast--buildDistributedH2--no-write4
@@ -0,0 +1,8 @@
+Timers: {}
+TimersH2: {}
+info: {}
+matvec errors:
+  '|(A_sparse - A_distributed_bcast) * x|': 3.317510977531871e-05
+  '|(A_sparse - A_distributed_halo) * x|': 3.317510977531871e-05
+stats: {}
+sysInfo: {}