diff --git a/.github/workflows/fv3_translate_tests.yaml b/.github/workflows/fv3_translate_tests.yaml new file mode 100644 index 00000000..9d030d72 --- /dev/null +++ b/.github/workflows/fv3_translate_tests.yaml @@ -0,0 +1,10 @@ +name: "FV3 translate tests" +on: + pull_request: + +jobs: + fv3_translate_tests: + uses: NOAA-GFDL/pyFV3/.github/workflows/translate.yaml@develop + with: + component_trigger: true + component_name: NDSL diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 0cab2313..a450ac1b 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -1,7 +1,11 @@ name: "Lint" on: pull_request: - types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled] + +# cancel running jobs if there's a newer push +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true jobs: lint: @@ -14,7 +18,7 @@ jobs: - name: Step Python uses: actions/setup-python@v4.6.0 with: - python-version: '3.8.12' + python-version: '3.11.7' - name: Install OpenMPI for gt4py run: | sudo apt-get install libopenmpi-dev diff --git a/.github/workflows/pace_tests.yaml b/.github/workflows/pace_tests.yaml new file mode 100644 index 00000000..3837beec --- /dev/null +++ b/.github/workflows/pace_tests.yaml @@ -0,0 +1,10 @@ +name: "pace main tests" +on: + pull_request: + +jobs: + pace_main_tests: + uses: NOAA-GFDL/pace/.github/workflows/main_unit_tests.yaml@develop + with: + component_trigger: true + component_name: NDSL diff --git a/.github/workflows/shield_tests.yaml b/.github/workflows/shield_tests.yaml new file mode 100644 index 00000000..fcc86f20 --- /dev/null +++ b/.github/workflows/shield_tests.yaml @@ -0,0 +1,10 @@ +name: "SHiELD Translate tests" +on: + pull_request: + +jobs: + shield_translate_tests: + uses: NOAA-GFDL/pySHiELD/.github/workflows/translate.yaml@develop + with: + component_trigger: true + component_name: NDSL diff --git a/.github/workflows/unit_tests.yaml b/.github/workflows/unit_tests.yaml index 77a0d5c2..e780c190 100644 --- a/.github/workflows/unit_tests.yaml +++ b/.github/workflows/unit_tests.yaml @@ -1,33 +1,38 @@ -name: "Unit tests" +name: "NDSL unit tests" on: pull_request: - types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled] + +# cancel running jobs if there's a newer push +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true jobs: - all: + ndsl_unit_tests: runs-on: ubuntu-latest - strategy: - matrix: - python: [3.8.12, 3.11.7] + container: + image: ghcr.io/noaa-gfdl/miniforge:mpich steps: + - name: Checkout repository - uses: actions/checkout@v3.5.2 + uses: actions/checkout@v4 with: submodules: 'recursive' - - name: Setup Python - uses: actions/setup-python@v4.6.0 - with: - python-version: ${{ matrix.python }} - - name: Install OpenMPI & Boost for gt4py - run: | - sudo apt-get install libopenmpi-dev libboost1.74-dev + - name: Install Python packages + run: pip3 install .[test] + + - name: prepare input eta files run: | - python -m pip install --upgrade pip setuptools wheel - pip install .[test] + python tests/grid/generate_eta_files.py + - name: Run serial-cpu tests - run: | - pytest -x tests + run: coverage run --rcfile=setup.cfg -m pytest -x tests + - name: Run parallel-cpu tests + run: mpiexec -np 6 --oversubscribe coverage run --rcfile=setup.cfg -m mpi4py -m pytest -x tests/mpi + + - name: Output code coverage run: | - mpirun -np 6 --oversubscribe pytest -x tests/mpi + coverage combine + coverage report diff --git a/.pre-commit-config.yaml
b/.pre-commit-config.yaml index 8e207343..8df5e5ae 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,11 +15,12 @@ repos: args: ["--profile", "black"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.812 + rev: v1.4.1 hooks: - id: mypy name: mypy-ndsl args: [--config-file, setup.cfg] + additional_dependencies: [types-PyYAML] files: ndsl exclude: | (?x)^( diff --git a/examples/Fortran_serialization/01_serialize_fortran_data.ipynb b/examples/Fortran_serialization/01_serialize_fortran_data.ipynb new file mode 100644 index 00000000..743d15ee --- /dev/null +++ b/examples/Fortran_serialization/01_serialize_fortran_data.ipynb @@ -0,0 +1,667 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## **Serialbox Tutorial : Serializing Fortran Data**\n", + "\n", + "This notebook will cover the basics of extracting data from a Fortran program using [Serialbox](https://gridtools.github.io/serialbox/).\n", + "\n", + "### **Notebook Requirements**\n", + "\n", + "- Python v3.11.x to v3.12.x\n", + "- [NOAA/NASA Domain Specific Language Middleware](https://github.com/NOAA-GFDL/NDSL)\n", + "- `ipykernel==6.1.0`\n", + "- [`ipython_genutils`](https://pypi.org/project/ipython_genutils/)\n", + "- Fortran compiler that built Serialbox in the `NDSL` middleware (Note: The default build instructions for `NDSL` build Serialbox such that it writes binary data files from Fortran. Serialbox has compiler options that enable it to write netCDF files.)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Brief Serialbox Overview**\n", + "\n", + "[Serialbox](https://gridtools.github.io/serialbox/) is a library that can extract data from Fortran programs for use in code porting and verification. It uses directive-based code statements that are translated later into actual Serialbox library calls, which makes it approachable to use. Extracting data from a Fortran program using Serialbox essentially follows these steps.\n", + "\n", + "1) Initialize Serialbox\n", + "2) Create a savepoint\n", + "3) Save the data of interest\n", + "4) \"Clean up\" the savepoint\n", + "\n", + "These four steps correlate to the following directives in Serialbox.\n", + "\n", + "1) `!$ser init directory='<directory>' prefix='<prefix>'`\n", + "2) `!$ser savepoint <savepoint_name>`\n", + "3) `!$ser data <serialbox_name>=<fortran_variable>`\n", + "4) `!$ser cleanup`\n", + "\n", + "Note that in 3, multiple variables can be specified (ex: `!$ser data serialA=fortranA serialB=fortranB serialC=fortranC`)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Serialbox Example 1**\n", + "\n", + "We'll step through a basic example that extracts data from a Fortran code using Serialbox.\n", + "\n", + "The following sets the environment variables `SERIALBOX_EXAMPLE_PATH` and `SERIALBOX_INSTALL_PATH`. Afterwards, a Bash script issues commands that create a `Fortran` directory within `SERIALBOX_EXAMPLE_PATH` that will store the Fortran code used to demonstrate Serialbox commands. Be sure to change the environment variables `SERIALBOX_EXAMPLE_PATH` and `SERIALBOX_INSTALL_PATH` to ones that are appropriate for your machine."
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "env: SERIALBOX_EXAMPLE_PATH=/home/ckung/Documents/Code/SMT-Nebulae-Tutorial/tutorial/Fortran_porting\n", + "env: SERIALBOX_INSTALL_PATH=/home/ckung/Documents/Code/SMT-Nebulae/sw_stack_path/install/serialbox/\n" + ] + } + ], + "source": [ + "# Change SERIALBOX_EXAMPLE_PATH and SERIALBOX_INSTALL_PATH to appropriate paths\n", + "%env SERIALBOX_EXAMPLE_PATH=/home/ckung/Documents/Code/SMT-Nebulae-Tutorial/tutorial/Fortran_porting\n", + "%env SERIALBOX_INSTALL_PATH=/home/ckung/Documents/Code/SMT-Nebulae/sw_stack_path/install/serialbox/" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "%%bash\n", + "\n", + "cd $SERIALBOX_EXAMPLE_PATH\n", + "\n", + "if [ ! -d \"./Fortran\" ]; then\n", + " mkdir Fortran\n", + "else\n", + " rm -rf Fortran\n", + " mkdir Fortran\n", + "fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### **Serialbox directive calls in Fortran code**\n", + "\n", + "Next we'll issue commands that create and write the file `testSerialBox.F90` and move it to the previously created `Fortran` directory. This file will contain the Fortran program `testSerialBox` that allocates three arrays, writes random numbers into two arrays (`Qin_out`, `MASS`), and passes the arrays into the subroutine `FILLQ2ZERO1`." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing testSerialBox.F90\n" + ] + } + ], + "source": [ + "%%writefile testSerialBox.F90\n", + "\n", + "program testSerialBox\n", + "\n", + " implicit none\n", + "\n", + " real, dimension(:,:,:), allocatable :: Qin_out, MASS\n", + " real, dimension(:,:), allocatable :: FILLQ_out\n", + "\n", + " integer :: N = 5\n", + "\n", + " allocate(Qin_out(N,N,N), MASS(N,N,N), FILLQ_out(N,N))\n", + "\n", + " call random_number(Qin_out)\n", + " call random_number(MASS)\n", + "\n", + " where(Qin_out < 0.1) Qin_out = -Qin_out\n", + "\n", + " print*, 'sum(Qin_out) = ', sum(Qin_out)\n", + " print*, 'sum(MASS) = ', sum(MASS)\n", + "\n", + "\n", + "!$ser init directory='.' prefix='FILLQ2ZERO_InOut'\n", + "!$ser savepoint sp1\n", + "!$ser mode write\n", + "!$ser data q_in=Qin_out m_in=MASS fq_in=FILLQ_out\n", + "\n", + " call FILLQ2ZERO1(Qin_out, MASS, FILLQ_out)\n", + "\n", + "!$ser data q_out=Qin_out m_out=MASS fq_out=FILLQ_out\n", + "!$ser cleanup\n", + " print*, 'sum(Qin_out) = ', sum(Qin_out)\n", + " print*, 'sum(FILLQ_out) = ', sum(FILLQ_out)\n", + "\n", + " contains\n", + "\n", + " subroutine FILLQ2ZERO1( Q, MASS, FILLQ )\n", + " real, dimension(:,:,:), intent(inout) :: Q\n", + " real, dimension(:,:,:), intent(in) :: MASS\n", + " real, dimension(:,:), intent( out) :: FILLQ\n", + " integer :: IM,JM,LM\n", + " integer :: I,J,K,L\n", + " real :: TPW, NEGTPW\n", + " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n", + " ! Fills in negative q values in a mass conserving way.\n", + " ! 
Conservation of TPW was checked.\n", + " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n", + " IM = SIZE( Q, 1 )\n", + " JM = SIZE( Q, 2 )\n", + " LM = SIZE( Q, 3 )\n", + " do j=1,JM\n", + " do i=1,IM\n", + " TPW = SUM( Q(i,j,:)*MASS(i,j,:) )\n", + " NEGTPW = 0.\n", + " do l=1,LM\n", + " if ( Q(i,j,l) < 0.0 ) then\n", + " NEGTPW = NEGTPW + ( Q(i,j,l)*MASS( i,j,l ) )\n", + " Q(i,j,l) = 0.0\n", + " endif\n", + " enddo\n", + " do l=1,LM\n", + " if ( Q(i,j,l) >= 0.0 ) then\n", + " Q(i,j,l) = Q(i,j,l)*( 1.0+NEGTPW/(TPW-NEGTPW) )\n", + " endif\n", + " enddo\n", + " FILLQ(i,j) = -NEGTPW\n", + " end do\n", + " end do\n", + " end subroutine FILLQ2ZERO1\n", + "end program" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "%%bash\n", + "\n", + "mv testSerialBox.F90 $SERIALBOX_EXAMPLE_PATH/Fortran" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Assuming that we are interested in porting the subroutine `FILLQ2ZERO1`, we need the array data before and after calling `FILLQ2ZERO1`, which will let us set the initial data state in our ported code appropriately and have output data for comparison purposes. To get this data, there are directive-based Serialbox commands inserted before and after the call to `FILLQ2ZERO1` that follow the steps presented in the [Serialbox overview](#brief-serialbox-overview). Let's quickly examine the Serialbox commands before the call to `FILLQ2ZERO1`.\n", + "\n", + "- `!$ser init directory='.' prefix='FILLQ2ZERO_InOut'` : Initializes Serialbox and specifies that the extracted data will be written into the current path where the code is executed. The data will be grouped and named with the prefix `FILLQ2ZERO_InOut`.\n", + "\n", + "- `!$ser savepoint sp1` : Creates a savepoint with the name `sp1`.\n", + "\n", + "- `!$ser mode write` : Serialbox's operation mode will be to write data files; other modes include `read`.\n", + "\n", + "- `!$ser data q_in=Qin_out m_in=MASS fq_in=FILLQ_out` : Serialbox will write the arrays out into data files. Note that the variable on the left side of `=` is the variable name that Serialbox will use, and the variable on the right side of `=` is the Fortran variable.\n", + "\n", + "After the `FILLQ2ZERO1` call, the Serialbox command `!$ser data...` records the resulting output arrays from `FILLQ2ZERO1`. `!$ser cleanup` indicates we're done with writing data and finalizes the files.\n", + "\n", + "#### **Translating Serialbox directive calls into actual library calls**\n", + "\n", + "While we've expressed the Serialbox commands using directives, these directives will need to be mapped to the appropriate Serialbox library calls. To do this, we run a Python script `pp_ser.py` (found in the Serialbox installation directory) that will replace the `!$ser` directive statements with the appropriate Fortran Serialbox calls and write a new `testSerialBox.F90` file. The following Bash commands will create an `sb` directory within the `Fortran` directory and execute the `pp_ser.py` script." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing file testSerialBox.F90\n" + ] + } + ], + "source": [ + "%%bash\n", + "\n", + "cd $SERIALBOX_EXAMPLE_PATH/Fortran\n", + "if [ !
-d \"./sb\" ]; then\n", + " mkdir sb\n", + "else\n", + " rm -rf sb\n", + " mkdir sb\n", + "fi\n", + "\n", + "python /home/ckung/Documents/Code/SMT-Nebulae/sw_stack/discover/sles15/src/2024.03.00/install/serialbox/python/pp_ser/pp_ser.py --output-dir=./sb testSerialBox.F90" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that we specified the option `--output-dir=./sb` when running `pp_ser.py`, which specifies the location where we want the resulting Fortran code with the Serialbox directives replaced with library calls. If we did not specify the output directory, executing `pp_ser.py` would simply print the Fortran code to the terminal. In the `sb` directory, we'll find a `testSerialBox.F90` file that contains the appropriate Serialbox calls." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 16\n", + "drwxrwxr-x 2 ckung ckung 4096 May 13 10:08 .\n", + "drwxrwxr-x 3 ckung ckung 4096 May 13 10:08 ..\n", + "-rw-rw-r-- 1 ckung ckung 5033 May 13 10:08 testSerialBox.F90\n" + ] + } + ], + "source": [ + "%%bash\n", + "\n", + "cd $SERIALBOX_EXAMPLE_PATH/Fortran/sb\n", + "ls -al" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### **Building and Running Fortran code with Serialbox library**\n", + "\n", + "Compiling the Fortran code with Serialbox requires the following during compilation:\n", + "\n", + "- References to the following Serialbox libraries (assuming that we want the resulting binary with libraries statically linked)\n", + " - `libSerialboxFortran.a`\n", + " - `libSerialboxC.a`\n", + " - `libSerialboxCore.a`\n", + " - `-lstdc++`\n", + " \n", + "- The `-DSERIALIZE` macro to activate the Serialbox codepath within the Fortran code. Note that not having this macro during compilation will result in a binary without Serialbox calls.\n", + "\n", + "- The `include` path from the Serialbox installation\n", + "\n", + "The compilation line can look as follows." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "%%bash\n", + "\n", + "cd $SERIALBOX_EXAMPLE_PATH/Fortran/sb\n", + "\n", + "# Note: Adjust the libraries and include paths appropriately\n", + "\n", + "gfortran testSerialBox.F90 \\\n", + " $SERIALBOX_INSTALL_PATH/lib/libSerialboxFortran.a \\\n", + " $SERIALBOX_INSTALL_PATH/lib/libSerialboxC.a \\\n", + " $SERIALBOX_INSTALL_PATH/lib/libSerialboxCore.a \\\n", + " -lstdc++ \\\n", + " -DSERIALIZE \\\n", + " -I$SERIALBOX_INSTALL_PATH/include \\\n", + " -o testSerialBox.bin\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After successful compilation, we can execute the code. Note that whenever Serialbox is running, the code displays `WARNING: SERIALIZATION IS ON` in the terminal." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " sum(Qin_out) = 58.7446289 \n", + " sum(MASS) = 62.1698570 \n", + " >>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<\n", + " >>> WARNING: SERIALIZATION IS ON <<<\n", + " >>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<\n", + " sum(Qin_out) = 58.7851906 \n", + " sum(FILLQ_out) = 0.252184689 \n" + ] + } + ], + "source": [ + "%%bash\n", + "\n", + "cd $SERIALBOX_EXAMPLE_PATH/Fortran/sb\n", + "./testSerialBox.bin" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After the code executes, you will see several `.json` and `.dat` files that are named based on the variables Serialbox wrote and the `prefix` specified during Serialbox's initialization." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 1028\n", + "drwxrwxr-x 2 ckung ckung 4096 May 13 10:08 .\n", + "drwxrwxr-x 3 ckung ckung 4096 May 13 10:08 ..\n", + "-rw-rw-r-- 1 ckung ckung 872 May 13 10:08 ArchiveMetaData-FILLQ2ZERO_InOut.json\n", + "-rw-rw-r-- 1 ckung ckung 100 May 13 10:08 FILLQ2ZERO_InOut_fq_in.dat\n", + "-rw-rw-r-- 1 ckung ckung 100 May 13 10:08 FILLQ2ZERO_InOut_fq_out.dat\n", + "-rw-rw-r-- 1 ckung ckung 500 May 13 10:08 FILLQ2ZERO_InOut_m_in.dat\n", + "-rw-rw-r-- 1 ckung ckung 500 May 13 10:08 FILLQ2ZERO_InOut_m_out.dat\n", + "-rw-rw-r-- 1 ckung ckung 500 May 13 10:08 FILLQ2ZERO_InOut_q_in.dat\n", + "-rw-rw-r-- 1 ckung ckung 500 May 13 10:08 FILLQ2ZERO_InOut_q_out.dat\n", + "-rw-rw-r-- 1 ckung ckung 7157 May 13 10:08 MetaData-FILLQ2ZERO_InOut.json\n", + "-rwxrwxr-x 1 ckung ckung 997608 May 13 10:08 testSerialBox.bin\n", + "-rw-rw-r-- 1 ckung ckung 5033 May 13 10:08 testSerialBox.F90\n" + ] + } + ], + "source": [ + "%%bash\n", + "\n", + "cd $SERIALBOX_EXAMPLE_PATH/Fortran/sb\n", + "ls -al" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## **Serialbox Example 2 : Looping Region**\n", + "\n", + "There may be cases where a function or subroutine is located within a looping region, and we want to check its values at each iteration of the loop. Serialbox enables saving data within a looping region by adding metadata to the `!$ser savepoint` declaration. In general, it can look like this.\n", + "\n", + "- `!$ser savepoint <savepoint_name> <metadata_name>=<metadata_value>`\n", + "\n", + "For example, if there's a timestep looping region that increments the variable `currTS`, we can use that variable to create separate savepoints within that looping region.\n", + "\n", + "- `!$ser savepoint sp timestep=currTS`\n", + "\n", + "In the example below, we'll use Serialbox to create multiple savepoints within a looping region." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "%%bash\n", + "\n", + "cd $SERIALBOX_EXAMPLE_PATH\n", + "\n", + "if [ !
-d \"./Fortran_ts\" ]; then\n", + " mkdir Fortran_ts\n", + "else\n", + " rm -rf Fortran_ts\n", + " mkdir Fortran_ts\n", + "fi" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Writing testSerialBox_ts.F90\n" + ] + } + ], + "source": [ + "%%writefile testSerialBox_ts.F90\n", + "\n", + "program testSerialBox_ts\n", + "\n", + " implicit none\n", + "\n", + " real, dimension(:,:,:), allocatable :: Qin_out, MASS\n", + " real, dimension(:,:), allocatable :: FILLQ_out\n", + "\n", + " integer :: N = 5, N_ts = 10, t\n", + "\n", + " allocate(Qin_out(N,N,N), MASS(N,N,N), FILLQ_out(N,N))\n", + "\n", + "!$ser init directory='.' prefix='FILLQ2ZERO_InOut'\n", + "\n", + " do t = 1, N_ts\n", + "\n", + " call random_number(Qin_out)\n", + " call random_number(MASS)\n", + "\n", + " where(Qin_out < 0.1) Qin_out = -Qin_out\n", + "\n", + " print*, 'sum(Qin_out) = ', sum(Qin_out)\n", + " print*, 'sum(MASS) = ', sum(MASS)\n", + "\n", + "\n", + "!$ser savepoint sp1 timestep=t\n", + "!$ser data q_in=Qin_out m_in=MASS fq_in=FILLQ_out\n", + "\n", + " call FILLQ2ZERO1(Qin_out, MASS, FILLQ_out)\n", + "\n", + "!$ser data q_out=Qin_out m_out=MASS fq_out=FILLQ_out\n", + "\n", + "! print*, 'sum(Qin_out) = ', sum(Qin_out)\n", + "! print*, 'sum(FILLQ_out) = ', sum(FILLQ_out)\n", + "\n", + " enddo\n", + " \n", + "!$ser cleanup\n", + " contains\n", + "\n", + " subroutine FILLQ2ZERO1( Q, MASS, FILLQ )\n", + " real, dimension(:,:,:), intent(inout) :: Q\n", + " real, dimension(:,:,:), intent(in) :: MASS\n", + " real, dimension(:,:), intent( out) :: FILLQ\n", + " integer :: IM,JM,LM\n", + " integer :: I,J,K,L\n", + " real :: TPW, NEGTPW\n", + " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n", + " ! Fills in negative q values in a mass conserving way.\n", + " ! 
Conservation of TPW was checked.\n", + " !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n", + " IM = SIZE( Q, 1 )\n", + " JM = SIZE( Q, 2 )\n", + " LM = SIZE( Q, 3 )\n", + " do j=1,JM\n", + " do i=1,IM\n", + " TPW = SUM( Q(i,j,:)*MASS(i,j,:) )\n", + " NEGTPW = 0.\n", + " do l=1,LM\n", + " if ( Q(i,j,l) < 0.0 ) then\n", + " NEGTPW = NEGTPW + ( Q(i,j,l)*MASS( i,j,l ) )\n", + " Q(i,j,l) = 0.0\n", + " endif\n", + " enddo\n", + " do l=1,LM\n", + " if ( Q(i,j,l) >= 0.0 ) then\n", + " Q(i,j,l) = Q(i,j,l)*( 1.0+NEGTPW/(TPW-NEGTPW) )\n", + " endif\n", + " enddo\n", + " FILLQ(i,j) = -NEGTPW\n", + " end do\n", + " end do\n", + " end subroutine FILLQ2ZERO1\n", + "end program" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing file testSerialBox_ts.F90\n", + " >>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<\n", + " >>> WARNING: SERIALIZATION IS ON <<<\n", + " >>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<\n", + " sum(Qin_out) = 61.9121895 \n", + " sum(MASS) = 59.9121780 \n", + " sum(Qin_out) = 56.1568756 \n", + " sum(MASS) = 64.7800751 \n", + " sum(Qin_out) = 61.0407639 \n", + " sum(MASS) = 63.4687958 \n", + " sum(Qin_out) = 58.9772873 \n", + " sum(MASS) = 62.4764175 \n", + " sum(Qin_out) = 62.8103752 \n", + " sum(MASS) = 63.0623398 \n", + " sum(Qin_out) = 64.0034027 \n", + " sum(MASS) = 59.7669296 \n", + " sum(Qin_out) = 66.0840454 \n", + " sum(MASS) = 58.6753502 \n", + " sum(Qin_out) = 60.5121956 \n", + " sum(MASS) = 62.7025185 \n", + " sum(Qin_out) = 65.6868591 \n", + " sum(MASS) = 70.1329956 \n", + " sum(Qin_out) = 60.6698227 \n", + " sum(MASS) = 63.8359032 \n", + "total 1052\n", + "drwxrwxr-x 2 ckung ckung 4096 May 13 10:08 .\n", + "drwxrwxr-x 3 ckung ckung 4096 May 13 10:08 ..\n", + "-rw-rw-r-- 1 ckung ckung 6457 May 13 10:08 ArchiveMetaData-FILLQ2ZERO_InOut.json\n", + "-rw-rw-r-- 1 ckung ckung 1000 May 13 10:08 FILLQ2ZERO_InOut_fq_in.dat\n", + "-rw-rw-r-- 1 ckung ckung 1000 May 13 10:08 FILLQ2ZERO_InOut_fq_out.dat\n", + "-rw-rw-r-- 1 ckung ckung 5000 May 13 10:08 FILLQ2ZERO_InOut_m_in.dat\n", + "-rw-rw-r-- 1 ckung ckung 5000 May 13 10:08 FILLQ2ZERO_InOut_m_out.dat\n", + "-rw-rw-r-- 1 ckung ckung 5000 May 13 10:08 FILLQ2ZERO_InOut_q_in.dat\n", + "-rw-rw-r-- 1 ckung ckung 5000 May 13 10:08 FILLQ2ZERO_InOut_q_out.dat\n", + "-rw-rw-r-- 1 ckung ckung 9456 May 13 10:08 MetaData-FILLQ2ZERO_InOut.json\n", + "-rwxrwxr-x 1 ckung ckung 997648 May 13 10:08 testSerialBox_ts.bin\n", + "-rw-rw-r-- 1 ckung ckung 5117 May 13 10:08 testSerialBox_ts.F90\n" + ] + } + ], + "source": [ + "%%bash\n", + "\n", + "mv testSerialBox_ts.F90 $SERIALBOX_EXAMPLE_PATH/Fortran_ts\n", + "\n", + "cd $SERIALBOX_EXAMPLE_PATH/Fortran_ts\n", + "if [ ! 
-d \"./sb\" ]; then\n", + " mkdir sb\n", + "else\n", + " rm -rf sb\n", + " mkdir sb\n", + "fi\n", + "\n", + "python /home/ckung/Documents/Code/SMT-Nebulae/sw_stack/discover/sles15/src/2024.03.00/install/serialbox/python/pp_ser/pp_ser.py --output-dir=./sb testSerialBox_ts.F90\n", + "\n", + "cd $SERIALBOX_EXAMPLE_PATH/Fortran_ts/sb\n", + "\n", + "gfortran testSerialBox_ts.F90 \\\n", + " $SERIALBOX_INSTALL_PATH/lib/libSerialboxFortran.a \\\n", + " $SERIALBOX_INSTALL_PATH/lib/libSerialboxC.a \\\n", + " $SERIALBOX_INSTALL_PATH/lib/libSerialboxCore.a \\\n", + " -lstdc++ \\\n", + " -DSERIALIZE \\\n", + " -I$SERIALBOX_INSTALL_PATH/include \\\n", + " -o testSerialBox_ts.bin\n", + "\n", + "./testSerialBox_ts.bin\n", + "\n", + "ls -al" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "gt4py_jupyter", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/Fortran_serialization/02_read_serialized_data_python.ipynb b/examples/Fortran_serialization/02_read_serialized_data_python.ipynb new file mode 100644 index 00000000..c8265202 --- /dev/null +++ b/examples/Fortran_serialization/02_read_serialized_data_python.ipynb @@ -0,0 +1,240 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## **Serialbox Tutorial : Incorporating Fortran Serialbox Data into Python**\n", + "\n", + "In the [previous notebook](./01_serialize_fortran_data.ipynb), we covered how to extract data from a Fortran code using Serialbox. In this notebook, we'll cover how to read and incorporate those files within a Python code." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Notebook Requirements**\n", + "\n", + "- Python v3.11.x to v3.12.x\n", + "- [NOAA/NASA Domain Specific Language Middleware](https://github.com/NOAA-GFDL/NDSL)\n", + "- `ipykernel==6.1.0`\n", + "- [`ipython_genutils`](https://pypi.org/project/ipython_genutils/)\n", + "\n", + "This notebook assumes that the code from the [previous notebook](./01_serialize_fortran_data.ipynb) was run, and the serialized data from Fortran was written out." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Importing Fortran Serialbox Data From Example 1 into Python** ###\n", + "\n", + "We'll step through importing Serialbox data [created in Fortran](./01_serialize_fortran_data.ipynb#Serialbox-Example-1) into Python to test a Python port of `FILLQ2ZERO1`. Importing Serialbox data into Python essentially comes from opening a file via a \"serializer\" object denoted by a particular Serialbox initialization prefix (see [Serialbox directive calls in Fortran code](./01_serialize_fortran_data.ipynb#Serialbox-directive-calls-in-Fortran-code)) and stepping through the savepoints within the \"serializer\" object to read the data. 
This is done by the following Python calls assuming that the imported `serialbox` package is referenced via `ser`.\n", + "\n", + "- `ser.Serializer(ser.OpenModeKind.Read, \"<directory>\", \"<prefix>\")` : This function call creates a \"serializer\" object that will read Serialbox files within a declared path and reference data from a particular Serialbox initialization prefix.\n", + "\n", + "- `serializer.savepoint_list()` : Using a \"serializer\" object called `serializer`, this function call creates a list of Serialbox savepoints.\n", + "\n", + "- `serializer.read(\"<variable_name>\", <savepoint>)` : Using a \"serializer\" object called `serializer`, this function call will read the specified Serialbox variable at the given savepoint and return it as an array.\n", + "\n", + "Below is a Python example that uses these three calls to import the [Example 1](./01_serialize_fortran_data.ipynb#Serialbox-Example-1) Fortran data into Python. You can check to see that the summation of the arrays with Python matches closely with the [values presented in Fortran](./01_serialize_fortran_data.ipynb#Building-and-Running-Fortran-code-with-Serialbox-library)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sum of Qin_out = 57.30306911468506\n", + "Sum of mass = 65.57122611999512\n", + "Sum of fq_out = 0.0\n" + ] + } + ], + "source": [ + "import sys\n", + "# Appends the Serialbox python path to PYTHONPATH. If needed, change to appropriate path containing serialbox installation\n", + "sys.path.append('/home/ckung/Documents/Code/SMT-Nebulae/sw_stack_path/install/serialbox/python')\n", + "import serialbox as ser\n", + "import numpy as np\n", + "\n", + "# If needed, change the path in second parameter of ser.Serializer to appropriate path that contains Fortran data via Serialbox from 01.ipynb\n", + "serializer = ser.Serializer(ser.OpenModeKind.Read,\"./Fortran/sb/\",\"FILLQ2ZERO_InOut\")\n", + "\n", + "savepoints = serializer.savepoint_list()\n", + "\n", + "Qin_out = serializer.read(\"q_in\", savepoints[0])\n", + "mass = serializer.read(\"m_in\", savepoints[0])\n", + "fq_out = serializer.read(\"fq_in\", savepoints[0])\n", + "\n", + "print('Sum of Qin_out = ', sum(sum(sum(Qin_out))))\n", + "print('Sum of mass = ', sum(sum(sum(mass))))\n", + "print('Sum of fq_out = ', sum(sum(fq_out)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we'll create a rudimentary port of `FILLQ2ZERO1` and test whether or not it computes properly by comparing the output arrays `Qin_out` and `fq_out` to the corresponding arrays created from Fortran, which are retrieved using `serializer.read()`. In this example, the comparison between the Fortran and Python data is performed using `np.allclose`; however, note that the proper metric of comparison will depend on the application. We'll see that `np.allclose()` will report `True` for both the `Qin_out` and `fq_out` array comparisons. 
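Before writing the port, it can help to confirm what the archive actually contains. Below is a minimal sketch: `savepoint_list()` and `read()` appear in this notebook, while `fieldnames()` is assumed from the Serialbox Python API and may vary between builds.

```python
# Sketch: inspect the archive before porting (assumes `serializer` from the cell above).
print(serializer.fieldnames())        # assumed API call; lists the registered field names
for sp in serializer.savepoint_list():
    print(sp)                         # each savepoint prints its name and metadata
```

With the contents confirmed, the next cell implements and tests the port.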
" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sum of Qin_out = 57.2715950012207\n", + "Sum of fq_out = 0.36869711382314563\n", + "True\n", + "True\n" + ] + } + ], + "source": [ + "def fillq2zero1(Q, MASS, FILLQ):\n", + " IM = Q.shape[0]\n", + " JM = Q.shape[1]\n", + " LM = Q.shape[2]\n", + "\n", + " TPW = np.sum(Q*MASS,2)\n", + " for J in range(JM):\n", + " for I in range(IM):\n", + " NEGTPW = 0.\n", + " for L in range(LM):\n", + " if(Q[I,J,L] < 0.0):\n", + " NEGTPW = NEGTPW + (Q[I,J,L]*MASS[I,J,L])\n", + " Q[I,J,L] = 0.0\n", + " for L in range(LM):\n", + " if(Q[I,J,L] >= 0.0):\n", + " Q[I,J,L] = Q[I,J,L]*(1.0 + NEGTPW/(TPW[I,J]-NEGTPW))\n", + " FILLQ[I,J] = -NEGTPW\n", + " \n", + "fillq2zero1(Qin_out,mass,fq_out)\n", + "\n", + "print('Sum of Qin_out = ', sum(sum(sum(Qin_out))))\n", + "print('Sum of fq_out = ', sum(sum(fq_out)))\n", + "\n", + "Qin_out_ref = serializer.read(\"q_out\", savepoints[0])\n", + "mass_ref = serializer.read(\"m_out\", savepoints[0])\n", + "fq_out_ref = serializer.read(\"fq_out\", savepoints[0])\n", + "\n", + "print(np.allclose(Qin_out,Qin_out_ref))\n", + "print(np.allclose(fq_out,fq_out_ref))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Importing Fortran Data from Example 2 into Python : Looping Regions** ###\n", + "\n", + "In [Example 2](./01_serialize_fortran_data.ipynb#Serialbox-Example-2), Serialbox was set up to record data within a looping region. This results in a larger list of savepoints that we can step through in Python to recreating the looping process done in Fortran. The code below replicates the looping of `FILLQ2ZERO1` and reads multiple savepoints to intialize the data and compare outputs." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Current savepoint = sp1 {\"timestep\": 1, \"ID\": 1}\n", + "SUM(Qin_out) = 63.43995475769043\n", + "True\n", + "True\n", + "Current savepoint = sp1 {\"timestep\": 2, \"ID\": 2}\n", + "SUM(Qin_out) = 59.70357894897461\n", + "True\n", + "True\n", + "Current savepoint = sp1 {\"timestep\": 3, \"ID\": 3}\n", + "SUM(Qin_out) = 59.850998878479004\n", + "True\n", + "True\n", + "Current savepoint = sp1 {\"timestep\": 4, \"ID\": 4}\n", + "SUM(Qin_out) = 62.012206077575684\n", + "True\n", + "True\n", + "Current savepoint = sp1 {\"timestep\": 5, \"ID\": 5}\n", + "SUM(Qin_out) = 60.80107021331787\n", + "True\n", + "True\n", + "Current savepoint = sp1 {\"timestep\": 6, \"ID\": 6}\n", + "SUM(Qin_out) = 60.730340003967285\n", + "True\n", + "True\n", + "Current savepoint = sp1 {\"timestep\": 7, \"ID\": 7}\n", + "SUM(Qin_out) = 61.0941276550293\n", + "True\n", + "True\n", + "Current savepoint = sp1 {\"timestep\": 8, \"ID\": 8}\n", + "SUM(Qin_out) = 59.69675540924072\n", + "True\n", + "True\n", + "Current savepoint = sp1 {\"timestep\": 9, \"ID\": 9}\n", + "SUM(Qin_out) = 67.9124870300293\n", + "True\n", + "True\n", + "Current savepoint = sp1 {\"timestep\": 10, \"ID\": 10}\n", + "SUM(Qin_out) = 60.42111110687256\n", + "True\n", + "True\n" + ] + } + ], + "source": [ + "# If needed, change the path in second parameter of ser.Serializer to appropriate path that contains Fortran data via Serialbox from 01.ipynb\n", + "serializer = ser.Serializer(ser.OpenModeKind.Read,\"./Fortran_ts/sb/\",\"FILLQ2ZERO_InOut\")\n", + "\n", + "savepoints = serializer.savepoint_list()\n", + "\n", + "for currentSavepoint in savepoints:\n", + " Qin_out = serializer.read(\"q_in\", currentSavepoint)\n", + " mass = serializer.read(\"m_in\", currentSavepoint)\n", + " fq_out = serializer.read(\"fq_in\", currentSavepoint)\n", + "\n", + " fillq2zero1(Qin_out,mass,fq_out)\n", + "\n", + " Qin_out_ref = serializer.read(\"q_out\", currentSavepoint)\n", + " mass_ref = serializer.read(\"m_out\", currentSavepoint)\n", + " fq_out_ref = serializer.read(\"fq_out\", currentSavepoint)\n", + "\n", + " print('Current savepoint = ', currentSavepoint)\n", + " print('SUM(Qin_out) = ', sum(sum(sum(Qin_out))))\n", + " print(np.allclose(Qin_out,Qin_out_ref))\n", + " print(np.allclose(fq_out,fq_out_ref))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "gt4py_jupyter", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/NDSL/01_gt4py_basics.ipynb b/examples/NDSL/01_gt4py_basics.ipynb new file mode 100644 index 00000000..ce66cfa2 --- /dev/null +++ b/examples/NDSL/01_gt4py_basics.ipynb @@ -0,0 +1,546 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# **GT4Py Tutorial : Stencil Basics**\n", + "\n", + "## **Introduction**\n", + "\n", + "This notebook will show how to create a simple GT4Py stencil that copies data from one variable to another.\n", + "\n", + "### **Notebook Requirements**\n", + "\n", + "- Python v3.11.x to v3.12.x\n", + "- [NOAA/NASA Domain Specific Language Middleware](https://github.com/NOAA-GFDL/NDSL)\n", + "- 
`ipykernel==6.1.0`\n", + "- [`ipython_genutils`](https://pypi.org/project/ipython_genutils/)\n", + "\n", + "### **Quick GT4Py (Cartesian version) Overview**\n", + "\n", + "GT4Py is a Domain Specific Language (DSL) in Python that enables a developer to write stencil computations. Compared to simply running under Python, GT4Py achieves performance by translating and compiling the Python code into a lower-level language such as C++ or CUDA, which enables the codebase to execute on a multitude of architectures. In this notebook, we will cover the basics of creating GT4Py stencils and demonstrate several intricacies of the DSL. Additional information about GT4Py can be found at the [GT4Py site](https://gridtools.github.io/gt4py/latest/index.html). One small note is that this tutorial covers and uses the Cartesian version of GT4Py and not the unstructured version.\n", + "\n", + "### **GT4Py Parallel/Execution Model**\n", + "\n", + "Within a 3-dimensional domain, GT4Py considers computations in two parts. If we assume an `(I,J,K)` coordinate system as a reference, GT4Py separates computations in the Horizontal (`IJ`) spatial plane and Vertical (`K`) spatial interval. In the Horizontal spatial plane, computations are implicitly executed in parallel, which also means that there is no assumed calculation order within the plane. In the Vertical spatial interval, computations are specified by an iteration policy that will be discussed later through examples.\n", + "\n", + "Another quick note is that the computations are executed sequentially in the order they appear in code.\n", + "\n", + "## **Tutorial**\n", + "\n", + "### **Copy Stencil example**\n", + "\n", + "To demonstrate how to implement a GT4Py stencil, we'll step through an example that copies the values of one array into another array. First, we import several packages. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from gt4py.cartesian.gtscript import PARALLEL, computation, interval, stencil\n", + "from ndsl.dsl.typing import FloatField\n", + "from ndsl.quantity import Quantity\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we walk through the example, we'll highlight different terms from the imported packages. Let's first define, in GT4Py terms, two arrays of size 5 by 5 by 2 (dimensionally `I` by `J` by `K`). These arrays are defined using a `Quantity` object, an NDSL data container for physical quantities. More detailed information about the `Quantity` object and its arguments can be found from the [`Quantity` docstring](https://github.com/NOAA-GFDL/NDSL/blob/develop/ndsl/quantity.py#L270). To make debugging easier, the `numpy` backend will be used."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "backend = 'numpy'\n", + "\n", + "nx = 5\n", + "ny = 5\n", + "nz = 2\n", + "\n", + "shape = (nx, ny, nz)\n", + "\n", + "qty_out = Quantity(data=np.zeros([nx, ny, nz]),\n", + " dims=[\"I\", \"J\", \"K\"],\n", + " units=\"m\",\n", + " gt4py_backend=backend\n", + " )\n", + "\n", + "arr = np.indices(shape,dtype=float).sum(axis=0) # Value of each entry is sum of the I and J index at each point\n", + "\n", + "qty_in = Quantity(data=arr,\n", + " dims=[\"I\", \"J\", \"K\"],\n", + " units=\"m\",\n", + " gt4py_backend=backend)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will next create a simple GT4Py stencil that copies values from one input to another. A stencil will look like a Python subroutine or function except that it uses specific GT4Py functionalities." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@stencil(backend=backend)\n", + "def copy_stencil(input_field: FloatField,\n", + " output_field: FloatField):\n", + " with computation(PARALLEL), interval(...):\n", + " output_field = input_field" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As mentioned before, GT4Py (cartesian version) was designed for stencil-based computation. Since stencil calculations generally are localized computations, GT4Py stencils are written using variables and, if a variable is an array, its location relative to the current point. If there are no indices in brackets next to a GT4Py type (such as `FloatField`), it's implied to be at the [0] (for 1-dimension), [0,0] (for 2-dimension), or [0,0,0] (for 3-dimension) location. For the simple example `copy_stencil`, the value of `input_field` simply gets copied to `output_field` at every point in the domain of interest.\n", + "\n", + "We see that this stencil does not contain any explicit loops. As mentioned above in the notebook, GT4Py has a particular computation policy that implicitly executes in parallel within an `IJ` plane and is user defined in the `K` interval. This execution policy in the `K` interval is dictated by the `computation` and `interval` keywords. \n", + "\n", + "- `with computation(PARALLEL)` means that there's no order preference to executing the `K` interval. This also means that the `K` interval can be computed in parallel to potentially gain performance if computational resources are available.\n", + "\n", + "- `interval(...)` means that the entire `K` interval is executed. Instead of `(...)`, more specific intervals can be specified using a tuple of two integers. For example... \n", + "\n", + " - `interval(0,2)` : The interval `K` = 0 to 1 is executed.\n", + " - `interval(0,-1)` : The interval `K` = 0 to N-2 (where N is the size of `K`) is executed.\n", + "\n", + "The decorator `@stencil(backend=backend)` (Note: `stencil` comes from the package `gt4py.cartesian.gtscript`) converts `copy_stencil` to use the specified `backend` to \"compile\" the stencil. `stencil` can also be a function call to create a stencil object."
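One detail the copy example does not show: several `interval` blocks can be stacked under a single `computation` to apply different formulas over different `K` ranges. A minimal sketch of that pattern (the stencil `set_by_level` and its constant values are illustrative, not part of the tutorial):

```python
@stencil(backend=backend)
def set_by_level(field: FloatField):
    # Illustrative only: a different assignment on each K range.
    with computation(PARALLEL):
        with interval(0, 1):      # the lowest K level
            field = 0.0
        with interval(1, None):   # every remaining K level
            field = 1.0
```

Returning to the note above, the next cell builds the same copy stencil through a direct `stencil()` call rather than the decorator.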
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "copy_stencil_numpy = stencil(backend=\"numpy\", definition=copy_stencil)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that the input and output parameters to `copy_stencil` are of type `FloatField`, which can essentially be thought of as a 3-dimensional NumPy array of `float` types.\n", + "\n", + "The `Quantity` method `plot_k_level` plots the values within the `IJ` plane at the `K` level given as its argument. As we can see in the plots below, `copy_stencil` copies the values from `qty_in` into `qty_out`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Plotting values of qty_in at K = 0\")\n", + "qty_in.plot_k_level(0)\n", + "print(\"Plotting values of qty_out at K = 0\")\n", + "qty_out.plot_k_level(0)\n", + "print(\"Executing `copy_stencil`\")\n", + "copy_stencil(qty_in, qty_out)\n", + "print(\"Plotting qty_out from `copy_stencil` at K = 0\")\n", + "qty_out.plot_k_level(0)\n", + "print(\"Plotting qty_out from `copy_stencil` at K = 1\")\n", + "qty_out.plot_k_level(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Setting domain subsets in a stencil call**\n", + "\n", + "GT4Py also allows a stencil call to execute over a subset of the domain, in a fashion similar to using `interval(...)` in the `K` interval. This is done by setting the stencil call's `origin` and `domain` arguments.\n", + "\n", + "- `origin` : This specifies the \"starting\" coordinate to perform computations. \n", + "\n", + "- `domain` : This specifies the range of the stencil computation based on `origin` as the \"starting\" coordinate (note: how this interacts with `interval()` is not covered here)\n", + "\n", + "If these two parameters are not set, the stencil call by default will iterate over the entire input domain. The following demonstrates the effect of specifying different `origin` and `domain`."
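Conceptually, a stencil call with these parameters touches the slice `[origin[d] : origin[d] + domain[d]]` of each field in every dimension `d`. A plain-NumPy sketch of that analogy for the copy stencil (not GT4Py API, just the equivalent region on NumPy arrays):

```python
def numpy_copy_analogy(field_in, field_out, origin, domain):
    # Region touched by copy_stencil(field_in, field_out, origin=..., domain=...).
    (i0, j0, k0), (ni, nj, nk) = origin, domain
    field_out[i0:i0 + ni, j0:j0 + nj, k0:k0 + nk] = \
        field_in[i0:i0 + ni, j0:j0 + nj, k0:k0 + nk]
```

The cells below demonstrate the real calls.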
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "qty_out = Quantity(data=np.zeros([nx, ny, nz]),\n", + " dims=[\"I\", \"J\", \"K\"],\n", + " units=\"m\",\n", + " gt4py_backend=backend\n", + " )\n", + "\n", + "print(\"Plotting values of qty_in at K = 0\")\n", + "qty_in.plot_k_level(0)\n", + "print(\"Plotting values of qty_out at K = 0\")\n", + "qty_out.plot_k_level(0)\n", + "print(\"Executing `copy_stencil` with origin=(1, 0, 0)\")\n", + "copy_stencil(qty_in, qty_out, origin=(1, 0, 0))\n", + "print(\"Plotting qty_out at K = 0 based on `copy_stencil` with origin=(1, 0, 0)\")\n", + "qty_out.plot_k_level(0)\n", + "\n", + "qty_out = Quantity(data=np.zeros([nx, ny, nz]),\n", + " dims=[\"I\", \"J\", \"K\"],\n", + " units=\"m\",\n", + " gt4py_backend=backend\n", + " )\n", + "\n", + "print(\"Resetting qty_out to zero...\")\n", + "print(\"Plotting values of qty_out at K = 0\")\n", + "qty_out.plot_k_level(0)\n", + "print(\"Executing `copy_stencil` with origin=(0, 1, 0)\")\n", + "copy_stencil(qty_in, qty_out, origin=(0, 1, 0))\n", + "print(\"Plotting qty_out at K = 0 based on `copy_stencil` with origin=(0, 1, 0)\")\n", + "qty_out.plot_k_level(0)\n", + "\n", + "qty_out = Quantity(data=np.zeros([nx, ny, nz]),\n", + " dims=[\"I\", \"J\", \"K\"],\n", + " units=\"m\",\n", + " gt4py_backend=backend\n", + " )\n", + "\n", + "print(\"Resetting qty_out to zero...\")\n", + "print(\"Plotting values of qty_out at K = 0\")\n", + "qty_out.plot_k_level(0)\n", + "print(\"Executing `copy_stencil` with origin = (0, 0, 1)\")\n", + "copy_stencil(qty_in, qty_out, origin=(0, 0, 1))\n", + "print(\"Plotting qty_out at K = 0 based on `copy_stencil` with origin=(0, 0, 1)\")\n", + "qty_out.plot_k_level(0)\n", + "print(\"Plotting qty_out at K = 1 based on `copy_stencil` with origin=(0, 0, 1)\")\n", + "qty_out.plot_k_level(1)\n", + "\n", + "qty_out = Quantity(data=np.zeros([nx, ny, nz]),\n", + " dims=[\"I\", \"J\", \"K\"],\n", + " units=\"m\",\n", + " gt4py_backend=backend\n", + " )\n", + "print(\"Resetting qty_out to zero...\")\n", + "print(\"Plotting values of qty_in at K = 0\")\n", + "qty_in.plot_k_level(0)\n", + "print(\"Plotting values of qty_out at K = 0\")\n", + "qty_out.plot_k_level(0)\n", + "print(\"Executing `copy_stencil` with domain=(2, 2, nz)\")\n", + "copy_stencil(qty_in, qty_out, domain=(2, 2, nz))\n", + "print(\"Plotting qty_out at K = 0 based on `copy_stencil` with domain=(2, 2, nz)\")\n", + "qty_out.plot_k_level(0)\n", + "\n", + "qty_out = Quantity(data=np.zeros([nx, ny, nz]),\n", + " dims=[\"I\", \"J\", \"K\"],\n", + " units=\"m\",\n", + " gt4py_backend=backend\n", + " )\n", + "print(\"Resetting qty_out to zero...\")\n", + "print(\"Plotting values of qty_out at K = 0\")\n", + "qty_out.plot_k_level(0)\n", + "print(\"Executing `copy_stencil` with origin=(2, 2, 0), domain=(2, 2, nz)\")\n", + "copy_stencil(qty_in, qty_out, origin=(2, 2, 0), domain=(2, 2, nz))\n", + "print(\"Plotting qty_out at K = 0 based on `copy_stencil` with origin=(2, 2, 0), domain=(2, 2, nz)\")\n", + "qty_out.plot_k_level(0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **`FORWARD` and `BACKWARD` `computation` keywords and Offset Indexing within a stencil call**\n", + "\n", + "Besides `PARALLEL`, the developer can specify `FORWARD` or `BACKWARD` as the iteration policy in `K` for a stencil. 
Essentially, the `FORWARD` policy has `K` iterating consecutively starting from the lowest vertical index to the highest, while the `BACKWARD` policy performs the reverse.\n", + "\n", + "An array-based stencil variable can also have an integer dimensional offset if the array variable is on the right hand side of the `=` for the computation. When a computation is performed at a particular point, an offset variable's coordinate is based on that particular point plus (or minus) the offset in the offset dimension.\n", + "\n", + "The following examples demonstrate the use of these two iteration policies and also offset indexing in the `K` dimension. Note that offsets can also be applied to the `I` or `J` dimension." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from gt4py.cartesian.gtscript import FORWARD, BACKWARD\n", + "\n", + "nx = 5\n", + "ny = 5\n", + "nz = 5\n", + "nhalo = 1\n", + "backend=\"numpy\"\n", + "\n", + "shape = (nx + 2 * nhalo, ny + 2 * nhalo, nz)\n", + "\n", + "qty_out = Quantity(data=np.zeros(shape),\n", + " dims=[\"I\", \"J\", \"K\"],\n", + " units=\"m\",\n", + " gt4py_backend=backend\n", + " )\n", + "\n", + "arr = np.indices(shape,dtype=float).sum(axis=0) # Value of each entry is sum of the I and J index at each point\n", + "qty_in = Quantity(data=arr,\n", + " dims=[\"I\", \"J\", \"K\"],\n", + " units=\"m\",\n", + " gt4py_backend=backend\n", + " )\n", + "\n", + "print(\"Plotting values of qty_in at K = 0\")\n", + "qty_in.plot_k_level(0)\n", + "print(\"Plotting values of qty_in at K = 1\")\n", + "qty_in.plot_k_level(1)\n", + "print(\"Plotting values of qty_in at K = 2\")\n", + "qty_in.plot_k_level(2)\n", + "\n", + "@stencil(backend=backend)\n", + "def mult_upward(qty_in: FloatField, qty_out: FloatField):\n", + " with computation(FORWARD), interval(...):\n", + " qty_out = qty_in[0,0,-1] * 2.0\n", + "\n", + "print(\"Executing 'mult_upward' with origin=(nhalo, nhalo, 1), domain=(nx, ny, 2)\")\n", + "mult_upward(qty_in, qty_out, origin=(nhalo,nhalo,1), domain=(nx,ny,2))\n", + "print(\"Plotting values of qty_out at K = 0 with origin=(nhalo, nhalo, 1), domain=(nx, ny, 2)\")\n", + "qty_out.plot_k_level(0)\n", + "print(\"Plotting values of qty_out at K = 1 with origin=(nhalo, nhalo, 1), domain=(nx, ny, 2)\")\n", + "qty_out.plot_k_level(1)\n", + "print(\"Plotting values of qty_out at K = 2 with origin=(nhalo, nhalo, 1), domain=(nx, ny, 2)\")\n", + "qty_out.plot_k_level(2)\n", + "print(\"Plotting values of qty_out at K = 3 with origin=(nhalo, nhalo, 1), domain=(nx, ny, 2)\")\n", + "qty_out.plot_k_level(3)\n", + "\n", + "@stencil(backend=backend)\n", + "def copy_downward(qty_in: FloatField, qty_out: FloatField):\n", + " with computation(BACKWARD), interval(...):\n", + " qty_out = qty_in[0,0,1]\n", + "\n", + "print(\"Resetting qty_out to zeros\")\n", + "qty_out = Quantity(data=np.zeros(shape),\n", + " dims=[\"I\", \"J\", \"K\"],\n", + " units=\"m\",\n", + " gt4py_backend=backend\n", + " )\n", + "\n", + "print(\"Executing 'copy_downward' with origin=(1, 1, 0), domain=(nx, ny, nz-1)\")\n", + "copy_downward(qty_in, qty_out, origin=(1, 1, 0), domain=(nx, ny, nz-1))\n", + "print(\"***\")\n", + "print(\"Plotting values of qty_out at K = 0\")\n", + "qty_out.plot_k_level(0)\n", + "print(\"Plotting values of qty_out at K = 1\")\n", + "qty_out.plot_k_level(1)\n", + "print(\"Plotting values of qty_out at K = 2\")\n", + "qty_out.plot_k_level(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Regarding 
offsets, GT4Py does not allow offsets on variables on the left-hand side of the `=`. Uncomment and execute the below code to see the error `Assignment to non-zero offsets is not supported.`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# @stencil(backend=backend)\n", + "# def mult_upward_error(qty_in: FloatField, qty_out: FloatField):\n", + "# with computation(FORWARD), interval(...):\n", + "# qty_out[0,-1,-1] = qty_in * 2.0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Limits to offset : Cannot set offset outside of usable domain**\n", + "\n", + "Note that there are limits to the offsets that can be applied in the stencil. An error will result if the specified shift attempts to read data that is not available or allocated. In the example below, a shift of -2 in the `J` axis will shift `field_in` out of its possible range in `J`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nx = 5\n", + "ny = 5\n", + "nz = 5\n", + "nhalo = 1\n", + "backend=\"numpy\"\n", + "\n", + "shape = (nx + 2 * nhalo, ny + 2 * nhalo, nz)\n", + "\n", + "qty_out = Quantity(data=np.zeros(shape),\n", + " dims=[\"I\", \"J\", \"K\"],\n", + " units=\"m\",\n", + " gt4py_backend=backend\n", + " )\n", + "\n", + "arr = np.indices(shape,dtype=float).sum(axis=0) # Value of each entry is sum of the I and J index at each point\n", + "qty_in = Quantity(data=arr,\n", + " dims=[\"I\", \"J\", \"K\"],\n", + " units=\"m\",\n", + " gt4py_backend=backend\n", + " )\n", + "\n", + "@stencil(backend=backend)\n", + "def copy_stencil_offset(field_in: FloatField, field_out: FloatField):\n", + " with computation(PARALLEL), interval(...):\n", + " field_out = field_in[0,-2,0]\n", + "\n", + "print(\"Executing 'copy_stencil' with origin=(nhalo, nhalo, 0), domain=(nx, ny, nz)\")\n", + "copy_stencil(qty_in, qty_out, origin=(nhalo, nhalo, 0), domain=(nx, ny, nz))\n", + "print(\"Executing 'copy_stencil' where qty_out is copied back to qty_in\")\n", + "copy_stencil(qty_out, qty_in)\n", + "qty_in.plot_k_level(0)\n", + "print(\"Executing 'copy_stencil_offset' where origin=(nhalo, nhalo, 0), domain=(nx, ny, nz)\")\n", + "copy_stencil_offset(qty_in, qty_out, origin=(nhalo, nhalo, 0), domain=(nx, ny, nz))\n", + "qty_out.plot_k_level(0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **`if/else` statements**\n", + "\n", + "GT4Py allows for `if/else` statements to exist within a stencil. The following simple example shows a stencil `stencil_if_zero` modifying values of `in_out_field` depending on its initial value."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "qty_out = Quantity(data=np.zeros(shape),\n", + " dims=[\"I\", \"J\", \"K\"],\n", + " units=\"m\",\n", + " gt4py_backend=backend\n", + " )\n", + "\n", + "arr = np.indices(shape,dtype=float).sum(axis=0) # Value of each entry is sum of the I and J index at each point\n", + "qty_in = Quantity(data=arr,\n", + " dims=[\"I\", \"J\", \"K\"],\n", + " units=\"m\",\n", + " gt4py_backend=backend\n", + " )\n", + "\n", + "print(\"Plotting values of qty_in at K = 0\")\n", + "qty_in.plot_k_level(0)\n", + "print(\"Plotting values of qty_out at K = 0\")\n", + "qty_out.plot_k_level(0)\n", + "print(\"Running copy_stencil with origin=(nhalo, nhalo, 0), domain=(nx, ny, 5)\")\n", + "copy_stencil(qty_in, qty_out, origin=(nhalo, nhalo, 0), domain=(nx, ny, 5))\n", + "print(\"Plotting values of qty_out at K = 0 based on running copy_stencil with origin=(nhalo, nhalo, 0), domain=(nx, ny, 5)\")\n", + "qty_out.plot_k_level(0)\n", + "print(\"Plotting values of qty_out at K = 1 based on running copy_stencil with origin=(nhalo, nhalo, 0), domain=(nx, ny, 5)\")\n", + "qty_out.plot_k_level(1)\n", + "\n", + "@stencil(backend=backend)\n", + "def stencil_if_zero(in_out_field: FloatField):\n", + " with computation(PARALLEL), interval(...):\n", + " if in_out_field == 0.0:\n", + " in_out_field = 30\n", + " else:\n", + " in_out_field = 10\n", + "print(\"Running 'stencil_if_zero' on qty_out\")\n", + "stencil_if_zero(qty_out)\n", + "print(\"Plotting values of qty_out at K = 0 based on running stencil_if_zero\")\n", + "qty_out.plot_k_level(0)\n", + "print(\"Plotting values of qty_out at K = 1 based on running stencil_if_zero\")\n", + "qty_out.plot_k_level(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Function calls**\n", + "\n", + "GT4Py also has the capability to create functions in order to better organize code. The main difference between a GT4Py function call and a GT4Py stencil is that a function does not (and cannot) contain the keywords `computation` and `interval`. However, array index referencing within a GT4py function is the same as in a GT4Py stencil.\n", + "\n", + "GT4Py functions can be created by using the decorator `function` (Note: `function` originates from the package `gt4py.cartesian.gtscript`)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from gt4py.cartesian.gtscript import function\n", + "\n", + "@function\n", + "def plus_one(field: FloatField):\n", + " return field[0, 0, 0] + 1\n", + "\n", + "@stencil(backend=backend)\n", + "def field_plus_one(source: FloatField, target: FloatField):\n", + " with computation(PARALLEL), interval(...):\n", + " target = plus_one(source)\n", + "\n", + "nx = 5\n", + "ny = 5\n", + "nz = 5\n", + "nhalo = 1\n", + "backend=\"numpy\"\n", + "\n", + "shape = (nx + 2 * nhalo, ny + 2 * nhalo, nz)\n", + "\n", + "qty_out = Quantity(data=np.zeros(shape),\n", + " dims=[\"I\", \"J\", \"K\"],\n", + " units=\"m\",\n", + " gt4py_backend=backend\n", + " )\n", + "\n", + "arr = np.indices(shape, dtype=float).sum(axis=0) # Value of each entry is sum of the I and J index at each point\n", + "qty_in = Quantity(data=arr,\n", + " dims=[\"I\", \"J\", \"K\"],\n", + " units=\"m\",\n", + " gt4py_backend=backend\n", + " )\n", + "\n", + "print(\"Plotting values of qty_in at K = 0\")\n", + "qty_in.plot_k_level(0)\n", + "print(\"Plotting values of qty_out at K = 0\")\n", + "qty_out.plot_k_level(0)\n", + "print(\"Executing 'field_plus_one'\")\n", + "field_plus_one(qty_in, qty_out)\n", + "print(\"Plotting values of qty_out at K = 0 after executing 'field_plus_one'\")\n", + "qty_out.plot_k_level(0)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/NDSL/02_NDSL_basics.ipynb b/examples/NDSL/02_NDSL_basics.ipynb new file mode 100644 index 00000000..eac17cad --- /dev/null +++ b/examples/NDSL/02_NDSL_basics.ipynb @@ -0,0 +1,351 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# **NDSL Basics** #\n", + "\n", + "### **Introduction**\n", + "After establishing the basics of using GT4Py, we'll take a look at developing an object-oriented coding approach with the NDSL middleware. Much of the object-oriented work comes from the development of [Pace](https://github.com/NOAA-GFDL/pace), the implementation of the FV3GFS / SHiELD atmospheric model using GT4Py and [DaCe](https://github.com/spcl/dace). The `StencilFactory` object will be introduced and demonstrated." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Creating the `StencilFactory` object**\n", + "\n", + "The `StencilFactory` object enables the sharing of stencil properties across multiple stencils as well as the ability to \"build and execute\" stencils. To help ease the introduction, the `ndsl.boilerplate` module contains a function `get_factories_single_tile_numpy` that takes the domain and halo sizes and returns a `StencilFactory` object (along with a quantity factory that is unused here); the `numpy` backend is implied by the name. For more details about the objects needed to create the `StencilFactory`, the reader can view the `get_factories_single_tile_numpy` function in `ndsl.boilerplate`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from ndsl import StencilFactory\n",
+ "from ndsl.boilerplate import get_factories_single_tile_numpy\n",
+ "\n",
+ "nx = 6\n",
+ "ny = 6\n",
+ "nz = 1\n",
+ "nhalo = 1\n",
+ "\n",
+ "stencil_factory, _ = get_factories_single_tile_numpy(nx, ny, nz, nhalo)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### **Creating the Copy stencil**\n",
+ "\n",
+ "The `NDSL` and `gt4py` modules contain key terms that will be used to create the stencil. Many terms are covered in the [GT4Py basic tutorial](./01_gt4py_basics.ipynb) notebook, but we'll briefly recap.\n",
+ "\n",
+ "- `FloatField` : This type can generally be thought of as a `gt4py` 3-dimensional `numpy` array of floating point values.\n",
+ "\n",
+ "- `computation(PARALLEL)` : This keyword combination means that there is no assumed order to perform calculations in the `K` (3rd) dimension of a `gt4py` storage. `PARALLEL` can be replaced by `FORWARD` or `BACKWARD` for serialized calculations in the `K` dimension.\n",
+ "\n",
+ "- `interval(...)` : This keyword specifies the range of computation in the `K` dimension.\n",
+ "\n",
+ "The code below contains the copy stencil implementation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from ndsl.dsl.typing import FloatField\n",
+ "from gt4py.cartesian.gtscript import PARALLEL, computation, interval\n",
+ "\n",
+ "def copy_field_stencil(field_in: FloatField, field_out: FloatField):\n",
+ " with computation(PARALLEL), interval(...):\n",
+ " field_out = field_in"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Note that a decorator does not surround this stencil as was done in the [basic tutorial](./01_gt4py_basics.ipynb). Instead, we'll use the `StencilFactory` to build the stencil."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### **Creating a class that performs a stencil computation**\n",
+ "\n",
+ "Using the `StencilFactory` object created earlier, the code will now create a class `CopyField` that takes `copy_field_stencil` and defines the computation domain from the parameters `origin` and `domain` within `__init__`. `origin` indicates the \"starting\" point of the stencil calculation, and `domain` indicates the extent of the stencil calculation in the three dimensions. Note that when creating `stencil_factory`, a 6 by 6 by 1 sized domain surrounded by a halo layer of size 1 was defined. Thus, whenever a `CopyField` object is created, it will perform calculations within the 6 by 6 by 1 domain (specified by `domain=grid_indexing.domain_compute()`), and the `origin` will start at the `[0,0,0]` location of the 6 by 6 by 1 compute domain (specified by `origin=grid_indexing.origin_compute()`).",
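+ "\n",
+ "As a concrete illustration (assuming the standard NDSL convention that the compute domain starts just past the halo), for `nx = ny = 6`, `nz = 1`, and `nhalo = 1` we would expect:\n",
+ "\n",
+ "```python\n",
+ "grid_indexing = stencil_factory.grid_indexing\n",
+ "print(grid_indexing.origin_compute())  # expected: (1, 1, 0)\n",
+ "print(grid_indexing.domain_compute())  # expected: (6, 6, 1)\n",
+ "```"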
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class CopyField:\n",
+ " def __init__(self, stencil_factory: StencilFactory):\n",
+ " grid_indexing = stencil_factory.grid_indexing\n",
+ " self._copy_field = stencil_factory.from_origin_domain(\n",
+ " copy_field_stencil, # <-- gt4py stencil function wrapped into NDSL\n",
+ " origin=grid_indexing.origin_compute(),\n",
+ " domain=grid_indexing.domain_compute(),\n",
+ " )\n",
+ "\n",
+ " def __call__( # <-- Runtime path\n",
+ " self,\n",
+ " field_in: FloatField,\n",
+ " field_out: FloatField,\n",
+ " ):\n",
+ " self._copy_field(field_in, field_out)\n",
+ " \n",
+ " \n",
+ "copy_field = CopyField(stencil_factory)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### **Allocating Data in `NDSL`**\n",
+ "\n",
+ "The next code section will create arrays using `Quantity`. For more information about `Quantity`, see the [GT4Py Basic tutorial](./01_gt4py_basics.ipynb#Copy_Stencil_example)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from ndsl.quantity import Quantity\n",
+ "import numpy as np\n",
+ "\n",
+ "backend = stencil_factory.backend\n",
+ "size = (nx + 2 * nhalo) * (ny + 2 * nhalo) * nz\n",
+ "shape = (nx + 2 * nhalo, ny + 2 * nhalo, nz)\n",
+ "\n",
+ "qty_out = Quantity(data=np.zeros(shape),\n",
+ " dims=[\"I\", \"J\", \"K\"],\n",
+ " units=\"m\",\n",
+ " gt4py_backend=backend\n",
+ " )\n",
+ "\n",
+ "arr = np.indices(shape, dtype=float).sum(axis=0) # Value of each entry is sum of the I and J index at each point\n",
+ "\n",
+ "qty_in = Quantity(data=arr,\n",
+ " dims=[\"I\", \"J\", \"K\"],\n",
+ " units=\"m\",\n",
+ " gt4py_backend=backend)\n",
+ "\n",
+ "print(\"Plotting qty_in at K = 0\")\n",
+ "qty_in.plot_k_level(0)\n",
+ "print(\"Plotting qty_out at K = 0\")\n",
+ "qty_out.plot_k_level(0)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### **Calling the `copy_field` stencil**\n",
+ "\n",
+ "The code will call `copy_field` to execute `copy_field_stencil` using the previously defined `Quantity` data containers and plot the result at `K = 0`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"Executing copy_field stencil\")\n",
+ "copy_field(qty_in, qty_out)\n",
+ "print(\"Plotting qty_out at K = 0\")\n",
+ "qty_out.plot_k_level(0)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "From the plot, we see that the copy is applied only to the inner 6 by 6 compute \"domain\" and not to the \"halo\" region surrounding it."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### **Applying a J offset**\n",
+ "\n",
+ "The next example will create a stencil that takes a `Quantity` as an input, shifts the input by 1 in the `-J` direction, and writes it to an output `Quantity`. This stencil is defined in `copy_field_offset_stencil`.\n",
+ "\n",
+ "Note that in `copy_field_offset_stencil`, the shift in the J dimension is performed by referencing the `J` object from `gt4py.cartesian.gtscript` for simplicity. This reference will apply the shift in J to the entire input domain. 
Another way to perform the shift without referencing the `J` object is to write `[0,-1,0]` (assuming that the variable being modified is 3-dimensional) instead of `[J-1]`.\n",
+ "\n",
+ "With the stencil in place, a class `CopyFieldOffset` is defined using the `StencilFactory` object and `copy_field_offset_stencil`. The class is instantiated and used to shift `qty_in` by 1 in the J-dimension, writing the result to `qty_out`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from gt4py.cartesian.gtscript import J\n",
+ "\n",
+ "def copy_field_offset_stencil(field_in: FloatField, field_out: FloatField):\n",
+ " with computation(PARALLEL), interval(...):\n",
+ " field_out = field_in[J-1]\n",
+ " \n",
+ "class CopyFieldOffset:\n",
+ " def __init__(self, stencil_factory: StencilFactory):\n",
+ " grid_indexing = stencil_factory.grid_indexing\n",
+ " self._copy_field_offset = stencil_factory.from_origin_domain(\n",
+ " copy_field_offset_stencil,\n",
+ " origin=grid_indexing.origin_compute(),\n",
+ " domain=grid_indexing.domain_compute(),\n",
+ " )\n",
+ "\n",
+ " def __call__(\n",
+ " self,\n",
+ " field_in: FloatField,\n",
+ " field_out: FloatField,\n",
+ " ):\n",
+ " self._copy_field_offset(field_in, field_out)\n",
+ " \n",
+ "copy_field_offset = CopyFieldOffset(stencil_factory)\n",
+ " \n",
+ "qty_out = Quantity(data=np.zeros(shape),\n",
+ " dims=[\"I\", \"J\", \"K\"],\n",
+ " units=\"m\",\n",
+ " gt4py_backend=backend\n",
+ " )\n",
+ "\n",
+ "print(\"Initialize qty_out to zeros\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"Executing copy_field_offset stencil\")\n",
+ "copy_field_offset(qty_in, qty_out)\n",
+ "print(\"Plotting values of qty_out at K = 0\")\n",
+ "qty_out.plot_k_level(0)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### **Limits to offsets: an offset cannot read outside the usable domain**\n",
+ "\n",
+ "Note that when the copy with a -1 offset in the J-direction is performed, data from the halo region is copied into the domain due to the `J` shift. This means there are limits to the offset size, since too large an offset will try to read a data region that does not exist. The following example shows this by attempting a shift of -2 in the J-direction."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def copy_field_offset_stencil(field_in: FloatField, field_out: FloatField):\n", + " with computation(PARALLEL), interval(...):\n", + " field_out = field_in[J-2]\n", + " \n", + "class CopyFieldOffset:\n", + " def __init__(self, stencil_factory: StencilFactory):\n", + " grid_indexing = stencil_factory.grid_indexing\n", + " self._copy_field_offset = stencil_factory.from_origin_domain(\n", + " copy_field_offset_stencil,\n", + " origin=grid_indexing.origin_compute(),\n", + " domain=grid_indexing.domain_compute(),\n", + " )\n", + "\n", + " def __call__(\n", + " self,\n", + " field_in: FloatField,\n", + " field_out: FloatField,\n", + " ):\n", + " self._copy_field_offset(field_in, field_out)\n", + " \n", + "copy_field_offset = CopyFieldOffset(stencil_factory)\n", + "\n", + "copy_field_offset(qty_in, qty_out)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Example demonstrating error when writing to offset outputs**\n", + "\n", + "While offsets can be applied to all input `Quantity` variables in a stencil, output `Quantity` variables cannot have such offsets. When an offset is applied to an output stencil calculation, the error `GTScriptSyntaxError: Assignment to non-zero offsets is not supported.` will be displayed." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from gt4py.cartesian.gtscript import J\n", + "\n", + "def copy_field_offset_output_stencil(field_in: FloatField, field_out: FloatField):\n", + " with computation(PARALLEL), interval(...):\n", + " field_out[0,1,0] = field_in\n", + " \n", + "class CopyFieldOffsetOutput:\n", + " def __init__(self, stencil_factory: StencilFactory):\n", + " grid_indexing = stencil_factory.grid_indexing\n", + " self._copy_field_offset_output = stencil_factory.from_origin_domain(\n", + " copy_field_offset_output_stencil,\n", + " origin=grid_indexing.origin_compute(),\n", + " domain=grid_indexing.domain_compute(),\n", + " )\n", + "\n", + " def __call__(\n", + " self,\n", + " field_in: FloatField,\n", + " field_out: FloatField,\n", + " ):\n", + " self._copy_field_offset_output(field_in, field_out)\n", + " \n", + "copy_field_offset_output = CopyFieldOffsetOutput(stencil_factory)\n", + " " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "gt4py_jupyter", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/NDSL/03_orchestration_basics.ipynb b/examples/NDSL/03_orchestration_basics.ipynb new file mode 100644 index 00000000..eea24154 --- /dev/null +++ b/examples/NDSL/03_orchestration_basics.ipynb @@ -0,0 +1,167 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# **NDSL Orchestration Basics**\n", + "\n", + "### **Introduction**\n", + "\n", + "When writing code using NDSL, there will be moments where an algorithm or code pattern does not match the stencil paradigm, and shoehorning the algorithm into the paradigm increases development difficulty. 
For these moments, we have a capability called orchestration that enables developers to use regular Python for non-stencil algorithms alongside stencil-based code via [DaCe](https://github.com/spcl/dace). DaCe will also attempt to find optimizations before outputting C++ code.\n",
+ "\n",
+ "In this example, we will explore how to orchestrate a codebase using NDSL.\n",
+ "\n",
+ "### **Orchestration Example**\n",
+ "\n",
+ "We'll step through a simple example that will orchestrate a codebase containing stencils and Python code. First we'll import the necessary packages."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "from gt4py.cartesian.gtscript import (\n",
+ " PARALLEL,\n",
+ " computation,\n",
+ " interval,\n",
+ ")\n",
+ "from ndsl import (\n",
+ " StencilFactory,\n",
+ " DaceConfig,\n",
+ " orchestrate,\n",
+ " QuantityFactory,\n",
+ ")\n",
+ "from ndsl.constants import X_DIM, Y_DIM, Z_DIM\n",
+ "from ndsl.dsl.typing import FloatField, Float\n",
+ "from ndsl.boilerplate import get_factories_single_tile_orchestrated_cpu"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Next we'll define a simple stencil that sums the values around a point and applies a weight factor to that sum. Note that unlike [previous](./01_gt4py_basics.ipynb#Copy_Stencil_example) examples, we are not using the `@stencil` decorator since this stencil will be referenced within a `StencilFactory` function call."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def localsum_stencil(\n",
+ " field: FloatField, # type: ignore\n",
+ " result: FloatField, # type: ignore\n",
+ " weight: Float, # type: ignore\n",
+ "):\n",
+ " with computation(PARALLEL), interval(...):\n",
+ " result = weight * (\n",
+ " field[1, 0, 0] + field[0, 1, 0] + field[-1, 0, 0] + field[0, -1, 0]\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We'll define an object that enables orchestration and combines stencils with regular Python code. The orchestration is set up by the `orchestrate` call in the `__init__` definition. Within `__call__`, stencil calls and regular Python code are mixed.",
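+ "\n",
+ "A minimal sketch of the pattern (a sketch only, not the full example below; the claim that a non-DaCe backend simply falls back to regular Python execution is our reading of `DaceConfig`):\n",
+ "\n",
+ "```python\n",
+ "class Orchestrated:\n",
+ "    def __init__(self, stencil_factory: StencilFactory):\n",
+ "        # Registers __call__ with DaCe when the backend is dace:cpu/dace:gpu;\n",
+ "        # otherwise the object executes as plain Python.\n",
+ "        orchestrate(obj=self, config=stencil_factory.config.dace_config)\n",
+ "\n",
+ "    def __call__(self, field):\n",
+ "        ...  # mix of stencil calls and regular Python\n",
+ "```"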
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class LocalSum:\n",
+ " def __init__(\n",
+ " self, stencil_factory: StencilFactory, quantity_factory: QuantityFactory\n",
+ " ) -> None:\n",
+ " orchestrate(\n",
+ " obj=self,\n",
+ " config=stencil_factory.config.dace_config\n",
+ " or DaceConfig(None, stencil_factory.backend),\n",
+ " )\n",
+ " grid_indexing = stencil_factory.grid_indexing\n",
+ " self._local_sum = stencil_factory.from_origin_domain(\n",
+ " localsum_stencil, # <-- gt4py stencil function wrapped into NDSL\n",
+ " origin=grid_indexing.origin_compute(),\n",
+ " domain=grid_indexing.domain_compute(),\n",
+ " )\n",
+ " self._tmp_field = quantity_factory.zeros(\n",
+ " [X_DIM, Y_DIM, Z_DIM], \"n/a\", dtype=dtype # dtype is a global set in the driver cell below\n",
+ " )\n",
+ " self._n_halo = quantity_factory.sizer.n_halo\n",
+ "\n",
+ " def __call__(self, in_field: FloatField, out_result: FloatField) -> None:\n",
+ " self._local_sum(in_field, out_result, 2.0) # GT4Py Stencil\n",
+ " tmp_field = out_result[:, :, :] + 2 # Regular Python code\n",
+ " self._local_sum(tmp_field, out_result, 2.0) # GT4Py Stencil"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Next, we'll create a simple driver that defines the domain and halo size, specifies the backend (`dace:cpu` in order to use DaCe), and uses the boilerplate code to create stencil and quantity factory objects. These objects help define the computational domain used for this particular example. After defining quantities (`in_field` and `out_field`) to hold the appropriate values and creating an object `local_sum` for our combined stencil/Python calculation, `local_sum` is called to perform the computation. In the output, we can see DaCe orchestrating the code.
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# ----- Driver ----- #\n", + "\n", + "if __name__ == \"__main__\":\n", + " # Settings\n", + " dtype = np.float64\n", + " origin = (0, 0, 0)\n", + " rebuild = True\n", + " tile_size = (3, 3, 3)\n", + "\n", + " # Setup\n", + " stencil_factory, qty_factory = get_factories_single_tile_orchestrated_cpu(\n", + " nx=tile_size[0],\n", + " ny=tile_size[1],\n", + " nz=tile_size[2],\n", + " nhalo=2,\n", + " )\n", + " local_sum = LocalSum(stencil_factory, qty_factory)\n", + "\n", + " in_field = qty_factory.zeros([X_DIM, Y_DIM, Z_DIM], \"n/a\", dtype=dtype)\n", + " in_field.view[:] = 2.0\n", + " out_field = qty_factory.zeros([X_DIM, Y_DIM, Z_DIM], \"n/a\", dtype=dtype)\n", + "\n", + " # Run\n", + " local_sum(in_field, out_field)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "gt4py_jupyter", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/NDSL/README.md b/examples/NDSL/README.md new file mode 100644 index 00000000..84a6af83 --- /dev/null +++ b/examples/NDSL/README.md @@ -0,0 +1,14 @@ +# NDSL examples + +This folder contains a couple of Jupyter notebooks with the following examples: + +- Getting started with GT4Py: [GT4Py basics](./01_gt4py_basics.ipynb) +- Getting started with NDSL middleware: [NDSL basics](./02_NDSL_basics.ipynb) +- Combining stencil and non-stencil code with DaCe: [Orchestration basics](./03_orchestration_basics.ipynb) + +## Quickstart + +1. Make sure you fulfill the [requirements of NDSL](../../README.md#quickstart), e.g. python version. +2. Create a virtual environment `python -m venv .venv`, and activate it `source .venv/bin/activate`. +3. Install NDSL into your environment `pip install ../../[demos]`. +4. In VSCode, install the Jupyter extension, select your virtual environment as kernel and run the notebooks. diff --git a/external/dace b/external/dace index b1a7f8a6..da644fe8 160000 --- a/external/dace +++ b/external/dace @@ -1 +1 @@ -Subproject commit b1a7f8a6ea76f913a0bf8b32de5bc416697218fd +Subproject commit da644fe8c179022fe8e730fb3f47f6399f1db4ce diff --git a/external/gt4py b/external/gt4py index 66f84473..0ddddd37 160000 --- a/external/gt4py +++ b/external/gt4py @@ -1 +1 @@ -Subproject commit 66f8447398762127ba51c7a335d0da7ada369219 +Subproject commit 0ddddd37d3056ad6518f33908eb02f3b1f992878 diff --git a/ndsl/boilerplate.py b/ndsl/boilerplate.py new file mode 100644 index 00000000..f22e0b9f --- /dev/null +++ b/ndsl/boilerplate.py @@ -0,0 +1,109 @@ +from typing import Tuple + +import numpy as np + +from ndsl import ( + CompilationConfig, + DaceConfig, + DaCeOrchestration, + GridIndexing, + NullComm, + QuantityFactory, + RunMode, + StencilConfig, + StencilFactory, + SubtileGridSizer, + TileCommunicator, + TilePartitioner, +) + + +def _get_factories( + nx: int, + ny: int, + nz: int, + nhalo, + backend: str, + orchestration: DaCeOrchestration, + topology: str, +) -> Tuple[StencilFactory, QuantityFactory]: + """Build a Stencil & Quantity factory for a combination of options. + + Dev Note: We don't expose this function because we want the boilerplate to remain + as easy and self describing as possible. 
It should be a very easy call to make.
+ The other reason is that, for now, orchestration requires two inputs rather than
+ a simple change of backend name, which can be confusing. Until that is refactored,
+ we choose to hide this pattern behind the boilerplate functions.
+ """
+ dace_config = DaceConfig(
+ communicator=None,
+ backend=backend,
+ orchestration=orchestration,
+ )
+
+ compilation_config = CompilationConfig(
+ backend=backend,
+ rebuild=True,
+ validate_args=True,
+ format_source=False,
+ device_sync=False,
+ run_mode=RunMode.BuildAndRun,
+ use_minimal_caching=False,
+ )
+
+ stencil_config = StencilConfig(
+ compare_to_numpy=False,
+ compilation_config=compilation_config,
+ dace_config=dace_config,
+ )
+
+ if topology == "tile":
+ partitioner = TilePartitioner((1, 1))
+ sizer = SubtileGridSizer.from_tile_params(
+ nx_tile=nx,
+ ny_tile=ny,
+ nz=nz,
+ n_halo=nhalo,
+ extra_dim_lengths={},
+ layout=partitioner.layout,
+ tile_partitioner=partitioner,
+ )
+ comm = TileCommunicator(comm=NullComm(0, 1, 42), partitioner=partitioner)
+ else:
+ raise NotImplementedError(f"Topology {topology} is not implemented.")
+
+ grid_indexing = GridIndexing.from_sizer_and_communicator(sizer, comm)
+ stencil_factory = StencilFactory(config=stencil_config, grid_indexing=grid_indexing)
+ quantity_factory = QuantityFactory(sizer, np)
+
+ return stencil_factory, quantity_factory
+
+
+def get_factories_single_tile_orchestrated_cpu(
+ nx, ny, nz, nhalo
+) -> Tuple[StencilFactory, QuantityFactory]:
+ """Build a Stencil & Quantity factory for orchestrated CPU, on a single tile topology."""
+ return _get_factories(
+ nx=nx,
+ ny=ny,
+ nz=nz,
+ nhalo=nhalo,
+ backend="dace:cpu",
+ orchestration=DaCeOrchestration.BuildAndRun,
+ topology="tile",
+ )
+
+
+def get_factories_single_tile_numpy(
+ nx, ny, nz, nhalo
+) -> Tuple[StencilFactory, QuantityFactory]:
+ """Build a Stencil & Quantity factory for Numpy, on a single tile topology."""
+ return _get_factories(
+ nx=nx,
+ ny=ny,
+ nz=nz,
+ nhalo=nhalo,
+ backend="numpy",
+ orchestration=DaCeOrchestration.Python,
+ topology="tile",
+ )
diff --git a/ndsl/constants.py b/ndsl/constants.py index d0b512b5..0f7d55da 100644 --- a/ndsl/constants.py +++ b/ndsl/constants.py @@ -128,6 +128,7 @@ class ConstantVersions(Enum):
CV_VAP = 3.0 * RVGAS # Heat capacity of water vapor at constant volume
ZVIR = RVGAS / RDGAS - 1 # con_fvirt in Fortran physics
C_ICE = 1972.0 # Heat capacity of ice at -15 degrees Celsius
+C_ICE_0 = 2106.0 # Heat capacity of ice at 0 degrees Celsius
C_LIQ = 4.1855e3 # Heat capacity of water at 15 degrees Celsius
CP_VAP = 4.0 * RVGAS # Heat capacity of water vapor at constant pressure
TICE = 273.16 # Freezing temperature
@@ -136,6 +137,7 @@ class ConstantVersions(Enum):
D2ICE = DC_VAP + DC_ICE # Isobaric heating / cooling
LI0 = HLF - DC_ICE * TICE
EPS = RDGAS / RVGAS
+EPSM1 = EPS - 1.0
LV0 = (
HLV - DC_VAP * TICE
) # 3.13905782e6, evaporation latent heat coefficient at 0 degrees Kelvin
@@ -145,7 +147,8 @@ class ConstantVersions(Enum):
LI2 = (
LV0 + LI00
) # 2.86799816e6, sublimation latent heat coefficient at 0 degrees Kelvin
-E00 = 611.21 # Saturation vapor pressure at 0 degrees Celsius
+E00 = 611.21 # Saturation vapor pressure at 0 degrees Celsius (Pa)
+PSAT = 610.78 # Saturation vapor pressure at the H2O triple point (Pa)
T_WFR = TICE - 40.0 # homogeneous freezing temperature
TICE0 = TICE - 0.01
T_MIN = 178.0 # Minimum temperature to freeze-dry all water vapor
diff --git a/ndsl/dsl/stencil.py b/ndsl/dsl/stencil.py index 75ef28ea..6a70b4d8 100644 --- a/ndsl/dsl/stencil.py +++ b/ndsl/dsl/stencil.py
@@ -32,7 +32,7 @@
from ndsl.dsl.typing import Float, Index3D, cast_to_index3d
from ndsl.initialization.sizer import GridSizer, SubtileGridSizer
from ndsl.quantity import Quantity
-from ndsl.testing import comparison
+from ndsl.testing.comparison import LegacyMetric
try:
@@ -68,40 +68,14 @@ def report_difference(args, kwargs, args_copy, kwargs_copy, function_name, gt_id
def report_diff(arg: np.ndarray, numpy_arg: np.ndarray, label) -> str:
- metric_err = comparison.compare_arr(arg, numpy_arg)
- nans_match = np.logical_and(np.isnan(arg), np.isnan(numpy_arg))
- n_points = np.product(arg.shape)
- failures_14 = n_points - np.sum(
- np.logical_or(
- nans_match,
- metric_err < 1e-14,
- )
- )
- failures_10 = n_points - np.sum(
- np.logical_or(
- nans_match,
- metric_err < 1e-10,
- )
+ metric = LegacyMetric(
+ reference_values=arg,
+ computed_values=numpy_arg,
+ eps=1e-13,
+ ignore_near_zero_errors=False,
+ near_zero=0,
)
- failures_8 = n_points - np.sum(
- np.logical_or(
- nans_match,
- metric_err < 1e-8,
- )
- )
- greatest_error = np.max(metric_err[~np.isnan(metric_err)])
- if greatest_error == 0.0 and failures_14 == 0:
- report = ""
- else:
- report = f"\n {label}: "
- report += f"max_err={greatest_error}"
- if failures_14 > 0:
- report += f" 1e-14 failures: {failures_14}"
- if failures_10 > 0:
- report += f" 1e-10 failures: {failures_10}"
- if failures_8 > 0:
- report += f" 1e-8 failures: {failures_8}"
- return report
+ return repr(metric)
@dataclasses.dataclass
diff --git a/ndsl/dsl/typing.py b/ndsl/dsl/typing.py index d85ee30f..5eab76ef 100644 --- a/ndsl/dsl/typing.py +++ b/ndsl/dsl/typing.py @@ -63,10 +63,19 @@ def global_set_floating_point_precision():
IntFieldIJ = Field[gtscript.IJ, Int]
IntFieldK = Field[gtscript.K, Int]
BoolField = Field[gtscript.IJK, Bool]
+BoolFieldIJ = Field[gtscript.IJ, Bool]
Index3D = Tuple[int, int, int]
+def set_4d_field_size(n, dtype):
+ """
+ Defines a 4D field with a given size and type.
+ The extra data dimension is not a parallel axis.
+ """
+ return Field[gtscript.IJK, (dtype, (n,))]
+
+
def cast_to_index3d(val: Tuple[int, ...]) -> Index3D:
if len(val) != 3:
raise ValueError(f"expected 3d index, received {val}")
diff --git a/ndsl/grid/eta.py b/ndsl/grid/eta.py index 90db8c4a..35ac510d 100644 --- a/ndsl/grid/eta.py +++ b/ndsl/grid/eta.py @@ -30,10 +30,8 @@ class HybridPressureCoefficients:
def _load_ak_bk_from_file(eta_file: str) -> Tuple[np.ndarray, np.ndarray]:
- if eta_file == "None":
- raise ValueError("eta file not specified")
if not os.path.isfile(eta_file):
- raise ValueError("file " + eta_file + " does not exist")
+ raise ValueError(f"eta file {eta_file} does not exist")
# read file into ak, bk arrays
data = xr.open_dataset(eta_file)
diff --git a/ndsl/grid/generation.py b/ndsl/grid/generation.py index 275db563..75437272 100644 --- a/ndsl/grid/generation.py +++ b/ndsl/grid/generation.py @@ -237,7 +237,7 @@ def __init__(
dy_const: float = 1000.0,
deglat: float = 15.0,
extdgrid: bool = False,
- eta_file: str = "None",
+ eta_file: Optional[str] = None,
ak: Optional[np.ndarray] = None,
bk: Optional[np.ndarray] = None,
):
@@ -297,12 +297,34 @@ def __init__(
self._dy_center = None
self._area = None
self._area_c = None
- (
- self._ks,
- self._ptop,
- self._ak,
- self._bk,
- ) = self._set_hybrid_pressure_coefficients(eta_file, ak, bk)
+ if eta_file is not None:
+ (
+ self._ks,
+ self._ptop,
+ self._ak,
+ self._bk,
+ ) = self._set_hybrid_pressure_coefficients(eta_file, ak, bk)
+ else:
+ self._ks = self.quantity_factory.zeros(
+ [],
+ "",
+ dtype=Float,
+ )
+
self._ptop = self.quantity_factory.zeros( + [], + "Pa", + dtype=Float, + ) + self._ak = self.quantity_factory.zeros( + [Z_INTERFACE_DIM], + "Pa", + dtype=Float, + ) + self._bk = self.quantity_factory.zeros( + [Z_INTERFACE_DIM], + "", + dtype=Float, + ) self._ec1 = None self._ec2 = None self._ew1 = None diff --git a/ndsl/grid/geometry.py b/ndsl/grid/geometry.py index 804be0fe..74441cde 100644 --- a/ndsl/grid/geometry.py +++ b/ndsl/grid/geometry.py @@ -1,4 +1,5 @@ from ndsl.comm.partitioner import TilePartitioner +from ndsl.dsl.typing import Float from ndsl.grid.gnomonic import ( get_lonlat_vect, get_unit_vector_direction, @@ -591,7 +592,7 @@ def edge_factors( nhalo: int, tile_partitioner: TilePartitioner, rank: int, - radius: float, + radius: Float, np, ): """ @@ -704,7 +705,7 @@ def efactor_a2c_v( nhalo: int, tile_partitioner: TilePartitioner, rank: int, - radius: float, + radius: Float, np, ): """ @@ -888,7 +889,7 @@ def unit_vector_lonlat(grid, np): return unit_lon, unit_lat -def _fill_halo_corners(field, value: float, nhalo: int, tile_partitioner, rank): +def _fill_halo_corners(field, value: Float, nhalo: int, tile_partitioner, rank): """ Fills a tile halo corners (ghost cells) of a field with a set value along the first 2 axes @@ -905,7 +906,7 @@ def _fill_halo_corners(field, value: float, nhalo: int, tile_partitioner, rank): field[-nhalo:, -nhalo:] = value # NE corner -def _fill_single_halo_corner(field, value: float, nhalo: int, corner: str): +def _fill_single_halo_corner(field, value: Float, nhalo: int, corner: str): """ Fills a tile halo corner (ghost cells) of a field with a set value along the first 2 axes diff --git a/ndsl/grid/helper.py b/ndsl/grid/helper.py index fd62d771..745dce37 100644 --- a/ndsl/grid/helper.py +++ b/ndsl/grid/helper.py @@ -12,6 +12,7 @@ split_cartesian_into_storages = None import ndsl.constants as constants from ndsl.constants import Z_DIM, Z_INTERFACE_DIM +from ndsl.dsl.typing import Float from ndsl.filesystem import get_fs from ndsl.grid.generation import MetricTerms from ndsl.initialization.allocator import QuantityFactory @@ -226,13 +227,13 @@ def dp(self) -> Quantity: return self._dp_ref @property - def ptop(self) -> float: + def ptop(self) -> Float: """ top of atmosphere pressure (Pa) """ if self.bk.view[0] != 0: raise ValueError("ptop is not well-defined when top-of-atmosphere bk != 0") - return float(self.ak.view[0]) + return Float(self.ak.view[0]) @dataclasses.dataclass(frozen=True) diff --git a/ndsl/initialization/allocator.py b/ndsl/initialization/allocator.py index 5320e4c6..869086f6 100644 --- a/ndsl/initialization/allocator.py +++ b/ndsl/initialization/allocator.py @@ -3,6 +3,7 @@ import numpy as np from ndsl.constants import SPATIAL_DIMS +from ndsl.dsl.typing import Float from ndsl.initialization.sizer import GridSizer from ndsl.optional_imports import gt4py from ndsl.quantity import Quantity, QuantityHaloSpec @@ -60,7 +61,7 @@ def empty( self, dims: Sequence[str], units: str, - dtype: type = np.float64, + dtype: type = Float, allow_mismatch_float_precision: bool = False, ): return self._allocate( @@ -71,7 +72,7 @@ def zeros( self, dims: Sequence[str], units: str, - dtype: type = np.float64, + dtype: type = Float, allow_mismatch_float_precision: bool = False, ): return self._allocate( @@ -82,7 +83,7 @@ def ones( self, dims: Sequence[str], units: str, - dtype: type = np.float64, + dtype: type = Float, allow_mismatch_float_precision: bool = False, ): return self._allocate( @@ -116,7 +117,7 @@ def _allocate( allocator: Callable, dims: 
Sequence[str], units: str, - dtype: type = np.float64, + dtype: type = Float, allow_mismatch_float_precision: bool = False, ): origin = self.sizer.get_origin(dims) @@ -150,7 +151,7 @@ def get_quantity_halo_spec( self, dims: Sequence[str], n_halo: Optional[int] = None, - dtype: type = np.float64, + dtype: type = Float, ) -> QuantityHaloSpec: """Build memory specifications for the halo update. diff --git a/ndsl/namelist.py b/ndsl/namelist.py index 71a1f1ca..205954ca 100644 --- a/ndsl/namelist.py +++ b/ndsl/namelist.py @@ -111,6 +111,22 @@ class NamelistDefaults:
tice = 273.16 # set tice = 165. to turn off ice - phase phys (kessler emulator)
alin = 842.0 # "a" in lin1983
clin = 4.8 # "c" in lin 1983, 4.8 -- > 6. (to ehance ql -- > qs)
+ isatmedmf = 0 # which version of satmedmfvdif to use
+ dspheat = False # flag for tke dissipative heating
+ xkzm_h = 1.0 # background vertical diffusion for heat q over ocean
+ xkzm_m = 1.0 # background vertical diffusion for momentum over ocean
+ xkzm_hl = 1.0 # background vertical diffusion for heat q over land
+ xkzm_ml = 1.0 # background vertical diffusion for momentum over land
+ xkzm_hi = 1.0 # background vertical diffusion for heat q over ice
+ xkzm_mi = 1.0 # background vertical diffusion for momentum over ice
+ xkzm_s = 1.0 # sigma threshold for background mom. diffusion
+ xkzm_lim = 0.01 # background vertical diffusion limit
+ xkzminv = 0.15 # diffusivity in inversion layers
+ xkgdx = 25.0e3 # background vertical diffusion threshold
+ rlmn = 30.0 # lower limiter on asymptotic mixing length in satmedmfvdif
+ rlmx = 300.0 # upper limiter on asymptotic mixing length in satmedmfvdif
+ do_dk_hb19 = False # flag for using hb19 background diff formula in satmedmfvdif
+ cap_k0_land = False # flag for applying limiter on background diff in inversion layer over land in satmedmfvdif
@classmethod def as_dict(cls): @@ -293,6 +309,22 @@ class Namelist:
tice: float = NamelistDefaults.tice
alin: float = NamelistDefaults.alin
clin: float = NamelistDefaults.clin
+ isatmedmf: int = NamelistDefaults.isatmedmf
+ dspheat: bool = NamelistDefaults.dspheat
+ xkzm_h: float = NamelistDefaults.xkzm_h
+ xkzm_m: float = NamelistDefaults.xkzm_m
+ xkzm_hl: float = NamelistDefaults.xkzm_hl
+ xkzm_ml: float = NamelistDefaults.xkzm_ml
+ xkzm_hi: float = NamelistDefaults.xkzm_hi
+ xkzm_mi: float = NamelistDefaults.xkzm_mi
+ xkzm_s: float = NamelistDefaults.xkzm_s
+ xkzm_lim: float = NamelistDefaults.xkzm_lim
+ xkzminv: float = NamelistDefaults.xkzminv
+ xkgdx: float = NamelistDefaults.xkgdx
+ rlmn: float = NamelistDefaults.rlmn
+ rlmx: float = NamelistDefaults.rlmx
+ do_dk_hb19: bool = NamelistDefaults.do_dk_hb19
+ cap_k0_land: bool = NamelistDefaults.cap_k0_land
# c0s_shal: Any # c1_shal: Any # cal_pre: Any diff --git a/ndsl/quantity.py b/ndsl/quantity.py index f998c860..b95a9aad 100644 --- a/ndsl/quantity.py +++ b/ndsl/quantity.py @@ -2,10 +2,12 @@ import warnings from typing import Any, Dict, Iterable, Optional, Sequence, Tuple, Union, cast
+import matplotlib.pyplot as plt
import numpy as np
import ndsl.constants as constants
from ndsl.comm._boundary_utils import bound_default_slice, shift_boundary_slice_tuple
+from ndsl.dsl.typing import Float
from ndsl.optional_imports import cupy, dace, gt4py
from ndsl.optional_imports import xarray as xr
from ndsl.types import NumpyModule
@@ -260,7 +262,8 @@ def _validate_quantity_property_lengths(shape, dims, origin, extent):
def _is_float(dtype):
"""Expected floating point type for Pace"""
return (
- dtype == float
+ dtype == Float
+ or dtype ==
float
+ or dtype == np.float32
+ or dtype == np.float64
+ or dtype == np.float16
@@ -296,9 +299,6 @@ def __init__(
storage attribute is disabled and will raise an exception. Will raise a TypeError if this is given with a gt4py storage type as data """
- # ToDo: [Florian 01/23] Kill the abomination.
- # See https://github.com/NOAA-GFDL/pace/issues/3
- from ndsl.dsl.typing import Float
if (
not allow_mismatch_float_precision
@@ -594,6 +594,22 @@ def transpose(
transposed._attrs = self._attrs
return transposed
+ def plot_k_level(self, k_index=0):
+ field = self.data
+ print(
+ "Min and max values:",
+ field[:, :, k_index].min(),
+ field[:, :, k_index].max(),
+ )
+ plt.xlabel("I")
+ plt.ylabel("J")
+
+ im = plt.imshow(field[:, :, k_index].transpose(), origin="lower")
+
+ plt.colorbar(im)
+ plt.title("Plot at K = " + str(k_index))
+ plt.show()
+
def transpose_sequence(sequence, order):
return sequence.__class__(sequence[i] for i in order)
diff --git a/ndsl/stencils/corners.py b/ndsl/stencils/corners.py index 86fea187..5eb7767a 100644 --- a/ndsl/stencils/corners.py +++ b/ndsl/stencils/corners.py @@ -1001,6 +1001,9 @@ def fill_corners_dgrid_defn(
from __externals__ import i_end, i_start, j_end, j_start
with computation(PARALLEL), interval(...):
+ # These two statements work around a missing-symbol crash caused by the node visitor depth limitation
+ acoef = mysign
+ x_out = x_out
# sw corner
with horizontal(region[i_start - 1, j_start - 1]):
x_out = mysign * y_in[0, 1, 0]
diff --git a/ndsl/stencils/testing/conftest.py b/ndsl/stencils/testing/conftest.py index d000e1fa..af5bb6a6 100644 --- a/ndsl/stencils/testing/conftest.py +++ b/ndsl/stencils/testing/conftest.py @@ -7,7 +7,7 @@ import xarray as xr import yaml
-import ndsl.dsl
+from ndsl import CompilationConfig, StencilConfig, StencilFactory
from ndsl.comm.communicator import (
Communicator,
CubedSphereCommunicator,
@@ -17,11 +17,93 @@ from ndsl.comm.partitioner import CubedSpherePartitioner, TilePartitioner
from ndsl.dsl.dace.dace_config import DaceConfig
from ndsl.namelist import Namelist
+from ndsl.stencils.testing.grid import Grid # type: ignore
from ndsl.stencils.testing.parallel_translate import ParallelTranslate
from ndsl.stencils.testing.savepoint import SavepointCase, dataset_to_dict
from ndsl.stencils.testing.translate import TranslateGrid
+def pytest_addoption(parser):
+ """Options for the Translate test system
+
+ See -h or inline help for details.
+ """
+ parser.addoption(
+ "--backend",
+ action="store",
+ default="numpy",
+ help="Backend to execute the test with, can only be one.",
+ )
+ parser.addoption(
+ "--which_modules",
+ action="store",
+ help="Whitelist of modules to run. Only the part after Translate, e.g. in TranslateXYZ it'd be XYZ",
+ )
+ parser.addoption(
+ "--skip_modules",
+ action="store",
+ help="Blacklist of modules to not run. Only the part after Translate, e.g. in TranslateXYZ it'd be XYZ",
+ )
+ parser.addoption(
+ "--which_rank", action="store", help="Restrict test to a single rank"
+ )
+ parser.addoption(
+ "--data_path",
+ action="store",
+ default="./",
+ help="Path of Netcdf input and outputs. Naming pattern needs to be XYZ-In and XYZ-Out for a test class named TranslateXYZ",
+ )
+ parser.addoption(
+ "--threshold_overrides_file",
+ action="store",
+ default=None,
+ help="Path to a yaml overriding the default error threshold for a custom value.",
+ )
+ parser.addoption(
+ "--print_failures",
+ action="store_true",
+ help="Print details of the failures. 
Default to True.", + ) + parser.addoption( + "--failure_stride", + action="store", + default=1, + help="How many indices of failures to print from worst to best. Default to 1.", + ) + parser.addoption( + "--grid", + action="store", + default="file", + help='Grid loading mode. "file" looks for "Grid-Info.nc", "compute" does the same but recomputes MetricTerms, "default" creates a simple grid with no metrics terms. Default to "file".', + ) + parser.addoption( + "--topology", + action="store", + default="cubed-sphere", + help='Topology of the grid. "cubed-sphere" means a 6-faced grid, "doubly-periodic" means a 1 tile grid. Default to "cubed-sphere".', + ) + parser.addoption( + "--multimodal_metric", + action="store_true", + default=False, + help="Use the multi-modal float metric. Default to False.", + ) + + +def pytest_configure(config): + # register an additional marker + config.addinivalue_line( + "markers", "sequential(name): mark test as running sequentially on ranks" + ) + config.addinivalue_line( + "markers", "parallel(name): mark test as running in parallel across ranks" + ) + config.addinivalue_line( + "markers", + "mock_parallel(name): mark test as running in mock parallel across ranks", + ) + + @pytest.fixture() def data_path(pytestconfig): return data_path_and_namelist_filename_from_config(pytestconfig) @@ -109,12 +191,14 @@ def get_parallel_savepoint_names(metafunc, data_path): def get_ranks(metafunc, layout): only_rank = metafunc.config.getoption("which_rank") - dperiodic = metafunc.config.getoption("dperiodic") + topology = metafunc.config.getoption("topology") if only_rank is None: - if dperiodic: + if topology == "doubly-periodic": total_ranks = layout[0] * layout[1] - else: + elif topology == "cubed-sphere": total_ranks = 6 * layout[0] * layout[1] + else: + raise NotImplementedError(f"Topology {topology} is unknown.") return range(total_ranks) else: return [int(only_rank)] @@ -125,8 +209,8 @@ def get_namelist(namelist_filename): def get_config(backend: str, communicator: Optional[Communicator]): - stencil_config = ndsl.dsl.stencil.StencilConfig( - compilation_config=ndsl.dsl.stencil.CompilationConfig( + stencil_config = StencilConfig( + compilation_config=CompilationConfig( backend=backend, rebuild=False, validate_args=True ), dace_config=DaceConfig( @@ -142,8 +226,8 @@ def sequential_savepoint_cases(metafunc, data_path, namelist_filename, *, backen namelist = get_namelist(namelist_filename) stencil_config = get_config(backend, None) ranks = get_ranks(metafunc, namelist.layout) - compute_grid = metafunc.config.getoption("compute_grid") - dperiodic = metafunc.config.getoption("dperiodic") + grid_mode = metafunc.config.getoption("grid") + topology_mode = metafunc.config.getoption("topology") return _savepoint_cases( savepoint_names, ranks, @@ -151,8 +235,8 @@ def sequential_savepoint_cases(metafunc, data_path, namelist_filename, *, backen namelist, backend, data_path, - compute_grid, - dperiodic, + grid_mode, + topology_mode, ) @@ -160,26 +244,41 @@ def _savepoint_cases( savepoint_names, ranks, stencil_config, - namelist, - backend, - data_path, - compute_grid: bool, - dperiodic: bool, + namelist: Namelist, + backend: str, + data_path: str, + grid_mode: str, + topology_mode: bool, ): return_list = [] - ds_grid: xr.Dataset = xr.open_dataset(os.path.join(data_path, "Grid-Info.nc")).isel( - savepoint=0 - ) for rank in ranks: - grid = TranslateGrid( - dataset_to_dict(ds_grid.isel(rank=rank)), - rank=rank, - layout=namelist.layout, - backend=backend, - ).python_grid() - if 
compute_grid:
- compute_grid_data(grid, namelist, backend, namelist.layout, dperiodic)
- stencil_factory = ndsl.dsl.stencil.StencilFactory(
+ if grid_mode == "default":
+ grid = Grid._make(
+ namelist.npx,
+ namelist.npy,
+ namelist.npz,
+ namelist.layout,
+ rank,
+ backend,
+ )
+ elif grid_mode == "file" or grid_mode == "compute":
+ ds_grid: xr.Dataset = xr.open_dataset(
+ os.path.join(data_path, "Grid-Info.nc")
+ ).isel(savepoint=0)
+ grid = TranslateGrid(
+ dataset_to_dict(ds_grid.isel(rank=rank)),
+ rank=rank,
+ layout=namelist.layout,
+ backend=backend,
+ ).python_grid()
+ if grid_mode == "compute":
+ compute_grid_data(
+ grid, namelist, backend, namelist.layout, topology_mode
+ )
+ else:
+ raise NotImplementedError(f"Grid mode {grid_mode} is unknown.")
+
+ stencil_factory = StencilFactory(
config=stencil_config,
grid_indexing=grid.grid_indexing,
)
@@ -204,12 +303,12 @@ def _savepoint_cases(
return return_list
-def compute_grid_data(grid, namelist, backend, layout, dperiodic):
+def compute_grid_data(grid, namelist, backend, layout, topology_mode):
grid.make_grid_data(
npx=namelist.npx,
npy=namelist.npy,
npz=namelist.npz,
- communicator=get_communicator(MPI.COMM_WORLD, layout, dperiodic),
+ communicator=get_communicator(MPI.COMM_WORLD, layout, topology_mode),
backend=backend,
)
@@ -218,11 +317,11 @@ def parallel_savepoint_cases(
metafunc, data_path, namelist_filename, mpi_rank, *, backend: str, comm
):
namelist = get_namelist(namelist_filename)
- dperiodic = metafunc.config.getoption("dperiodic")
- communicator = get_communicator(comm, namelist.layout, dperiodic)
+ topology_mode = metafunc.config.getoption("topology")
+ communicator = get_communicator(comm, namelist.layout, topology_mode)
stencil_config = get_config(backend, communicator)
savepoint_names = get_parallel_savepoint_names(metafunc, data_path)
- compute_grid = metafunc.config.getoption("compute_grid")
+ grid_mode = metafunc.config.getoption("grid")
return _savepoint_cases(
savepoint_names,
[mpi_rank],
@@ -230,8 +329,8 @@ def parallel_savepoint_cases(
namelist,
backend,
data_path,
- compute_grid,
- dperiodic,
+ grid_mode,
+ topology_mode,
)
@@ -276,8 +375,8 @@ def generate_parallel_stencil_tests(metafunc, *, backend: str):
)
-def get_communicator(comm, layout, dperiodic):
- if (MPI.COMM_WORLD.Get_size() > 1) and (not dperiodic):
+def get_communicator(comm, layout, topology_mode):
+ if (MPI.COMM_WORLD.Get_size() > 1) and (topology_mode == "cubed-sphere"):
partitioner = CubedSpherePartitioner(TilePartitioner(layout))
communicator = CubedSphereCommunicator(comm, partitioner)
else:
@@ -297,10 +396,20 @@ def failure_stride(pytestconfig):
@pytest.fixture()
-def compute_grid(pytestconfig):
- return pytestconfig.getoption("compute_grid")
+def multimodal_metric(pytestconfig):
+ return bool(pytestconfig.getoption("multimodal_metric"))
+
+
+@pytest.fixture()
+def grid(pytestconfig):
+ return pytestconfig.getoption("grid")
+
+
+@pytest.fixture()
+def topology_mode(pytestconfig):
+ return pytestconfig.getoption("topology")
@pytest.fixture()
-def dperiodic(pytestconfig):
- return pytestconfig.getoption("dperiodic")
+def backend(pytestconfig):
+ return pytestconfig.getoption("backend")
diff --git a/ndsl/stencils/testing/serialbox_to_netcdf.py b/ndsl/stencils/testing/serialbox_to_netcdf.py new file mode 100644 index 00000000..a29139c5 --- /dev/null +++ b/ndsl/stencils/testing/serialbox_to_netcdf.py @@ -0,0 +1,224 @@
+import argparse
+import os
+import shutil
+from typing import Any, Dict, Optional
+
+import f90nml
+import numpy as np
+import xarray as xr
+
+
+try:
+ import serialbox
+except ModuleNotFoundError:
+ raise ModuleNotFoundError(
+ "Serialbox couldn't be imported, make sure it's in your PYTHONPATH or your environment"
+ )
+
+
+def get_parser():
+ parser = argparse.ArgumentParser("Converts Serialbox data to netcdf")
+ parser.add_argument(
+ "data_path",
+ type=str,
+ help="path of serialbox data to convert",
+ )
+ parser.add_argument(
+ "output_path", type=str, help="output directory where netcdf data will be saved"
+ )
+ parser.add_argument(
+ "-dn",
+ "--data_name",
+ type=str,
+ help="[Optional] Give the name of the data, will default to Generator_rankX",
+ )
+ parser.add_argument(
+ "-m",
+ "--merge",
+ action="store_true",
+ default=False,
+ help="merge data streams that were blocked into separate savepoints",
+ )
+ return parser
+
+
+def read_serialized_data(serializer, savepoint, variable):
+ data = serializer.read(variable, savepoint)
+ if len(data.flatten()) == 1:
+ return data[0]
+ data[data == 1e40] = 0.0
+ return data
+
+
+def get_all_savepoint_names(serializer):
+ savepoint_names = set()
+ for savepoint in serializer.savepoint_list():
+ savepoint_names.add(savepoint.name)
+ return savepoint_names
+
+
+def get_serializer(data_path: str, rank: int, data_name: Optional[str] = None):
+ if data_name:
+ name = data_name
+ else:
+ name = f"Generator_rank{rank}"
+ return serialbox.Serializer(serialbox.OpenModeKind.Read, data_path, name)
+
+
+def main(
+ data_path: str,
+ output_path: str,
+ merge_blocks: bool,
+ data_name: Optional[str] = None,
+):
+ os.makedirs(output_path, exist_ok=True)
+ namelist_filename_in = os.path.join(data_path, "input.nml")
+
+ if not os.path.exists(namelist_filename_in):
+ raise FileNotFoundError(f"Can't find input.nml in {data_path}. Required.")
+
+ namelist_filename_out = os.path.join(output_path, "input.nml")
+ if namelist_filename_out != namelist_filename_in:
+ shutil.copyfile(os.path.join(data_path, "input.nml"), namelist_filename_out)
+ namelist = f90nml.read(namelist_filename_out)
+ if namelist["fv_core_nml"]["grid_type"] <= 3:
+ total_ranks = (
+ 6
+ * namelist["fv_core_nml"]["layout"][0]
+ * namelist["fv_core_nml"]["layout"][1]
+ )
+ else:
+ total_ranks = (
+ namelist["fv_core_nml"]["layout"][0] * namelist["fv_core_nml"]["layout"][1]
+ )
+ nx = int(
+ (namelist["fv_core_nml"]["npx"] - 1) / (namelist["fv_core_nml"]["layout"][0])
+ )
+ ny = int(
+ (namelist["fv_core_nml"]["npy"] - 1) / (namelist["fv_core_nml"]["layout"][1])
+ )
+
+ # all ranks have the same names, just look at first one
+ serializer_0 = get_serializer(data_path, rank=0, data_name=data_name)
+
+ savepoint_names = get_all_savepoint_names(serializer_0)
+ for savepoint_name in sorted(list(savepoint_names)):
+ rank_list = []
+ names_list = list(
+ serializer_0.fields_at_savepoint(
+ serializer_0.get_savepoint(savepoint_name)[0]
+ )
+ )
+ serializer_list = []
+ for rank in range(total_ranks):
+ serializer = get_serializer(data_path, rank, data_name)
+ serializer_list.append(serializer)
+ savepoints = serializer.get_savepoint(savepoint_name)
+ rank_data: Dict[str, Any] = {}
+ for name in set(names_list):
+ rank_data[name] = []
+ for savepoint in savepoints:
+ rank_data[name].append(
+ read_serialized_data(serializer, savepoint, name)
+ )
+ nblocks = len(rank_data[name])
+ if merge_blocks and len(rank_data[name]) > 1:
+ full_data = np.array(rank_data[name])
+ if len(full_data.shape) > 1:
+ if nx * ny == full_data.shape[0] * full_data.shape[1]:
+ # If the blocked data covers the horizontal (i, j) plane, reshape it
+ new_shape = (nx, ny) + 
full_data.shape[2:] + full_data = full_data.reshape(new_shape) + else: + # We have one array for all blocks + # could be a k-array or something else, so we take one copy + # TODO: is there a decent check for this? + full_data = full_data[0] + elif len(full_data.shape) == 1: + # if it's a scalar from each block then just take one + full_data = full_data[0] + else: + raise IndexError(f"{name} data appears to be empty") + rank_data[name] = [full_data] + rank_list.append(rank_data) + if merge_blocks: + n_savepoints = 1 + else: + n_savepoints = len(savepoints) # checking from last rank is fine + data_vars = {} + if n_savepoints > 0: + encoding = {} + for varname in set(names_list).difference(["rank"]): + data_shape = list(rank_list[0][varname][0].shape) + if savepoint_name in [ + "FVDynamics-In", + "FVDynamics-Out", + "Driver-In", + "Driver-Out", + ]: + if varname in [ + "qvapor", + "qliquid", + "qice", + "qrain", + "qsnow", + "qgraupel", + "qo3mr", + "qsgs_tke", + ]: + data_vars[varname] = get_data( + data_shape, total_ranks, n_savepoints, rank_list, varname + )[:, :, 3:-3, 3:-3, :] + else: + data_vars[varname] = get_data( + data_shape, total_ranks, n_savepoints, rank_list, varname + ) + else: + data_vars[varname] = get_data( + data_shape, total_ranks, n_savepoints, rank_list, varname + ) + if len(data_shape) > 2: + encoding[varname] = {"zlib": True, "complevel": 1} + dataset = xr.Dataset(data_vars=data_vars) + dataset.to_netcdf( + os.path.join(output_path, f"{savepoint_name}.nc"), encoding=encoding + ) + + +def get_data(data_shape, total_ranks, n_savepoints, output_list, varname): + # Read in dtype + if total_ranks <= 0: + return + varname_dtype = output_list[0][varname][0].dtype + # Build data array + array = np.full( + [n_savepoints, total_ranks] + data_shape, + fill_value=np.nan, + dtype=varname_dtype, + ) + dims = ["savepoint", "rank"] + [ + f"dim_{varname}_{i}" for i in range(len(data_shape)) + ] + data = xr.DataArray(array, dims=dims) + for rank in range(total_ranks): + for i_savepoint in range(n_savepoints): + if len(data_shape) > 0: + data[i_savepoint, rank, :] = output_list[rank][varname][i_savepoint] + else: + data[i_savepoint, rank] = output_list[rank][varname][i_savepoint] + return data + + +def entry_point(): + parser = get_parser() + args = parser.parse_args() + main( + data_path=args.data_path, + output_path=args.output_path, + merge_blocks=args.merge, + data_name=args.data_name, + ) + + +if __name__ == "__main__": + entry_point() diff --git a/ndsl/stencils/testing/test_translate.py b/ndsl/stencils/testing/test_translate.py index 29c4ed65..db8e6047 100644 --- a/ndsl/stencils/testing/test_translate.py +++ b/ndsl/stencils/testing/test_translate.py @@ -15,7 +15,7 @@ from ndsl.quantity import Quantity from ndsl.restart._legacy_restart import RESTART_PROPERTIES from ndsl.stencils.testing.savepoint import SavepointCase, dataset_to_dict -from ndsl.testing.comparison import compare_scalar, success, success_array +from ndsl.testing.comparison import LegacyMetric, MultiModalFloatMetric from ndsl.testing.perturbation import perturb @@ -32,92 +32,6 @@ def platform(): return "docker" if in_docker else "metal" -def sample_wherefail( - computed_data, - ref_data, - eps, - print_failures, - failure_stride, - test_name, - ignore_near_zero_errors, - near_zero, - xy_indices=False, -): - found_indices = np.where( - np.logical_not( - success_array( - computed_data, ref_data, eps, ignore_near_zero_errors, near_zero - ) - ) - ) - computed_failures = computed_data[found_indices] - reference_failures = 
ref_data[found_indices] - - # List all errors - return_strings = [] - bad_indices_count = len(found_indices[0]) - # Determine worst result - worst_metric_err = 0.0 - for b in range(bad_indices_count): - full_index = [f[b] for f in found_indices] - metric_err = compare_scalar(computed_failures[b], reference_failures[b]) - abs_err = abs(computed_failures[b] - reference_failures[b]) - if print_failures and b % failure_stride == 0: - return_strings.append( - f"index: {full_index}, computed {computed_failures[b]}, " - f"reference {reference_failures[b]}, " - f"absolute diff {abs_err:.3e}, " - f"metric diff: {metric_err:.3e}" - ) - if np.isnan(metric_err) or (metric_err > worst_metric_err): - worst_metric_err = metric_err - worst_full_idx = full_index - worst_abs_err = abs_err - computed_worst = computed_failures[b] - reference_worst = reference_failures[b] - # Summary and worst result - fullcount = len(ref_data.flatten()) - return_strings.append( - f"Failed count: {bad_indices_count}/{fullcount} " - f"({round(100.0 * (bad_indices_count / fullcount), 2)}%),\n" - f"Worst failed index {worst_full_idx}\n" - f"\tcomputed:{computed_worst}\n" - f"\treference: {reference_worst}\n" - f"\tabsolute diff: {worst_abs_err:.3e}\n" - f"\tmetric diff: {worst_metric_err:.3e}\n" - ) - - if xy_indices: - if len(computed_data.shape) == 3: - axis = 2 - any = np.any - elif len(computed_data.shape) == 4: - axis = (2, 3) - any = np.any - else: - axis = None - - def any(array, axis): - return array - - found_xy_indices = np.where( - any( - np.logical_not( - success_array( - computed_data, ref_data, eps, ignore_near_zero_errors, near_zero - ) - ), - axis=axis, - ) - ) - - return_strings.append( - "failed horizontal indices:" + str(list(zip(*found_xy_indices))) - ) - - return "\n".join(return_strings) - - def process_override(threshold_overrides, testobj, test_name, backend): override = threshold_overrides.get(test_name, None) if override is not None: @@ -229,6 +143,7 @@ def test_sequential_savepoint( subtests, caplog, threshold_overrides, + multimodal_metric, xy_indices=True, ): if case.testobj is None: @@ -257,7 +172,12 @@ def test_sequential_savepoint( case.testobj.serialnames(case.testobj.in_vars["data_vars"]) + case.testobj.in_vars["parameters"] ) - input_data = {name: input_data[name] for name in input_names} + try: + input_data = {name: input_data[name] for name in input_names} + except KeyError as e: + raise KeyError( + f"Variable {e} was described in the translate test but cannot be found in the NetCDF" + ) original_input_data = copy.deepcopy(input_data) # run python version of functionality output = case.testobj.compute(input_data) @@ -273,29 +193,36 @@ def test_sequential_savepoint( with subtests.test(varname=varname): failing_names.append(varname) output_data = gt_utils.asarray(output[varname]) - assert success( - output_data, - ref_data, - case.testobj.max_error, - ignore_near_zero, - case.testobj.near_zero, - ), sample_wherefail( - output_data, - ref_data, - case.testobj.max_error, - print_failures, - failure_stride, - case.savepoint_name, - ignore_near_zero_errors=ignore_near_zero, - near_zero=case.testobj.near_zero, - xy_indices=xy_indices, - ) + if multimodal_metric: + metric = MultiModalFloatMetric( + reference_values=ref_data, + computed_values=output_data, + eps=case.testobj.max_error, + ignore_near_zero_errors=ignore_near_zero, + near_zero=case.testobj.near_zero, + ) + else: + metric = LegacyMetric( + reference_values=ref_data, + computed_values=output_data, + eps=case.testobj.max_error, + 
ignore_near_zero_errors=ignore_near_zero, + near_zero=case.testobj.near_zero, + ) + if not metric.check: + os.makedirs(OUTDIR, exist_ok=True) + log_filename = os.path.join( + OUTDIR, + f"details-{case.savepoint_name}-{varname}-rank{case.rank}.log", + ) + metric.report(log_filename) + pytest.fail(str(metric), pytrace=False) passing_names.append(failing_names.pop()) ref_data_out[varname] = [ref_data] if len(failing_names) > 0: get_thresholds(case.testobj, input_data=original_input_data) os.makedirs(OUTDIR, exist_ok=True) - out_filename = os.path.join(OUTDIR, f"translate-{case.savepoint_name}.nc") + nc_filename = os.path.join(OUTDIR, f"translate-{case.savepoint_name}.nc") input_data_on_host = {} for key, _input in input_data.items(): input_data_on_host[key] = gt_utils.asarray(_input) @@ -305,10 +232,14 @@ def test_sequential_savepoint( [output], ref_data_out, failing_names, - out_filename, + nc_filename, + ) + if failing_names != []: + pytest.fail( + f"Only the following variables passed: {passing_names}", pytrace=False ) - assert failing_names == [], f"only the following variables passed: {passing_names}" - assert len(passing_names) > 0, "No tests passed" + if len(passing_names) == 0: + pytest.fail("No tests passed") def state_from_savepoint(serializer, savepoint, name_to_std_name): @@ -353,7 +284,8 @@ def test_parallel_savepoint( subtests, caplog, threshold_overrides, - compute_grid, + grid, + multimodal_metric, xy_indices=True, ): if MPI.COMM_WORLD.Get_size() % 6 != 0: @@ -389,8 +321,8 @@ def test_parallel_savepoint( ) if case.testobj.skip_test: return - if compute_grid and not case.testobj.compute_grid_option: - pytest.xfail(f"compute_grid option not used for test {case.savepoint_name}") + if (grid == "compute") and not case.testobj.compute_grid_option: + pytest.xfail(f"Grid compute option not used for test {case.savepoint_name}") input_data = dataset_to_dict(case.ds_in) # run python version of functionality output = case.testobj.compute_parallel(input_data, communicator) @@ -410,27 +342,33 @@ def test_parallel_savepoint( with subtests.test(varname=varname): failing_names.append(varname) output_data = gt_utils.asarray(output[varname]) - assert success( - output_data, - ref_data[varname][0], - case.testobj.max_error, - ignore_near_zero, - case.testobj.near_zero, - ), sample_wherefail( - output_data, - ref_data[varname][0], - case.testobj.max_error, - print_failures, - failure_stride, - case.savepoint_name, - ignore_near_zero, - case.testobj.near_zero, - xy_indices, - ) + if multimodal_metric: + metric = MultiModalFloatMetric( + reference_values=ref_data[varname][0], + computed_values=output_data, + eps=case.testobj.max_error, + ignore_near_zero_errors=ignore_near_zero, + near_zero=case.testobj.near_zero, + ) + else: + metric = LegacyMetric( + reference_values=ref_data[varname][0], + computed_values=output_data, + eps=case.testobj.max_error, + ignore_near_zero_errors=ignore_near_zero, + near_zero=case.testobj.near_zero, + ) + if not metric.check: + os.makedirs(OUTDIR, exist_ok=True) + log_filename = os.path.join( + OUTDIR, f"details-{case.savepoint_name}-{varname}.log" + ) + metric.report(log_filename) + pytest.fail(str(metric), pytrace=False) passing_names.append(failing_names.pop()) if len(failing_names) > 0: os.makedirs(OUTDIR, exist_ok=True) - out_filename = os.path.join( + nct_filename = os.path.join( OUTDIR, f"translate-{case.savepoint_name}-{case.grid.rank}.nc" ) try: @@ -443,12 +381,16 @@ def test_parallel_savepoint( [output], ref_data, failing_names, - out_filename, + nct_filename, 
             )
         except Exception as error:
             print(f"TestParallel SaveNetCDF Error: {error}")
-    assert failing_names == [], f"only the following variables passed: {passing_names}"
-    assert len(passing_names) > 0, "No tests passed"
+    if failing_names != []:
+        pytest.fail(
+            f"Only the following variables passed: {passing_names}", pytrace=False
+        )
+    if len(passing_names) == 0:
+        pytest.fail("No tests passed")


 def save_netcdf(
@@ -464,6 +406,7 @@ def save_netcdf(
     data_vars = {}
     for i, varname in enumerate(failing_names):
+        # Read in dimensions and attributes
         if hasattr(testobj, "outputs"):
             dims = [dim_name + f"_{i}" for dim_name in testobj.outputs[varname]["dims"]]
             attrs = {"units": testobj.outputs[varname]["units"]}
@@ -472,27 +415,33 @@ def save_netcdf(
                 f"dim_{varname}_{j}" for j in range(len(ref_data[varname][0].shape))
             ]
             attrs = {"units": "unknown"}
+
+        # Try to save inputs
         try:
-            data_vars[f"{varname}_in"] = xr.DataArray(
+            data_vars[f"{varname}_input"] = xr.DataArray(
                 np.stack([in_data[varname] for in_data in inputs_list]),
                 dims=("rank",) + tuple([f"{d}_in" for d in dims]),
                 attrs=attrs,
             )
         except KeyError as error:
             print(f"No input data found for {error}")
-        data_vars[f"{varname}_ref"] = xr.DataArray(
+
+        # Reference, computed and error computation
+        data_vars[f"{varname}_reference"] = xr.DataArray(
             np.stack(ref_data[varname]),
             dims=("rank",) + tuple([f"{d}_out" for d in dims]),
             attrs=attrs,
         )
-        data_vars[f"{varname}_out"] = xr.DataArray(
+        data_vars[f"{varname}_computed"] = xr.DataArray(
             np.stack([output[varname] for output in output_list]),
             dims=("rank",) + tuple([f"{d}_out" for d in dims]),
             attrs=attrs,
         )
-        data_vars[f"{varname}_error"] = (
-            data_vars[f"{varname}_ref"] - data_vars[f"{varname}_out"]
+        absolute_errors = (
+            data_vars[f"{varname}_reference"] - data_vars[f"{varname}_computed"]
         )
-        data_vars[f"{varname}_error"].attrs = attrs
+        data_vars[f"{varname}_absolute_error"] = absolute_errors
+        data_vars[f"{varname}_absolute_error"].attrs = attrs
+
+    print(f"File saved to {out_filename}")
     xr.Dataset(data_vars=data_vars).to_netcdf(out_filename)
diff --git a/ndsl/stencils/testing/translate.py b/ndsl/stencils/testing/translate.py
index ef828302..3d9d20f9 100644
--- a/ndsl/stencils/testing/translate.py
+++ b/ndsl/stencils/testing/translate.py
@@ -5,7 +5,7 @@ import ndsl.dsl.gt4py_utils as utils

 from ndsl.dsl.stencil import StencilFactory
-from ndsl.dsl.typing import Field  # noqa: F401
+from ndsl.dsl.typing import Field, Float  # noqa: F401
 from ndsl.quantity import Quantity
 from ndsl.stencils.testing.grid import Grid  # type: ignore
@@ -167,8 +167,10 @@ def make_storage_data_input_vars(self, inputs, storage_vars=None):
         for p in self.in_vars["parameters"]:
             if type(inputs_in[p]) in [np.int64, np.int32]:
                 inputs_out[p] = int(inputs_in[p])
+            elif type(inputs_in[p]) is bool:
+                inputs_out[p] = inputs_in[p]
             else:
-                inputs_out[p] = inputs_in[p]
+                inputs_out[p] = Float(inputs_in[p])
         for d, info in storage_vars.items():
             serialname = info["serialname"] if "serialname" in info else d
             self.update_info(info, inputs_in)
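The parameter handling above decides what type each serialized scalar takes before it reaches a stencil. A minimal standalone sketch of that rule follows; `Float` is pinned to `np.float64` here for illustration, whereas in NDSL it follows the configured precision in `ndsl.dsl.typing`:

```python
# Sketch of the parameter-normalization rule in make_storage_data_input_vars.
# Assumption: Float is np.float64; NDSL derives it from the configured precision.
import numpy as np

Float = np.float64


def normalize_parameter(value):
    """Coerce serialized scalars to the types the stencils expect."""
    if type(value) in [np.int64, np.int32]:
        return int(value)
    elif type(value) is bool:
        return value  # booleans pass through unchanged
    else:
        return Float(value)


assert normalize_parameter(np.int64(4)) == 4
assert normalize_parameter(True) is True
assert isinstance(normalize_parameter(0.1), Float)
```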
diff --git a/ndsl/testing/comparison.py b/ndsl/testing/comparison.py
index 0ffe55ed..9e2d1d59 100644
--- a/ndsl/testing/comparison.py
+++ b/ndsl/testing/comparison.py
@@ -1,68 +1,291 @@
-from typing import Union
+from typing import List, Optional, Union

 import numpy as np
+import numpy.typing as npt


-def compare_arr(computed_data, ref_data):
-    """
-    Smooth error near zero values.
-    Inputs are arrays.
-    """
-    if ref_data.dtype in (np.float64, np.int64, np.float32, np.int32):
-        denom = np.abs(ref_data) + np.abs(computed_data)
-        compare = np.asarray(2.0 * np.abs(computed_data - ref_data) / denom)
-        compare[denom == 0] = 0.0
-        return compare
-    elif ref_data.dtype in (np.bool,):
-        return np.logical_xor(computed_data, ref_data)
-    else:
-        raise TypeError(f"recieved data with unexpected dtype {ref_data.dtype}")
-
-
-def compare_scalar(computed_data: np.float64, ref_data: np.float64) -> np.float64:
-    """Smooth error near zero values. Scalar versions."""
-    err_as_array = compare_arr(np.atleast_1d(computed_data), np.atleast_1d(ref_data))
-    return err_as_array[0]
-
-
-def success_array(
-    computed_data: np.ndarray,
-    ref_data: np.ndarray,
-    eps: float,
-    ignore_near_zero_errors: Union[dict, bool],
-    near_zero: float,
-):
-    success = np.logical_or(
-        np.logical_and(np.isnan(computed_data), np.isnan(ref_data)),
-        compare_arr(computed_data, ref_data) < eps,
-    )
-    if isinstance(ignore_near_zero_errors, dict):
-        if ignore_near_zero_errors.keys():
-            near_zero = ignore_near_zero_errors["near_zero"]
-            success = np.logical_or(
-                success,
-                np.logical_and(
-                    np.abs(computed_data) < near_zero,
-                    np.abs(ref_data) < near_zero,
-                ),
-            )
-    elif ignore_near_zero_errors:
-        success = np.logical_or(
-            success,
-            np.logical_and(
-                np.abs(computed_data) < near_zero, np.abs(ref_data) < near_zero
-            ),
-        )
-    return success
-
-
-def success(computed_data, ref_data, eps, ignore_near_zero_errors, near_zero=0.0):
-    return np.all(
-        success_array(
-            np.asarray(computed_data),
-            np.asarray(ref_data),
-            eps,
-            ignore_near_zero_errors,
-            near_zero,
-        )
-    )
+class BaseMetric:
+    def __init__(
+        self,
+        reference_values: np.ndarray,
+        computed_values: np.ndarray,
+    ):
+        self.references = np.atleast_1d(reference_values)
+        self.computed = np.atleast_1d(computed_values)
+        self.check = False
+
+    def __str__(self) -> str:
+        ...
+
+    def __repr__(self) -> str:
+        ...
+
+    def report(self, file_path: Optional[str] = None) -> List[str]:
+        ...
+
+
+class LegacyMetric(BaseMetric):
+    """Legacy (AI2) metric used for the original FV3 port.
+
+    This metric attempts to smooth error comparison around 0.
+    It further tries to deal with the close-to-0 breakdown of absolute
+    error by allowing a `near_zero` threshold to be specified by hand.
+    """
+
+    def __init__(
+        self,
+        reference_values: np.ndarray,
+        computed_values: np.ndarray,
+        eps: float,
+        ignore_near_zero_errors: Union[dict, bool],
+        near_zero: float,
+    ):
+        super().__init__(reference_values, computed_values)
+        self.eps = eps
+        self._calculated_metric = np.empty_like(self.references)
+        self.success = self._compute_errors(
+            ignore_near_zero_errors,
+            near_zero,
+        )
+        self.check = np.all(self.success)
+
+    def _compute_errors(
+        self,
+        ignore_near_zero_errors,
+        near_zero,
+    ) -> npt.NDArray[np.bool_]:
+        if self.references.dtype in (np.float64, np.int64, np.float32, np.int32):
+            # Copy the denominator so the zero-substitution below does not
+            # mutate the reference data used later for reporting
+            denom = self.references.copy()
+            denom[self.references == 0] = self.computed[self.references == 0]
+            self._calculated_metric = np.asarray(
+                np.abs((self.computed - self.references) / denom)
+            )
+            self._calculated_metric[denom == 0] = 0.0
+        elif self.references.dtype in (np.bool_, bool):
+            self._calculated_metric = np.logical_xor(self.computed, self.references)
+        else:
+            raise TypeError(
+                f"received data with unexpected dtype {self.references.dtype}"
+            )
+        success = np.logical_or(
+            np.logical_and(np.isnan(self.computed), np.isnan(self.references)),
+            self._calculated_metric < self.eps,
+        )
+        if isinstance(ignore_near_zero_errors, dict):
+            if ignore_near_zero_errors.keys():
+                near_zero = ignore_near_zero_errors["near_zero"]
+                success = np.logical_or(
+                    success,
+                    np.logical_and(
+                        np.abs(self.computed) < near_zero,
+                        np.abs(self.references) < near_zero,
+                    ),
+                )
+        elif ignore_near_zero_errors:
+            success = np.logical_or(
+                success,
+                np.logical_and(
+                    np.abs(self.computed) < near_zero,
+                    np.abs(self.references) < near_zero,
+                ),
+            )
+        return success
+
+    def report(self, file_path: Optional[str] = None) -> List[str]:
+        report = []
+        if self.check:
+            report.append("✅ No numerical differences")
+        else:
+            report.append("❌ Numerical failures")
+
+            found_indices = np.logical_not(self.success).nonzero()
+            computed_failures = self.computed[found_indices]
+            reference_failures = self.references[found_indices]
+
+            # List all errors
+            bad_indices_count = len(found_indices[0])
+            # Determine worst result
+            worst_metric_err = 0.0
+            abs_errs = []
+            details = [
+                "All failures:",
+                "Index  Computed  Reference  Absolute E  Metric E",
+            ]
+            for b in range(bad_indices_count):
+                full_index = tuple([f[b] for f in found_indices])
+
+                metric_err = self._calculated_metric[full_index]
+
+                absolute_distance = abs(computed_failures[b] - reference_failures[b])
+                abs_errs.append(absolute_distance)
+
+                details.append(
+                    f"{full_index}  {computed_failures[b]}  "
+                    f"{reference_failures[b]}  {abs_errs[-1]:.3e}  {metric_err:.3e}"
+                )
+
+                if np.isnan(metric_err) or (abs(metric_err) > abs(worst_metric_err)):
+                    worst_metric_err = metric_err
+                    worst_full_idx = full_index
+                    worst_abs_err = abs_errs[-1]
+                    computed_worst = computed_failures[b]
+                    reference_worst = reference_failures[b]
+            # Try to quantify noisy errors
+            unique_errors = len(np.unique(np.array(abs_errs)))
+            # Summary and worst result
+            fullcount = len(self.references.flatten())
+            report.append(
+                f"Failed count: {bad_indices_count}/{fullcount} "
+                f"({round(100.0 * (bad_indices_count / fullcount), 2)}%),\n"
+                f"Worst failed index {worst_full_idx}\n"
+                f"    Computed: {computed_worst}\n"
+                f"    Reference: {reference_worst}\n"
+                f"    Absolute diff: {worst_abs_err:.3e}\n"
+                f"    Metric diff: {worst_metric_err:.3e}\n"
+                f"    Metric threshold: {self.eps}\n"
+                f"  Noise quantification:\n"
+                f"    Reference dtype: {type(reference_worst)}\n"
+                f"    Unique errors: {unique_errors}/{bad_indices_count}"
+            )
+            report.extend(details)
+
+        if file_path:
+            with open(file_path, "w") as fd:
+                fd.write("\n".join(report))
+
+        return report
+
+    def __str__(self) -> str:
+        return self.__repr__()
+
+    def __repr__(self) -> str:
+        report = self.report()
+        if len(report) > 30:
+            report = report[:30]  # ~10 first errors
+            report.append("...")
+        return "\n".join(report)
+
+
+class MultiModalFloatMetric(BaseMetric):
+    """Combination of absolute, relative & ULP comparison for floats.
+
+    This metric attempts to combine well-known float comparisons to
+    leverage a robust 32/64-bit float comparison in the presence of
+    large accumulating floating-point errors.
+
+    ULP is used to clear noise (ULP<=1.0 passes)
+    Absolute error is used for large amplitudes
+    """
+
+    _f32_absolute_eps = 1e-10
+    _f64_absolute_eps = 1e-13
+    relative_fraction = 0.000001  # 0.0001%
+    ulp_threshold = 1.0
+
+    def __init__(
+        self,
+        reference_values: np.ndarray,
+        computed_values: np.ndarray,
+        eps: float,
+        **kwargs,
+    ):
+        super().__init__(reference_values, computed_values)
+        self.absolute_distance = np.empty_like(self.references)
+        self.absolute_distance_metric = np.empty_like(self.references, dtype=np.bool_)
+        self.relative_distance = np.empty_like(self.references)
+        self.relative_distance_metric = np.empty_like(self.references, dtype=np.bool_)
+        self.ulp_distance = np.empty_like(self.references)
+        self.ulp_distance_metric = np.empty_like(self.references, dtype=np.bool_)
+
+        if self.references.dtype in (np.float32, np.int32):
+            self.absolute_eps = max(eps, self._f32_absolute_eps)
+        else:
+            self.absolute_eps = max(eps, self._f64_absolute_eps)
+
+        self.success = self._compute_all_metrics()
+        self.check = np.all(self.success)
+
+    def _compute_all_metrics(
+        self,
+    ) -> npt.NDArray[np.bool_]:
+        if self.references.dtype in (np.float64, np.int64, np.float32, np.int32):
+            max_values = np.maximum(
+                np.absolute(self.computed), np.absolute(self.references)
+            )
+            # Absolute distance
+            self.absolute_distance = np.absolute(self.computed - self.references)
+            self.absolute_distance_metric = self.absolute_distance < self.absolute_eps
+            # Relative distance (in pct)
+            self.relative_distance = np.divide(self.absolute_distance, max_values)
+            self.relative_distance_metric = (
+                self.absolute_distance < self.relative_fraction * max_values
+            )
+            # ULP distance
+            self.ulp_distance = np.divide(
+                self.absolute_distance, np.spacing(max_values)
+            )
+            self.ulp_distance_metric = self.ulp_distance <= self.ulp_threshold
+
+            # Combine all distances into success or failure
+            # Success = no NaNs & (abs or rel or ulp)
+            naninf_success = not np.logical_and(
+                np.isnan(self.computed), np.isnan(self.references)
+            ).all()
+            metric_success = np.logical_or(
+                self.relative_distance_metric, self.absolute_distance_metric
+            )
+            metric_success = np.logical_or(metric_success, self.ulp_distance_metric)
+            success = np.logical_and(naninf_success, metric_success)
+            return success
+        elif self.references.dtype in (np.bool_, bool):
+            # Booleans must match exactly to count as a success
+            success = np.logical_not(np.logical_xor(self.computed, self.references))
+            return success
+        else:
+            raise TypeError(
+                f"received data with unexpected dtype {self.references.dtype}"
+            )
+
+    def report(self, file_path: Optional[str] = None) -> List[str]:
+        report = []
+        if self.check:
+            report.append("✅ No numerical differences")
+        else:
+            report.append("❌ Numerical failures")
+
+            found_indices = np.logical_not(self.success).nonzero()
+            # List all errors to terminal and file
+            bad_indices_count = len(found_indices[0])
+            full_count = len(self.references.flatten())
+            failures_pct = round(100.0 * (bad_indices_count / full_count), 2)
+            report.extend(
+                [
+                    f"All failures ({bad_indices_count}/{full_count}) ({failures_pct}%),\n",
+                    f"Index  Computed  Reference  "
+                    f"Absolute E(<{self.absolute_eps:.2e})  "
+                    f"Relative E(<{self.relative_fraction * 100:.2e}%)  "
+                    f"ULP E(<{self.ulp_threshold})",
+                ]
+            )
+            # Summary and worst result
+            for iBad in range(bad_indices_count):
+                fi = tuple([f[iBad] for f in found_indices])
+                report.append(
+                    f"{str(fi)}  {self.computed[fi]:.16e}  {self.references[fi]:.16e}  "
+                    f"{self.absolute_distance[fi]:.2e} {'✅' if self.absolute_distance_metric[fi] else '❌'}  "
+                    f"{self.relative_distance[fi] * 100:.2e} {'✅' if self.relative_distance_metric[fi] else '❌'}  "
+                    f"{int(self.ulp_distance[fi]):02} {'✅' if self.ulp_distance_metric[fi] else '❌'}"
+                )
+
+        if file_path:
+            with open(file_path, "w") as fd:
+                fd.write("\n".join(report))
+
+        return report
+
+    def __str__(self) -> str:
+        return self.__repr__()
+
+    def __repr__(self) -> str:
+        report = self.report()
+        if len(report) > 12:
+            report = report[:12]  # ~10 first errors
+            report.append("...")
+        return "\n".join(report)
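For orientation, both metrics can also be driven directly outside the pytest harness. A minimal sketch with made-up values (the thresholds are illustrative, not prescribed by the test suite):

```python
# Minimal sketch exercising both metrics on made-up data.
import numpy as np

from ndsl.testing.comparison import LegacyMetric, MultiModalFloatMetric

reference = np.array([1.0, 2.0, 3.0])
computed = reference + 1e-15  # within a few ULPs of the reference

legacy = LegacyMetric(
    reference_values=reference,
    computed_values=computed,
    eps=1e-13,  # threshold on the smoothed relative metric
    ignore_near_zero_errors=False,
    near_zero=0.0,
)
multimodal = MultiModalFloatMetric(
    reference_values=reference,
    computed_values=computed,
    eps=1e-13,  # floored by the per-precision absolute eps
)

for metric in (legacy, multimodal):
    if not metric.check:
        # report() returns the log lines and can also write them to a file
        print("\n".join(metric.report()))
```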
diff --git a/ndsl/types.py b/ndsl/types.py
index 6c4ce596..e3461c39 100644
--- a/ndsl/types.py
+++ b/ndsl/types.py
@@ -9,7 +9,7 @@


 class Allocator(Protocol):
-    def __call__(self, shape: Iterable[int], dtype: type) -> Array:
+    def __call__(self, shape: Iterable[int], dtype: type):
         pass
@@ -21,23 +21,23 @@ class NumpyModule(Protocol):

     @functools.wraps(np.rot90)
     def rot90(self, *args, **kwargs):
-        ...
+        pass

     @functools.wraps(np.sum)
     def sum(self, *args, **kwargs):
-        ...
+        pass

     @functools.wraps(np.log)
     def log(self, *args, **kwargs):
-        ...
+        pass

     @functools.wraps(np.sin)
     def sin(self, *args, **kwargs):
-        ...
+        pass

     @functools.wraps(np.asarray)
     def asarray(self, *args, **kwargs):
-        ...
+        pass


 class AsyncRequest(Protocol):
diff --git a/setup.cfg b/setup.cfg
index 76603c37..1a142931 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -23,3 +23,14 @@ namespace_packages = True
 strict_optional = False
 warn_unreachable = True
 explicit_package_bases = True
+
+[coverage:run]
+parallel = true
+branch = true
+omit =
+    tests/*
+    *gt_cache*
+    .dacecache*
+    external/*
+    __init__.py
+source_pkgs = ndsl
diff --git a/setup.py b/setup.py
index bb24006a..03dcbf0f 100644
--- a/setup.py
+++ b/setup.py
@@ -11,10 +11,15 @@ def local_pkg(name: str, relative_path: str) -> str:
     return path


-test_requirements = ["pytest", "pytest-subtests"]
+test_requirements = ["pytest", "pytest-subtests", "coverage"]
 develop_requirements = test_requirements + ["pre-commit"]
+demos_requirements = ["ipython", "ipykernel"]

-extras_requires = {"test": test_requirements, "develop": develop_requirements}
+extras_requires = {
+    "test": test_requirements,
+    "develop": develop_requirements,
+    "demos": demos_requirements,
+}

 requirements: List[str] = [
     local_pkg("gt4py", "external/gt4py"),
@@ -24,16 +29,18 @@ def local_pkg(name: str, relative_path: str) -> str:
     "xarray",
     "f90nml>=1.1.0",
     "fsspec",
-    "netcdf4",
+    "netcdf4==1.7.0",
     "scipy",  # restart capacities only
     "h5netcdf",  # for xarray
     "dask",  # for xarray
+    "numpy==1.26.4",
+    "matplotlib",  # for plotting in boilerplate
 ]

 setup(
     author="NOAA/NASA",
-    python_requires=">=3.8",
+    python_requires=">=3.11",
     classifiers=[
         "Development Status :: 2 - Pre-Alpha",
         "Intended Audience :: Developers",
@@ -50,6 +57,11 @@ def local_pkg(name: str, relative_path: str) -> str:
     packages=find_namespace_packages(include=["ndsl", "ndsl.*"]),
     include_package_data=True,
     url="https://github.com/NOAA-GFDL/NDSL",
-    version="2024.04.00-RC",
+    version="2024.09.00",
     zip_safe=False,
+    entry_points={
+        "console_scripts": [
+            "ndsl-serialbox_to_netcdf = ndsl.stencils.testing.serialbox_to_netcdf:entry_point",
+        ]
+    },
 )
diff --git a/tests/grid/generate_eta_files.py b/tests/grid/generate_eta_files.py
new file mode 100755
index 00000000..1fb4d5ee
--- /dev/null
+++ b/tests/grid/generate_eta_files.py
@@ -0,0 +1,399 @@
+import numpy as np
+import xarray as xr
+
+
+"""
+This script uses the Python xarray module
+to create an eta_file containing ak and bk coefficients
+for km=79 and km=91. The coefficients are written out to
+eta79.nc and eta91.nc netCDF files, respectively.
+
+To run this script: `python3 ./generate_eta_files.py`
+"""
+
+# km = 79
+ak = xr.DataArray(
+    dims=["km1"],
+    attrs=dict(units="Pa", _FillValue=False),
+    data=np.array(
+        [
+            3.000000e02,
+            6.467159e02,
+            1.045222e03,
+            1.469188e03,
+            1.897829e03,
+            2.325385e03,
+            2.754396e03,
+            3.191294e03,
+            3.648332e03,
+            4.135675e03,
+            4.668282e03,
+            5.247940e03,
+            5.876271e03,
+            6.554716e03,
+            7.284521e03,
+            8.066738e03,
+            8.902188e03,
+            9.791482e03,
+            1.073499e04,
+            1.162625e04,
+            1.237212e04,
+            1.299041e04,
+            1.349629e04,
+            1.390277e04,
+            1.422098e04,
+            1.446058e04,
+            1.462993e04,
+            1.473633e04,
+            1.478617e04,
+            1.478511e04,
+            1.473812e04,
+            1.464966e04,
+            1.452370e04,
+            1.436382e04,
+            1.417324e04,
+            1.395491e04,
+            1.371148e04,
+            1.344540e04,
+            1.315890e04,
+            1.285407e04,
+            1.253280e04,
+            1.219685e04,
+            1.184788e04,
+            1.148739e04,
+            1.111682e04,
+            1.073748e04,
+            1.035062e04,
+            9.957395e03,
+            9.558875e03,
+            9.156069e03,
+            8.749922e03,
+            8.341315e03,
+            7.931065e03,
+            7.519942e03,
+            7.108648e03,
+            6.698281e03,
+            6.290007e03,
+            5.884984e03,
+            5.484372e03,
+            5.089319e03,
+            4.700960e03,
+            4.320421e03,
+            3.948807e03,
+            3.587201e03,
+            3.236666e03,
+            2.898237e03,
+            2.572912e03,
+            2.261667e03,
+            1.965424e03,
+            1.685079e03,
+            1.421479e03,
+            1.175419e03,
+            9.476516e02,
+            7.388688e02,
+            5.497130e02,
+            3.807626e02,
+            2.325417e02,
+            1.054810e02,
+            -8.381903e-04,
+            0.000000e00,
+        ]
+    ),
+)
+bk = xr.DataArray(
+    dims=["km1"],
+    attrs=dict(units="None", _FillValue=False),
+    data=np.array(
+        [
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.00106595,
+            0.00412866,
+            0.00900663,
+            0.01554263,
+            0.02359921,
+            0.03305481,
+            0.0438012,
+            0.05574095,
+            0.06878554,
+            0.08285347,
+            0.09786981,
+            0.1137643,
+            0.130471,
+            0.1479275,
+            0.1660746,
+            0.1848558,
+            0.2042166,
+            0.2241053,
+            0.2444716,
+            0.2652672,
+            0.286445,
+            0.3079604,
+            0.3297701,
+            0.351832,
+            0.3741062,
+            0.3965532,
+            0.4191364,
+            0.4418194,
+            0.4645682,
+            0.48735,
+            0.5101338,
+            0.5328897,
+            0.5555894,
+            0.5782067,
+            0.6007158,
+            0.6230936,
+            0.6452944,
+            0.6672683,
+            0.6889648,
+            0.7103333,
+            0.7313231,
+            0.7518838,
+            0.7719651,
+            0.7915173,
+            0.8104913,
+            0.828839,
+            0.846513,
+            0.8634676,
+            0.8796583,
+            0.8950421,
+            0.9095779,
+            0.9232264,
+            0.9359506,
+            0.9477157,
+            0.9584892,
+            0.9682413,
+            0.9769447,
+            0.9845753,
+            0.9911126,
+            0.9965372,
+            1.0,
+        ]
+    ),
+)
+coefficients = xr.Dataset(data_vars={"ak": ak, "bk": bk})
+coefficients.to_netcdf("eta79.nc")
+
+
+# km = 91
+ak = xr.DataArray(
+    dims=["km1"],
+    attrs=dict(units="Pa", _FillValue=False),
+    data=np.array(
+        [
+            1.00000000e00,
+            1.75000000e00,
+            2.75000000e00,
+            4.09999990e00,
+            5.98951054e00,
+            8.62932968e00,
+            1.22572632e01,
+            1.71510906e01,
+            2.36545467e01,
+            3.21627693e01,
+            4.31310921e01,
+            5.71100426e01,
+            7.46595764e01,
+            9.64470978e01,
+            1.23169769e02,
+            1.55601318e02,
+            1.94594009e02,
+            2.41047531e02,
+            2.95873840e02,
+            3.60046967e02,
+            4.34604828e02,
+            5.20628723e02,
+            6.19154846e02,
+            7.31296021e02,
+            8.58240906e02,
+            1.00106561e03,
+            1.16092859e03,
+            1.33903992e03,
+            1.53650012e03,
+            1.75448938e03,
+            1.99417834e03,
+            2.25667407e03,
+            2.54317139e03,
+            2.85476392e03,
+            3.19258569e03,
+            3.55775366e03,
+            3.95135107e03,
+            4.37428662e03,
+            4.82711084e03,
+            5.31022168e03,
+            5.82387793e03,
+            6.36904248e03,
+            6.94875244e03,
+            7.56691992e03,
+            8.22634277e03,
+            8.93120996e03,
+            9.68446191e03,
+            1.04822725e04,
+            1.13182793e04,
+            1.21840771e04,
+            1.30655674e04,
+            1.39532207e04,
+            1.48307285e04,
+            1.56872617e04,
+            1.65080645e04,
+            1.72810996e04,
+            1.79942988e04,
+            1.86363223e04,
+            1.91961797e04,
+            1.96640723e04,
+            2.00301914e04,
+            2.02853691e04,
+            2.04215254e04,
+            2.04300684e04,
+            2.03028730e04,
+            2.00323711e04,
+            1.96110664e04,
+            1.90313848e04,
+            1.82866426e04,
+            1.73777930e04,
+            1.63224639e04,
+            1.51444033e04,
+            1.38725674e04,
+            1.25404785e04,
+            1.11834170e04,
+            9.83532715e03,
+            8.52630664e03,
+            7.28224512e03,
+            6.12326074e03,
+            5.06350684e03,
+            4.11124902e03,
+            3.27000122e03,
+            2.53922729e03,
+            1.91530762e03,
+            1.39244995e03,
+            9.63134766e02,
+            6.20599365e02,
+            3.57989502e02,
+            1.69421387e02,
+            5.10314941e01,
+            2.48413086e00,
+            0.00000000e00,
+        ]
+    ),
+)
+bk = xr.DataArray(
+    dims=["km1"],
+    attrs=dict(units="None", _FillValue=False),
+    data=np.array(
+        [
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            0.00000000e00,
+            3.50123992e-06,
+            2.81484008e-05,
+            9.38666999e-05,
+            2.28561999e-04,
+            5.12343016e-04,
+            1.04712998e-03,
+            1.95625005e-03,
+            3.42317997e-03,
+            5.58632007e-03,
+            8.65428988e-03,
+            1.27844000e-02,
+            1.81719996e-02,
+            2.49934997e-02,
+            3.34198996e-02,
+            4.36249003e-02,
+            5.57769015e-02,
+            7.00351968e-02,
+            8.65636021e-02,
+            1.05520003e-01,
+            1.27051994e-01,
+            1.51319996e-01,
+            1.78477004e-01,
+            2.08675995e-01,
+            2.42069006e-01,
+            2.78813988e-01,
+            3.19043010e-01,
+            3.62558991e-01,
+            4.08596009e-01,
+            4.56384987e-01,
+            5.05111992e-01,
+            5.53902984e-01,
+            6.01903021e-01,
+            6.48333013e-01,
+            6.92534983e-01,
+            7.33981013e-01,
+            7.72292018e-01,
+            8.07236016e-01,
+            8.38724971e-01,
+            8.66774976e-01,
+            8.91497016e-01,
+            9.13065016e-01,
+            9.31702971e-01,
+            9.47658002e-01,
+            9.61175978e-01,
+            9.72495019e-01,
+            9.81844008e-01,
+            9.89410996e-01,
+            9.95342016e-01,
+            1.00000000e00,
+        ]
+    ),
+)
+coefficients = xr.Dataset(data_vars={"ak": ak, "bk": bk})
+coefficients.to_netcdf("eta91.nc")
+
+# km =
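The grid tests below depend on the generated coefficients producing monotonically increasing eta levels. A quick sketch of that check, assuming the conventional hybrid-coordinate definition eta_k = ak_k / p0 + bk_k with a reference pressure p0 = 1e5 Pa (both the definition and p0 are assumptions here, not taken from this changeset):

```python
# Sketch: verify that ak/bk coefficients yield monotonically increasing eta.
# Assumption: eta_k = ak_k / p0 + bk_k with p0 = 1e5 Pa.
import numpy as np
import xarray as xr

data = xr.open_dataset("eta79.nc")
eta = data["ak"].values / 1.0e5 + data["bk"].values
assert np.all(np.diff(eta) > 0), "ETA values are not monotonically increasing"
```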
+ """ + + data = xr.open_dataset(eta_file) + return data["ak"].values, data["bk"].values + + +def write_non_mono_eta_file(in_eta_file, out_eta_file): + """ + Reads in file eta79.nc and alters randomly chosen ak/bk values + This tests the expected failure of set_eta_hybrid_coefficients + for coefficients that lead to non-monotonically increasing + eta values + """ + + data = xr.open_dataset(in_eta_file) + data["ak"].values[10] = data["ak"].values[0] + data["bk"].values[20] = 0.0 + + data.to_netcdf(out_eta_file) + + +@pytest.mark.parametrize("km", [79, 91]) +def test_set_hybrid_pressure_coefficients_correct(km): + + """This test checks to see that the ak and bk arrays + are read-in correctly and are stored as + expected. Both values of km=79 and km=91 are + tested and both tests are expected to pass + with the stored ak and bk values agreeing with the + values read-in directly from the NetCDF file. + """ + + working_dir = str(os.getcwd()) + eta_file = f"{working_dir}/eta{km}.nc" + + backend = "numpy" + + layout = (1, 1) + + nz = km + ny = 48 + nx = 48 + nhalo = 3 + + partitioner = CubedSpherePartitioner(TilePartitioner(layout)) + + communicator = CubedSphereCommunicator(NullComm(rank=0, total_ranks=6), partitioner) + + sizer = SubtileGridSizer.from_tile_params( + nx_tile=nx, + ny_tile=ny, + nz=nz, + n_halo=nhalo, + extra_dim_lengths={}, + layout=layout, + tile_partitioner=partitioner.tile, + tile_rank=communicator.tile.rank, + ) + + quantity_factory = QuantityFactory.from_backend(sizer=sizer, backend=backend) + + metric_terms = MetricTerms( + quantity_factory=quantity_factory, communicator=communicator, eta_file=eta_file + ) + + ak_results = metric_terms.ak.data + bk_results = metric_terms.bk.data + ak_answers, bk_answers = set_answers(f"eta{km}.nc") + + if ak_answers.size != ak_results.size: + raise ValueError("Unexpected size of bk") + if bk_answers.size != bk_results.size: + raise ValueError("Unexpected size of ak") + + if not np.array_equal(ak_answers, ak_results): + raise ValueError("Unexpected value of ak") + if not np.array_equal(bk_answers, bk_results): + raise ValueError("Unexpected value of bk") + + +def test_set_hybrid_pressure_coefficients_nofile(): + + """ + This test checks to see that the program + fails when the eta_file is not specified + in the yaml configuration file. + """ + + eta_file = "NULL" + + backend = "numpy" + + layout = (1, 1) + + nz = 79 + ny = 48 + nx = 48 + nhalo = 3 + + partitioner = CubedSpherePartitioner(TilePartitioner(layout)) + + communicator = CubedSphereCommunicator(NullComm(rank=0, total_ranks=6), partitioner) + + sizer = SubtileGridSizer.from_tile_params( + nx_tile=nx, + ny_tile=ny, + nz=nz, + n_halo=nhalo, + extra_dim_lengths={}, + layout=layout, + tile_partitioner=partitioner.tile, + tile_rank=communicator.tile.rank, + ) + + quantity_factory = QuantityFactory.from_backend(sizer=sizer, backend=backend) + + try: + metric_terms = MetricTerms( + quantity_factory=quantity_factory, + communicator=communicator, + eta_file=eta_file, + ) + except Exception as error: + if str(error) == "eta file NULL does not exist": + pytest.xfail("testing eta file not correctly specified") + else: + pytest.fail(f"ERROR {error}") + + +def test_set_hybrid_pressure_coefficients_not_mono(): + + """ + This test checks to see that the program + fails when the computed eta values increase + non-monotonically. 
+
+
+def test_set_hybrid_pressure_coefficients_not_mono():
+    """
+    This test checks to see that the program
+    fails when the computed eta values increase
+    non-monotonically. For this test, a copy of
+    eta79.nc is written in which randomly chosen ak and bk
+    values have been changed nonsensically so as to
+    result in erroneous eta values.
+    """
+
+    working_dir = str(os.getcwd())
+    in_eta_file = f"{working_dir}/eta79.nc"
+    out_eta_file = "eta_not_mono_79.nc"
+    write_non_mono_eta_file(in_eta_file, out_eta_file)
+    eta_file = out_eta_file
+
+    backend = "numpy"
+
+    layout = (1, 1)
+
+    nz = 79
+    ny = 48
+    nx = 48
+    nhalo = 3
+
+    partitioner = CubedSpherePartitioner(TilePartitioner(layout))
+
+    communicator = CubedSphereCommunicator(NullComm(rank=0, total_ranks=6), partitioner)
+
+    sizer = SubtileGridSizer.from_tile_params(
+        nx_tile=nx,
+        ny_tile=ny,
+        nz=nz,
+        n_halo=nhalo,
+        extra_dim_lengths={},
+        layout=layout,
+        tile_partitioner=partitioner.tile,
+        tile_rank=communicator.tile.rank,
+    )
+
+    quantity_factory = QuantityFactory.from_backend(sizer=sizer, backend=backend)
+
+    try:
+        metric_terms = MetricTerms(
+            quantity_factory=quantity_factory,
+            communicator=communicator,
+            eta_file=eta_file,
+        )
+    except Exception as error:
+        if os.path.isfile(out_eta_file):
+            os.remove(out_eta_file)
+        if str(error) == "ETA values are not monotonically increasing":
+            pytest.xfail("testing eta values are not monotonically increasing")
+        else:
+            pytest.fail(
+                "ERROR in testing that eta values are not monotonically increasing"
+            )
diff --git a/tests/test_boilerplate.py b/tests/test_boilerplate.py
new file mode 100644
index 00000000..a8de4075
--- /dev/null
+++ b/tests/test_boilerplate.py
@@ -0,0 +1,60 @@
+import numpy as np
+from gt4py.cartesian.gtscript import PARALLEL, computation, interval
+
+from ndsl import QuantityFactory, StencilFactory
+from ndsl.constants import X_DIM, Y_DIM, Z_DIM
+from ndsl.dsl.typing import FloatField
+
+
+def _copy_ops(stencil_factory: StencilFactory, quantity_factory: QuantityFactory):
+    # Allocate data and fill input
+    qty_out = quantity_factory.zeros(dims=[X_DIM, Y_DIM, Z_DIM], units="n/a")
+    qty_in = quantity_factory.zeros(dims=[X_DIM, Y_DIM, Z_DIM], units="n/a")
+    qty_in.view[:] = np.indices(
+        dimensions=quantity_factory.sizer.get_extent([X_DIM, Y_DIM, Z_DIM]),
+        dtype=np.float64,
+    ).sum(
+        axis=0
+    )  # Value of each entry is the sum of the I, J and K indices at that point
+
+    # Define a stencil
+    def copy_stencil(input_field: FloatField, output_field: FloatField):
+        with computation(PARALLEL), interval(...):
+            output_field = input_field
+
+    # Execute
+    copy = stencil_factory.from_dims_halo(
+        func=copy_stencil, compute_dims=[X_DIM, Y_DIM, Z_DIM]
+    )
+    copy(qty_in, qty_out)
+    assert (qty_in.view[:] == qty_out.view[:]).all()
+
+
+def test_boilerplate_import_numpy():
+    """Test to make sure the basic numpy boilerplate works as expected.
+
+    Dev Note: the imports inside the function are part of the test.
+    """
+    from ndsl.boilerplate import get_factories_single_tile_numpy
+
+    # Boilerplate
+    stencil_factory, quantity_factory = get_factories_single_tile_numpy(
+        nx=5, ny=5, nz=2, nhalo=1
+    )
+
+    _copy_ops(stencil_factory, quantity_factory)
+
+
+def test_boilerplate_import_orchestrated_cpu():
+    """Test to make sure the basic orchestrated boilerplate works as expected.
+
+    Dev Note: the imports inside the function are part of the test.
+    """
+    from ndsl.boilerplate import get_factories_single_tile_orchestrated_cpu
+
+    # Boilerplate
+    stencil_factory, quantity_factory = get_factories_single_tile_orchestrated_cpu(
+        nx=5, ny=5, nz=2, nhalo=1
+    )
+
+    _copy_ops(stencil_factory, quantity_factory)
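The boilerplate factories exercised above also lend themselves to quick one-off experiments. A sketch with an invented `plus_one` stencil (illustrative only, not part of the test suite):

```python
# Sketch: reusing the boilerplate factories for a custom stencil.
# `plus_one` is an invented example stencil.
from gt4py.cartesian.gtscript import PARALLEL, computation, interval

from ndsl.boilerplate import get_factories_single_tile_numpy
from ndsl.constants import X_DIM, Y_DIM, Z_DIM
from ndsl.dsl.typing import FloatField


def plus_one(input_field: FloatField, output_field: FloatField):
    with computation(PARALLEL), interval(...):
        output_field = input_field + 1.0


stencil_factory, quantity_factory = get_factories_single_tile_numpy(
    nx=5, ny=5, nz=2, nhalo=1
)
qty_in = quantity_factory.zeros(dims=[X_DIM, Y_DIM, Z_DIM], units="n/a")
qty_out = quantity_factory.zeros(dims=[X_DIM, Y_DIM, Z_DIM], units="n/a")

run = stencil_factory.from_dims_halo(
    func=plus_one, compute_dims=[X_DIM, Y_DIM, Z_DIM]
)
run(qty_in, qty_out)
assert (qty_out.view[:] == 1.0).all()
```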