From 7ab21358143241d4be8691c4b3d2c4ad46881740 Mon Sep 17 00:00:00 2001
From: Brian Kroth <bpkroth@microsoft.com>
Date: Wed, 22 May 2024 16:32:01 -0500
Subject: [PATCH 01/10] Enable chbenchmark in CI pipelines

---
 .github/workflows/maven.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index 823589ee8..bd3ebf42c 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -111,7 +111,7 @@ jobs:
       fail-fast: false
       matrix:
         # BROKEN: tpch
-        benchmark: [ 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'templated', 'tpcc', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
+        benchmark: [ 'chbenchmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'templated', 'tpcc', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
     steps:
       - name: Download artifact
         uses: actions/download-artifact@v4
@@ -175,7 +175,7 @@ jobs:
       fail-fast: false
       matrix:
         # FIXME: Add tpch back in (#333).
-        benchmark: [ 'auctionmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'templated', 'tpcc', 'tpcc-with-reconnects', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
+        benchmark: [ 'auctionmark', 'chbenchmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'templated', 'tpcc', 'tpcc-with-reconnects', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
     services:
       mariadb: # https://hub.docker.com/_/mariadb
         image: mariadb:latest
@@ -254,7 +254,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        benchmark: [ 'auctionmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'templated', 'tpcc', 'tpcc-with-reconnects', 'tpch', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
+        benchmark: [ 'auctionmark', 'chbenchmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'templated', 'tpcc', 'tpcc-with-reconnects', 'tpch', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
     services:
       mysql: # https://hub.docker.com/_/mysql
         image: mysql:latest
@@ -332,7 +332,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        benchmark: [ 'auctionmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'templated', 'tpcc', 'tpcc-with-reconnects', 'tpch', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
+        benchmark: [ 'auctionmark', 'chbenchmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'templated', 'tpcc', 'tpcc-with-reconnects', 'tpch', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
     services:
       oracle:
         image: gvenzl/oracle-xe:21.3.0-slim-faststart
@@ -420,7 +420,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        benchmark: [ 'auctionmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'templated', 'tpcc', 'tpcc-with-reconnects', 'tpch', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
+        benchmark: [ 'auctionmark', 'chbenchmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'templated', 'tpcc', 'tpcc-with-reconnects', 'tpch', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
     steps:
       # Note: we download just the docker-compose scripts/configs rather than the
       # whole source code repo for better testing.
@@ -511,7 +511,7 @@ jobs:
       fail-fast: false
       matrix:
         # TODO: Add tpcc-with-reconnects benchmark support
-        benchmark: [ 'auctionmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'templated', 'tpcc', 'tpch', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
+        benchmark: [ 'auctionmark', 'chbenchmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'templated', 'tpcc', 'tpch', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
     services:
       cockroach: # https://hub.docker.com/repository/docker/timveil/cockroachdb-single-node
         image: timveil/cockroachdb-single-node:latest

From d3ceadae9958489fab8f6c0d924d3c56f894e5fc Mon Sep 17 00:00:00 2001
From: Brian Kroth <bpkroth@microsoft.com>
Date: Wed, 22 May 2024 16:51:08 -0500
Subject: [PATCH 02/10] sqlite config

---
 config/sqlite/sample_chbenchmark_config.xml | 161 ++++++++++++++++++++
 1 file changed, 161 insertions(+)
 create mode 100644 config/sqlite/sample_chbenchmark_config.xml

diff --git a/config/sqlite/sample_chbenchmark_config.xml b/config/sqlite/sample_chbenchmark_config.xml
new file mode 100644
index 000000000..d0aae180d
--- /dev/null
+++ b/config/sqlite/sample_chbenchmark_config.xml
@@ -0,0 +1,161 @@
+<?xml version="1.0"?>
+<parameters>
+
+    <!-- Connection details -->
+    <type>SQLITE</type>
+    <driver>org.sqlite.JDBC</driver>
+    <url>jdbc:sqlite:tpcc.db</url>
+    <isolation>TRANSACTION_SERIALIZABLE</isolation>
+    <batchsize>128</batchsize>
+
+    <!-- Scale factor is the number of warehouses in TPCC -->
+    <scalefactor>1</scalefactor>
+
+    <!-- SQLITE only supports one writer thread -->
+    <loaderThreads>1</loaderThreads>
+
+    <!-- The workload -->
+    <!-- Number of terminals per workload -->
+    <terminals>1</terminals>
+
+    <!-- Extra Features (Commented Out) -->
+    <!-- Can be workload-specific -->
+    <!-- <terminals bench="tpcc">2</terminals> -->
+
+    <!-- Workload-specific options are marked with @bench=[workload_name] -->
+    <!-- Workload-specific number of terminals -->
+    <!-- <terminals bench="chbenchmark">2</terminals> -->
+
+    <works>
+
+        <!-- A Basic WorkPhase for Mixed Workloads -->
+        <work>
+            <time>60</time>
+
+            <!-- Note: The rate can be set to UNLIMITED or DISABLED -->
+            <rate>200</rate>
+
+            <!-- Need to Specify transaction weights for each workload .. Otherwise the number of fields won't match -->
+            <weights bench="tpcc">45,43,4,4,4</weights>
+            <weights bench="chbenchmark">3, 2, 3, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5</weights>
+        </work>
+
+        <!-- Extra features showcase -->
+        <!-- <work> -->
+            <!-- <time>60</time> -->
+
+            <!-- <rate>200</rate> -->
+            <!-- <rate bench="chbenchmark">disabled</rate> -->
+
+            <!-- NOTE: TPCC workers won't be distributed evenly between warehouses if not all workers are active -->
+            <!-- <active_terminals>1</active_terminals> -->
+            <!-- <active_terminals bench="chbenchmark">1</active_terminals> -->
+
+            <!-- Specifies transaction weight for each workload. -->
+            <!-- <weights bench="tpcc">45,43,4,4,4</weights> -->
+            <!-- <weights bench="chbenchmark">3, 2, 3, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5</weights> -->
+        <!-- </work> -->
+
+        <!--
+        <work>
+            <time>60</time>
+
+            <rate>100</rate>
+            <rate bench="chbenchmark">unlimited</rate>
+
+            <weights bench="tpcc">45,43,4,4,4</weights>
+            <weights bench="chbenchmark">3, 2, 3, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5</weights>
+        </work>
+        -->
+    </works>
+
+
+    <!-- CH specific -->
+    <transactiontypes bench="chbenchmark">
+        <transactiontype>
+            <name>Q1</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q2</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q3</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q4</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q5</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q6</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q7</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q8</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q9</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q10</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q11</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q12</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q13</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q14</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q15</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q16</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q17</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q18</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q19</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q20</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q21</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Q22</name>
+        </transactiontype>
+    </transactiontypes>
+
+    <!-- TPCC specific -->
+    <transactiontypes bench="tpcc">
+        <transactiontype>
+            <name>NewOrder</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Payment</name>
+        </transactiontype>
+        <transactiontype>
+            <name>OrderStatus</name>
+        </transactiontype>
+        <transactiontype>
+            <name>Delivery</name>
+        </transactiontype>
+        <transactiontype>
+            <name>StockLevel</name>
+        </transactiontype>
+    </transactiontypes>
+</parameters>

From e7d2805feb8d18b4bec72a291582870d35c16d29 Mon Sep 17 00:00:00 2001
From: Brian Kroth <bpkroth@microsoft.com>
Date: Wed, 22 May 2024 16:57:42 -0500
Subject: [PATCH 03/10] need to load tpcc data as well

---
 .github/workflows/maven.yml | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index bd3ebf42c..ff5c347f0 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -145,6 +145,10 @@ jobs:
             echo "The ${{matrix.benchmark}} benchmark is not supported for sqlite."
             exit 0
           else
+            if [ ${{matrix.benchmark}} == chbenchmark ]; then
+              # Disable synchronous mode for sqlite tpcc data loading to save some time.
+              java -jar benchbase.jar -b tpcc -c config/sqlite/sample_tpcc_nosync_config.xml --create=true --load=true --execute=false --json-histograms results/histograms.json
+            fi
             java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/sqlite/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
           fi
 
@@ -229,6 +233,9 @@ jobs:
             (sleep 10 && ./scripts/interrupt-docker-db-service.sh mariadb) &
             java -jar benchbase.jar -b tpcc -c config/mariadb/sample_tpcc_config.xml --execute=true --json-histograms results/histograms.json
           else
+            if [ ${{matrix.benchmark}} == chbenchmark ]; then
+              java -jar benchbase.jar -b tpcc -c config/mariadb/sample_tpcc_config.xml --create=true --load=true --execute=false --json-histograms results/histograms.json
+            fi
             java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/mariadb/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
           fi
 
@@ -307,6 +314,9 @@ jobs:
             (sleep 10 && ./scripts/interrupt-docker-db-service.sh mysql) &
             java -jar benchbase.jar -b tpcc -c config/mysql/sample_tpcc_config.xml --execute=true --json-histograms results/histograms.json
           else
+            if [ ${{matrix.benchmark}} == chbenchmark ]; then
+              java -jar benchbase.jar -b tpcc -c config/mysql/sample_tpcc_config.xml --create=true --load=true --execute=false --json-histograms results/histograms.json
+            fi
             java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/mysql/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
           fi
 
@@ -389,6 +399,9 @@ jobs:
             (sleep 10 && ./scripts/interrupt-docker-db-service.sh oracle) &
             java -jar benchbase.jar -b tpcc -c config/oracle/sample_tpcc_config.xml --execute=true --json-histograms results/histograms.json
           else
+            if [ ${{matrix.benchmark}} == chbenchmark ]; then
+              java -jar benchbase.jar -b tpcc -c config/oracle/sample_tpcc_config.xml --create=true --load=true --execute=false --json-histograms results/histograms.json
+            fi
             java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/oracle/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
           fi
 
@@ -475,6 +488,9 @@ jobs:
             (sleep 10 && ./scripts/interrupt-docker-db-service.sh postgres) &
             java -jar benchbase.jar -b tpcc -c config/postgres/sample_tpcc_config.xml -im 1000 -mt advanced --execute=true --json-histograms results/histograms.json
           else
+            if [ ${{matrix.benchmark}} == chbenchmark ]; then
+              java -jar benchbase.jar -b tpcc -c config/postgres/sample_tpcc_config.xml --create=true --load=true --execute=false --json-histograms results/histograms.json
+            fi
             java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/postgres/sample_${{matrix.benchmark}}_config.xml -im 1000 -mt advanced --create=true --load=true --execute=true --json-histograms results/histograms.json
           fi
 
@@ -553,6 +569,9 @@ jobs:
             (sleep 10 && ./scripts/interrupt-docker-db-service.sh cockroachdb) &
             java -jar benchbase.jar -b tpcc -c config/cockroachdb/sample_tpcc_config.xml --execute=true --json-histograms results/histograms.json
           else
+            if [ ${{matrix.benchmark}} == chbenchmark ]; then
+              java -jar benchbase.jar -b tpcc -c config/cockroachdb/sample_tpcc_config.xml --create=true --load=true --execute=false --json-histograms results/histograms.json
+            fi
             java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/cockroachdb/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
           fi
 
@@ -651,6 +670,9 @@ jobs:
             (sleep 10 && ./scripts/interrupt-docker-db-service.sh sqlserver) &
             java -jar benchbase.jar -b tpcc -c config/sqlserver/sample_tpcc_config.xml -im 1000 -mt advanced --execute=true --json-histograms results/histograms.json
           else
+            if [ ${{matrix.benchmark}} == chbenchmark ]; then
+              java -jar benchbase.jar -b tpcc -c config/sqlserver/sample_tpcc_config.xml --create=true --load=true --execute=false --json-histograms results/histograms.json
+            fi
             java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/sqlserver/sample_${{matrix.benchmark}}_config.xml -im 1000 -mt advanced --create=true --load=true --execute=true --json-histograms results/histograms.json
           fi
 

From 78968f11e279bed8c963b9951c3d559e5831f0fa Mon Sep 17 00:00:00 2001
From: Brian Kroth <bpkroth@microsoft.com>
Date: Wed, 29 May 2024 13:25:23 -0500
Subject: [PATCH 04/10] sqlite ddl syntax tweaks

---
 .../benchmarks/chbenchmark/ddl-sqlite.sql     | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 src/main/resources/benchmarks/chbenchmark/ddl-sqlite.sql

diff --git a/src/main/resources/benchmarks/chbenchmark/ddl-sqlite.sql b/src/main/resources/benchmarks/chbenchmark/ddl-sqlite.sql
new file mode 100644
index 000000000..5bacb925f
--- /dev/null
+++ b/src/main/resources/benchmarks/chbenchmark/ddl-sqlite.sql
@@ -0,0 +1,31 @@
+DROP TABLE IF EXISTS supplier;
+DROP TABLE IF EXISTS nation;
+DROP TABLE IF EXISTS region;
+
+create table region
+(
+    r_regionkey int       not null,
+    r_name      char(55)  not null,
+    r_comment   char(152) not null,
+    PRIMARY KEY (r_regionkey)
+);
+
+create table nation
+(
+    n_nationkey int    not null,
+   n_name char(25) not null,
+   n_regionkey int not null references region(r_regionkey) ON DELETE CASCADE,
+   n_comment char(152) not null,
+   PRIMARY KEY ( n_nationkey )
+);
+
+create table supplier (
+   su_suppkey int not null,
+   su_name char(25) not null,
+   su_address varchar(40) not null,
+   su_nationkey int not null references nation(n_nationkey)  ON DELETE CASCADE,
+   su_phone char(15) not null,
+   su_acctbal numeric(12,2) not null,
+   su_comment char(101) not null,
+   PRIMARY KEY ( su_suppkey )
+);

From 2d2cbaca613801fa8722940777dfe61eb9e2a9d3 Mon Sep 17 00:00:00 2001
From: Brian Kroth <bpkroth@microsoft.com>
Date: Wed, 29 May 2024 19:54:59 +0000
Subject: [PATCH 05/10] adjust script to match maven ci workflow for
 chbenchmark for sqlite

---
 config/sqlite/sample_epinions_config.xml  |  4 ++--
 docker/build-run-benchmark-with-docker.sh | 23 ++++++++++++++++-------
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/config/sqlite/sample_epinions_config.xml b/config/sqlite/sample_epinions_config.xml
index 3fc752d98..8debadc65 100644
--- a/config/sqlite/sample_epinions_config.xml
+++ b/config/sqlite/sample_epinions_config.xml
@@ -4,13 +4,13 @@
     <!-- Connection details -->
     <type>SQLITE</type>
     <driver>org.sqlite.JDBC</driver>
-    <url>jdbc:sqlite:resourcestresser.db</url>
+    <url>jdbc:sqlite:epinions.db</url>
     <isolation>TRANSACTION_SERIALIZABLE</isolation>
     <batchsize>128</batchsize>
 
     <!-- Scalefactor in Epinions scales by *2000 the number of users-->
     <scalefactor>0.1</scalefactor>
-    
+
     <!-- SQLITE only supports one writer thread -->
     <loaderThreads>1</loaderThreads>
 
diff --git a/docker/build-run-benchmark-with-docker.sh b/docker/build-run-benchmark-with-docker.sh
index 69386f5e0..7f90eca44 100755
--- a/docker/build-run-benchmark-with-docker.sh
+++ b/docker/build-run-benchmark-with-docker.sh
@@ -33,7 +33,7 @@ if [ "$BENCHBASE_PROFILE" == 'sqlite' ]; then
     fi
     EXTRA_DOCKER_ARGS="-v $SRC_DIR/$benchmark.db:/benchbase/profiles/sqlite/$benchmark.db"
 
-    if [ "$benchmark" == 'templated' ]; then
+    if echo "$benchmark" | grep -Eqx '(templated|chbenchmark)'; then
         # See notes below:
         EXTRA_DOCKER_ARGS+=" -v $SRC_DIR/$benchmark.db:/benchbase/profiles/sqlite/tpcc.db"
     fi
@@ -49,7 +49,7 @@ if [ "${SKIP_LOAD_DB:-false}" != 'true' ]; then
     # For templated benchmarks, we need to preload some data for the test since by
     # design, templated benchmarks do not support the 'load' operation
     # In this case, we load the tpcc data.
-    if [ "$benchmark" == 'templated' ]; then
+    if echo "$benchmark" | grep -Eqx '(templated|chbenchmark)'; then
         load_benchmark='tpcc'
 
         echo "INFO: Loading tpcc data for templated benchmark"
@@ -59,15 +59,24 @@ if [ "${SKIP_LOAD_DB:-false}" != 'true' ]; then
         else
             config="config/sample_tpcc_config.xml"
         fi
-    else
+
+        BUILD_IMAGE=false EXTRA_DOCKER_ARGS="--network=host $EXTRA_DOCKER_ARGS" \
+        ./docker/benchbase/run-full-image.sh \
+            --config "$config" --bench "$load_benchmark" \
+            --create=true --load=true --execute=false
+    fi
+
+    # Every benchmark except templated loads its own data (for chbenchmark, in addition to tpcc).
+    if ! echo "$benchmark" | grep -Eqx '(templated)'; then
         echo "INFO: Loading $benchmark data"
         load_benchmark="$benchmark"
         config="config/sample_${benchmark}_config.xml"
+
+        BUILD_IMAGE=false EXTRA_DOCKER_ARGS="--network=host $EXTRA_DOCKER_ARGS" \
+        ./docker/benchbase/run-full-image.sh \
+            --config "$config" --bench "$load_benchmark" \
+            --create=true --load=true --execute=false
     fi
-    BUILD_IMAGE=false EXTRA_DOCKER_ARGS="--network=host $EXTRA_DOCKER_ARGS" \
-    ./docker/benchbase/run-full-image.sh \
-        --config "$config" --bench "$load_benchmark" \
-        --create=true --load=true --execute=false
 else
     echo "INFO: Skipping load of $benchmark data"
 fi

From 2ecc98cd66e4c3cfaf0d72055ab02f4096bdbba7 Mon Sep 17 00:00:00 2001
From: Brian Kroth <bpkroth@microsoft.com>
Date: Wed, 29 May 2024 20:08:29 +0000
Subject: [PATCH 06/10] sqlite chbenchmark query dialect overrides

---
 .../benchmarks/chbenchmark/dialect-sqlite.xml | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 src/main/resources/benchmarks/chbenchmark/dialect-sqlite.xml

diff --git a/src/main/resources/benchmarks/chbenchmark/dialect-sqlite.xml b/src/main/resources/benchmarks/chbenchmark/dialect-sqlite.xml
new file mode 100644
index 000000000..63deb1d80
--- /dev/null
+++ b/src/main/resources/benchmarks/chbenchmark/dialect-sqlite.xml
@@ -0,0 +1,45 @@
+<?xml version="1.0"?>
+<dialects>
+    <dialect type="sqlite">
+        <procedure name="Q7">
+            <statement name="query_stmt">
+                SELECT su_nationkey AS supp_nation, substr(c_state, 1, 1) AS cust_nation, strftime('%Y', o_entry_d) AS l_year,
+                        sum(ol_amount) AS revenue
+                FROM supplier, stock, order_line, oorder, customer, nation n1, nation n2
+                WHERE ol_supply_w_id = s_w_id AND ol_i_id = s_i_id AND MOD ((s_w_id * s_i_id), 10000) = su_suppkey
+                        AND ol_w_id = o_w_id AND ol_d_id = o_d_id AND ol_o_id = o_id AND c_id = o_c_id AND c_w_id = o_w_id
+                        AND c_d_id = o_d_id AND su_nationkey = n1.n_nationkey AND ascii(substr(c_state, 1, 1)) = n2.n_nationkey
+                        AND (
+                            (n1.n_name = 'Germany' AND n2.n_name = 'Cambodia')
+                            OR
+                            (n1.n_name = 'Cambodia' AND n2.n_name = 'Germany')
+                        )
+                GROUP BY su_nationkey, cust_nation, l_year
+                ORDER BY su_nationkey, cust_nation, l_year
+            </statement>
+        </procedure>
+        <procedure name="Q8">
+            <statement name="query_stmt">
+                SELECT strftime('%Y', o_entry_d) AS l_year, sum(
+                    CASE WHEN n2.n_name = 'Germany' THEN ol_amount ELSE 0 END
+                ) / sum(ol_amount) AS mkt_share
+                FROM item, supplier, stock, order_line, oorder, customer, nation n1, nation n2, region
+                WHERE i_id = s_i_id AND ol_i_id = s_i_id AND ol_supply_w_id = s_w_id AND MOD ((s_w_id * s_i_id), 10000) = su_suppkey
+                        AND ol_w_id = o_w_id AND ol_d_id = o_d_id AND ol_o_id = o_id AND c_id = o_c_id AND c_w_id = o_w_id
+                        AND c_d_id = o_d_id AND n1.n_nationkey = ascii(substr(c_state, 1, 1)) AND n1.n_regionkey = r_regionkey
+                        AND 1000 > ol_i_id AND r_name = 'Europe' AND su_nationkey = n2.n_nationkey AND i_data LIKE '%b' AND i_id = ol_i_id
+                GROUP BY l_year ORDER BY l_year
+            </statement>
+        </procedure>
+        <procedure name="Q9">
+            <statement name="query_stmt">
+                SELECT n_name, strftime('%Y', o_entry_d) AS l_year, sum(ol_amount) AS sum_profit
+                FROM item, stock, supplier, order_line, oorder, nation
+                WHERE ol_i_id = s_i_id AND ol_supply_w_id = s_w_id AND MOD ((s_w_id * s_i_id), 10000) = su_suppkey
+                        AND ol_w_id = o_w_id AND ol_d_id = o_d_id AND ol_o_id = o_id AND ol_i_id = i_id AND su_nationkey = n_nationkey
+                        AND i_data LIKE '%bb'
+                GROUP BY n_name, l_year ORDER BY n_name, l_year DESC
+            </statement>
+        </procedure>
+    </dialect>
+</dialects>
\ No newline at end of file

From 45a7dcb18a8342f2a9b39eeba2190194ca9964de Mon Sep 17 00:00:00 2001
From: Brian Kroth <bpkroth@microsoft.com>
Date: Wed, 29 May 2024 21:25:21 +0000
Subject: [PATCH 07/10] syntax fixups

---
 .../benchmarks/chbenchmark/dialect-sqlite.xml | 166 +++++++++++++++++-
 1 file changed, 163 insertions(+), 3 deletions(-)

diff --git a/src/main/resources/benchmarks/chbenchmark/dialect-sqlite.xml b/src/main/resources/benchmarks/chbenchmark/dialect-sqlite.xml
index 63deb1d80..9f172d294 100644
--- a/src/main/resources/benchmarks/chbenchmark/dialect-sqlite.xml
+++ b/src/main/resources/benchmarks/chbenchmark/dialect-sqlite.xml
@@ -1,6 +1,60 @@
 <?xml version="1.0"?>
 <dialects>
     <dialect type="sqlite">
+        <procedure name="Q1">
+            <statement name="query_stmt">
+                SELECT ol_number, sum(ol_quantity) AS sum_qty, sum(ol_amount) AS sum_amount, avg(ol_quantity) AS avg_qty,
+                        avg(ol_amount) AS avg_amount, count(*) AS count_order
+                FROM order_line
+                WHERE ol_delivery_d > datetime('2007-01-02 00:00:00')
+                GROUP BY ol_number
+                ORDER BY ol_number
+            </statement>
+        </procedure>
+        <procedure name="Q2">
+            <statement name="query_stmt">
+                SELECT su_suppkey, su_name, n_name, i_id, i_name, su_address, su_phone, su_comment
+                FROM item, supplier, stock, nation, region, (
+                    SELECT s_i_id AS m_i_id, MIN(s_quantity) AS m_s_quantity
+                    FROM stock, supplier, nation, region
+                    WHERE MOD((s_w_id*s_i_id), 10000) = su_suppkey AND su_nationkey = n_nationkey
+                            AND n_regionkey = r_regionkey AND r_name LIKE 'Europ%'
+                    GROUP BY s_i_id) m
+                WHERE i_id = s_i_id AND MOD((s_w_id * s_i_id), 10000) = su_suppkey AND su_nationkey = n_nationkey
+                        AND n_regionkey = r_regionkey AND i_data LIKE '%b' AND r_name LIKE 'Europ%' AND i_id=m_i_id
+                        AND s_quantity = m_s_quantity
+                ORDER BY n_name, su_name, i_id
+            </statement>
+        </procedure>
+        <procedure name="Q3">
+            <statement name="query_stmt">
+                SELECT ol_o_id, ol_w_id, ol_d_id, sum(ol_amount) AS revenue, o_entry_d
+                FROM customer, new_order, oorder, order_line
+                WHERE c_state LIKE 'A%' AND c_id = o_c_id AND c_w_id = o_w_id AND c_d_id = o_d_id AND no_w_id = o_w_id
+                        AND no_d_id = o_d_id AND no_o_id = o_id AND ol_w_id = o_w_id AND ol_d_id = o_d_id AND ol_o_id = o_id
+                        AND o_entry_d > datetime('2007-01-02 00:00:00')
+                GROUP BY ol_o_id, ol_w_id, ol_d_id, o_entry_d
+                ORDER BY revenue DESC , o_entry_d
+            </statement>
+        </procedure>
+        <procedure name="Q5">
+            <statement name="query_stmt">
+                SELECT n_name, sum(ol_amount) AS revenue
+                FROM customer, oorder, order_line, stock, supplier, nation, region
+                WHERE c_id = o_c_id AND c_w_id = o_w_id AND c_d_id = o_d_id AND ol_o_id = o_id AND ol_w_id = o_w_id
+                        AND ol_d_id=o_d_id AND ol_w_id = s_w_id AND ol_i_id = s_i_id
+                        AND MOD((s_w_id * s_i_id), 10000) = su_suppkey
+                        AND unicode(substr(c_state, 1, 1)) = su_nationkey AND su_nationkey = n_nationkey AND n_regionkey = r_regionkey AND r_name = 'Europe'
+                        AND o_entry_d >= datetime('2007-01-02 00:00:00')
+                GROUP BY n_name ORDER BY revenue DESC
+            </statement>
+        </procedure>
+        <procedure name="Q6">
+            <statement name="query_stmt">
+                SELECT sum(ol_amount) AS revenue FROM order_line WHERE ol_delivery_d >= datetime('1999-01-01 00:00:00')
+                        AND datetime('2020-01-01 00:00:00') > ol_delivery_d AND ol_quantity BETWEEN 1 AND 100000
+            </statement>
+        </procedure>
         <procedure name="Q7">
             <statement name="query_stmt">
                 SELECT su_nationkey AS supp_nation, substr(c_state, 1, 1) AS cust_nation, strftime('%Y', o_entry_d) AS l_year,
@@ -8,7 +62,7 @@
                 FROM supplier, stock, order_line, oorder, customer, nation n1, nation n2
                 WHERE ol_supply_w_id = s_w_id AND ol_i_id = s_i_id AND MOD ((s_w_id * s_i_id), 10000) = su_suppkey
                         AND ol_w_id = o_w_id AND ol_d_id = o_d_id AND ol_o_id = o_id AND c_id = o_c_id AND c_w_id = o_w_id
-                        AND c_d_id = o_d_id AND su_nationkey = n1.n_nationkey AND ascii(substr(c_state, 1, 1)) = n2.n_nationkey
+                        AND c_d_id = o_d_id AND su_nationkey = n1.n_nationkey AND unicode(substr(c_state, 1, 1)) = n2.n_nationkey
                         AND (
                             (n1.n_name = 'Germany' AND n2.n_name = 'Cambodia')
                             OR
@@ -26,7 +80,7 @@
                 FROM item, supplier, stock, order_line, oorder, customer, nation n1, nation n2, region
                 WHERE i_id = s_i_id AND ol_i_id = s_i_id AND ol_supply_w_id = s_w_id AND MOD ((s_w_id * s_i_id), 10000) = su_suppkey
                         AND ol_w_id = o_w_id AND ol_d_id = o_d_id AND ol_o_id = o_id AND c_id = o_c_id AND c_w_id = o_w_id
-                        AND c_d_id = o_d_id AND n1.n_nationkey = ascii(substr(c_state, 1, 1)) AND n1.n_regionkey = r_regionkey
+                        AND c_d_id = o_d_id AND n1.n_nationkey = unicode(substr(c_state, 1, 1)) AND n1.n_regionkey = r_regionkey
                         AND 1000 > ol_i_id AND r_name = 'Europe' AND su_nationkey = n2.n_nationkey AND i_data LIKE '%b' AND i_id = ol_i_id
                 GROUP BY l_year ORDER BY l_year
             </statement>
@@ -41,5 +95,111 @@
                 GROUP BY n_name, l_year ORDER BY n_name, l_year DESC
             </statement>
         </procedure>
+        <procedure name="Q10">
+            <statement name="query_stmt">
+                SELECT c_id, c_last, sum(ol_amount) AS revenue, c_city, c_phone, n_name
+                FROM customer, oorder, order_line, nation
+                WHERE c_id = o_c_id AND c_w_id = o_w_id AND c_d_id = o_d_id AND ol_w_id = o_w_id AND ol_d_id = o_d_id
+                AND ol_o_id = o_id AND o_entry_d >= datetime('2007-01-02 00:00:00')
+                AND ol_delivery_d >= o_entry_d AND n_nationkey = unicode(substr(c_state, 1, 1))
+                GROUP BY c_id, c_last, c_city, c_phone, n_name ORDER BY revenue DESC
+            </statement>
+        </procedure>
+        <procedure name="Q12">
+            <statement name="query_stmt">
+                SELECT o_ol_cnt, sum(CASE WHEN o_carrier_id = 1 OR o_carrier_id = 2 THEN 1 ELSE 0 END) AS high_line_count,
+                        sum(CASE WHEN o_carrier_id != 1 AND o_carrier_id != 2 THEN 1 ELSE 0 END) AS low_line_count
+                FROM oorder, order_line
+                WHERE ol_w_id = o_w_id AND ol_d_id = o_d_id AND ol_o_id = o_id AND ol_delivery_d >= o_entry_d
+                        AND datetime('2020-01-01 00:00:00') > ol_delivery_d
+                GROUP BY o_ol_cnt ORDER BY o_ol_cnt
+            </statement>
+        </procedure>
+        <procedure name="Q13">
+            <statement name="query_stmt">
+                SELECT c_count, count(*) AS custdist FROM (
+                    SELECT c_id, count(o_id) AS c_count
+                        FROM customer LEFT OUTER JOIN oorder ON
+                                (c_w_id = o_w_id AND c_d_id = o_d_id AND c_id = o_c_id AND o_carrier_id > 8)
+                        GROUP BY c_id
+                )
+                GROUP BY c_count
+                ORDER BY custdist DESC, c_count DESC
+            </statement>
+        </procedure>
+        <procedure name="Q14">
+            <statement name="query_stmt">
+                SELECT (100.00 * sum(CASE WHEN i_data LIKE 'PR%' THEN ol_amount ELSE 0 END) / (1 + sum(ol_amount))) AS promo_revenue
+                FROM order_line, item
+                WHERE ol_i_id = i_id AND ol_delivery_d >= datetime('2007-01-02 00:00:00')
+                        AND datetime('2020-01-02') > ol_delivery_d
+            </statement>
+        </procedure>
+        <procedure name="Q15">
+            <statement name="createview_stmt">
+                CREATE view revenue0 (supplier_no, total_revenue) AS
+                    SELECT mod((s_w_id * s_i_id),10000) as supplier_no, sum(ol_amount) as total_revenue
+                    FROM order_line, stock
+                    WHERE ol_i_id = s_i_id AND ol_supply_w_id = s_w_id
+                            AND ol_delivery_d >= to_date('2007-01-02 00:00:00', 'YYYY-MM-DD HH24:MI:SS')
+                    GROUP BY mod((s_w_id * s_i_id),10000)
+            </statement>
+        </procedure>
+        <procedure name="Q16">
+            <statement name="query_stmt">
+                SELECT i_name, substr(i_data, 1, 3) as brand, i_price, count(DISTINCT (mod((s_w_id * s_i_id),10000))) AS supplier_cnt
+                FROM stock, item
+                WHERE i_id = s_i_id AND i_data NOT LIKE 'zz%' AND (mod((s_w_id * s_i_id),10000) NOT IN
+                        (SELECT su_suppkey FROM supplier WHERE su_comment LIKE '%bad%')
+                )
+                GROUP BY i_name, substr(i_data, 1, 3), i_price ORDER BY supplier_cnt DESC
+            </statement>
+        </procedure>
+        <procedure name="Q19">
+            <statement name="query_stmt">
+                SELECT sum(ol_amount) AS revenue
+                FROM order_line, item
+                WHERE (
+                    ol_i_id = i_id AND i_data LIKE '%a' AND ol_quantity >= 1 AND 10 >= ol_quantity
+                            AND i_price BETWEEN 1 AND 400000 AND ol_w_id IN (1, 2, 3)
+                ) OR (
+                    ol_i_id = i_id AND i_data LIKE '%b' AND ol_quantity >= 1 AND 10 >= ol_quantity
+                            AND i_price BETWEEN 1 AND 400000 AND ol_w_id IN (1, 2, 4)
+                ) OR (
+                    ol_i_id = i_id AND i_data LIKE '%c' AND ol_quantity >= 1 AND 10 >= ol_quantity
+                            AND i_price BETWEEN 1 AND 400000 AND ol_w_id IN (1, 5, 3)
+                )
+            </statement>
+        </procedure>
+        <procedure name="Q20">
+            <statement name="query_stmt">
+                SELECT su_name, su_address FROM supplier, nation
+                WHERE su_suppkey IN (
+                    SELECT mod(s_i_id * s_w_id, 10000)
+                    FROM stock INNER JOIN item ON i_id = s_i_id INNER JOIN order_line ON ol_i_id = s_i_id
+                    WHERE ol_delivery_d > datetime('2010-05-23 12:00:00') AND i_data LIKE 'co%'
+                    GROUP BY s_i_id, s_w_id, s_quantity
+                    HAVING 2*s_quantity > sum(ol_quantity)
+                ) AND su_nationkey = n_nationkey AND n_name = 'Germany'
+                ORDER BY su_name
+            </statement>
+        </procedure>
+        <procedure name="Q22">
+            <statement name="query_stmt">
+                SELECT substr(c_state,1,1) AS country, count(*) AS numcust, sum(c_balance) AS totacctbal
+                FROM customer
+                WHERE substr(c_phone,1,1) IN ('1', '2', '3', '4', '5', '6', '7')
+                AND c_balance > (
+                SELECT avg(c_balance)
+                FROM customer
+                WHERE c_balance > 0.00 AND substr(c_phone,1,1) IN ('1', '2', '3', '4', '5', '6', '7'))
+                AND NOT EXISTS (
+                SELECT * FROM oorder
+                WHERE o_c_id = c_id AND o_w_id = c_w_id AND o_d_id = c_d_id
+                )
+                GROUP BY substr(c_state,1,1)
+                ORDER BY substr(c_state,1,1)
+            </statement>
+        </procedure>
     </dialect>
-</dialects>
\ No newline at end of file
+</dialects>

From 5f53004e7183e0a16b78e9c9a2e16f8e85ec08e9 Mon Sep 17 00:00:00 2001
From: Brian Kroth <bpkroth@microsoft.com>
Date: Wed, 29 May 2024 21:30:32 +0000
Subject: [PATCH 08/10] Fix chbenchmark SQLite dialect syntax: replace to_date() with datetime()

---
 .../resources/benchmarks/chbenchmark/dialect-sqlite.xml     | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/main/resources/benchmarks/chbenchmark/dialect-sqlite.xml b/src/main/resources/benchmarks/chbenchmark/dialect-sqlite.xml
index 9f172d294..7f61e9bd2 100644
--- a/src/main/resources/benchmarks/chbenchmark/dialect-sqlite.xml
+++ b/src/main/resources/benchmarks/chbenchmark/dialect-sqlite.xml
@@ -51,8 +51,8 @@
         </procedure>
         <procedure name="Q6">
             <statement name="query_stmt">
-                SELECT sum(ol_amount) AS revenue FROM order_line WHERE ol_delivery_d >= to_date('1999-01-01 00:00:00', 'YYYY-MM-DD HH24:MI:SS')
-                        AND to_date('2020-01-01 00:00:00', 'YYYY-MM-DD HH24:MI:SS') > ol_delivery_d AND ol_quantity BETWEEN 1 AND 100000
+                SELECT sum(ol_amount) AS revenue FROM order_line WHERE ol_delivery_d >= datetime('1999-01-01 00:00:00')
+                        AND datetime('2020-01-01 00:00:00') > ol_delivery_d AND ol_quantity BETWEEN 1 AND 100000
             </statement>
         </procedure>
         <procedure name="Q7">
@@ -141,7 +141,7 @@
                     SELECT mod((s_w_id * s_i_id),10000) as supplier_no, sum(ol_amount) as total_revenue
                     FROM order_line, stock
                     WHERE ol_i_id = s_i_id AND ol_supply_w_id = s_w_id
-                            AND ol_delivery_d >= to_date('2007-01-02 00:00:00', 'YYYY-MM-DD HH24:MI:SS')
+                            AND ol_delivery_d >= datetime('2007-01-02 00:00:00')
                     GROUP BY mod((s_w_id * s_i_id),10000)
             </statement>
         </procedure>

From 9eb8b8c08cb4cca7f1e05ff55ac9b955e5179fb9 Mon Sep 17 00:00:00 2001
From: Brian Kroth <bpkroth@microsoft.com>
Date: Fri, 31 May 2024 13:25:43 -0500
Subject: [PATCH 09/10] Skip chbenchmark on CockroachDB in CI for now

---
 .github/workflows/maven.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index 962f5578e..f3911d957 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -531,12 +531,14 @@ jobs:
       fail-fast: false
       matrix:
         # TODO: Add tpcc-with-reconnects benchmark support
-        benchmark: [ 'auctionmark', 'chbenchmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'templated', 'tpcc', 'tpch', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
+        # TODO: Add chbenchmark benchmark support
+        benchmark: [ 'auctionmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'templated', 'tpcc', 'tpch', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
     services:
       cockroach: # https://hub.docker.com/repository/docker/timveil/cockroachdb-single-node
         image: timveil/cockroachdb-single-node:latest
         env:
           DATABASE_NAME: benchbase
+          # TODO: Expand for additional config adjustments (See Also: #405, #519, #525)
           MEMORY_SIZE: .75
         ports:
           - 26257:26257

From 3273f7aa06e384e1090ad6c7469e7ed158f0c1d4 Mon Sep 17 00:00:00 2001
From: Brian Kroth <bpkroth@microsoft.com>
Date: Fri, 31 May 2024 13:32:22 -0500
Subject: [PATCH 10/10] oracle can't group by computed column aliases

---
 .../resources/benchmarks/chbenchmark/dialect-oracle.xml   | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/main/resources/benchmarks/chbenchmark/dialect-oracle.xml b/src/main/resources/benchmarks/chbenchmark/dialect-oracle.xml
index 9d4559fd2..688b17393 100644
--- a/src/main/resources/benchmarks/chbenchmark/dialect-oracle.xml
+++ b/src/main/resources/benchmarks/chbenchmark/dialect-oracle.xml
@@ -68,7 +68,7 @@
                             OR
                             (n1.n_name = 'Cambodia' AND n2.n_name = 'Germany')
                         )
-                GROUP BY su_nationkey, cust_nation, l_year
+                GROUP BY su_nationkey, substr(c_state, 1, 1), extract(YEAR FROM o_entry_d)
                 ORDER BY su_nationkey, cust_nation, l_year
             </statement>
         </procedure>
@@ -82,7 +82,8 @@
                         AND ol_w_id = o_w_id AND ol_d_id = o_d_id AND ol_o_id = o_id AND c_id = o_c_id AND c_w_id = o_w_id
                         AND c_d_id = o_d_id AND n1.n_nationkey = ascii(substr(c_state, 1, 1)) AND n1.n_regionkey = r_regionkey
                         AND 1000 > ol_i_id AND r_name = 'Europe' AND su_nationkey = n2.n_nationkey AND i_data LIKE '%b' AND i_id = ol_i_id
-                GROUP BY l_year ORDER BY l_year
+                GROUP BY extract(YEAR FROM o_entry_d)
+                ORDER BY l_year
             </statement>
         </procedure>
         <procedure name="Q9">
@@ -92,7 +93,8 @@
                 WHERE ol_i_id = s_i_id AND ol_supply_w_id = s_w_id AND MOD ((s_w_id * s_i_id), 10000) = su_suppkey
                         AND ol_w_id = o_w_id AND ol_d_id = o_d_id AND ol_o_id = o_id AND ol_i_id = i_id AND su_nationkey = n_nationkey
                         AND i_data LIKE '%bb'
-                GROUP BY n_name, l_year ORDER BY n_name, l_year DESC
+                GROUP BY n_name, extract(YEAR FROM o_entry_d)
+                ORDER BY n_name, l_year DESC
             </statement>
         </procedure>
         <procedure name="Q10">