Commit 87afd0f

Merge branch 'main' into rf_1203
2 parents: b861aca + 05e8630 · commit 87afd0f

File tree

121 files changed: +3156 additions, -976 deletions


.github/actions/benchmark_cloud/action.yml

Lines changed: 15 additions & 21 deletions
@@ -10,6 +10,9 @@ inputs:
   dataset:
     description: "hits/tpch"
     required: true
+  database:
+    description: "Database name for benchmark queries"
+    required: false
   source:
     description: "pr/release"
     required: true
@@ -52,7 +55,11 @@ runs:
          echo "database=load_test_${{ inputs.run_id }}" >> $GITHUB_OUTPUT
          echo "tries=1" >> $GITHUB_OUTPUT
        else
-          echo "database=clickbench" >> $GITHUB_OUTPUT
+          database="${{ inputs.database }}"
+          if [[ -z "$database" ]]; then
+            database="clickbench"
+          fi
+          echo "database=$database" >> $GITHUB_OUTPUT
          echo "tries=3" >> $GITHUB_OUTPUT
        fi

@@ -65,37 +72,24 @@ runs:
        BENCHMARK_VERSION: ${{ inputs.version }}
        BENCHMARK_DATABASE: ${{ steps.prepare.outputs.database }}
        BENCHMARK_TRIES: ${{ steps.prepare.outputs.tries }}
+        BENCHMARK_SOURCE: ${{ inputs.source }}
+        BENCHMARK_SOURCE_ID: ${{ inputs.source_id }}
+        BENCHMARK_SHA: ${{ inputs.sha }}
        CLOUD_USER: ${{ inputs.cloud_user }}
        CLOUD_PASSWORD: ${{ inputs.cloud_password }}
        CLOUD_GATEWAY: ${{ inputs.cloud_gateway }}
        CLOUD_WAREHOUSE: benchmark-${{ inputs.run_id }}
      shell: bash
      run: |
-        ./benchmark_cloud.sh
-
-    - name: Prepare Metadata
-      working-directory: benchmark/clickbench
-      shell: bash
-      run: |
-        case ${{ inputs.source }} in
-          pr)
-            yq -i '.system = "Databend(PR#${{ inputs.source_id }})"' result.json
-            ;;
-          release)
-            yq -i '.system = "Databend(Release@${{ inputs.source_id }})"' result.json
-            ;;
-          *)
-            echo "Unsupported benchmark source: ${{ inputs.source }}"
-            exit 1
-        esac
-        yq -i '.comment = "commit: ${{ inputs.sha }}"' result.json
-        mv result.json result-${{ inputs.dataset }}-cloud-${{ inputs.size }}.json
+        python3 benchmark_cloud.py

    - name: Upload artifact
      uses: actions/upload-artifact@v4
      with:
        name: benchmark-${{ inputs.dataset }}-${{ inputs.size }}
-        path: benchmark/clickbench/result-${{ inputs.dataset }}-cloud-${{ inputs.size }}.json
+        path: |
+          benchmark/clickbench/result-${{ inputs.dataset }}-cloud-${{ inputs.size }}.json
+          benchmark/clickbench/result-${{ inputs.dataset }}-cloud-${{ inputs.size }}-*.ndjson

    - name: Remove warehouse
      if: always()
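
Note: the action now exports BENCHMARK_SOURCE, BENCHMARK_SOURCE_ID, and BENCHMARK_SHA and delegates to benchmark_cloud.py, replacing the deleted yq-based "Prepare Metadata" step. The script itself is not part of this diff; a minimal sketch of the equivalent tagging logic, assuming the same result.json layout as before, might look like this:

# Hypothetical sketch only; the real benchmark_cloud.py is not shown in this commit.
import json
import os
import sys

source = os.environ["BENCHMARK_SOURCE"]        # "pr" or "release"
source_id = os.environ["BENCHMARK_SOURCE_ID"]  # PR number or release tag
sha = os.environ["BENCHMARK_SHA"]              # commit under test

with open("result.json") as f:
    result = json.load(f)

# Mirrors the removed yq step: stamp the system name and the commit comment.
if source == "pr":
    result["system"] = f"Databend(PR#{source_id})"
elif source == "release":
    result["system"] = f"Databend(Release@{source_id})"
else:
    sys.exit(f"Unsupported benchmark source: {source}")
result["comment"] = f"commit: {sha}"

with open("result.json", "w") as f:
    json.dump(result, f)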

.github/workflows/reuse.benchmark.yml

Lines changed: 53 additions & 15 deletions
@@ -119,6 +119,7 @@ jobs:
          cloud_user: ${{ secrets.BENCHMARK_CLOUD_USER }}
          cloud_password: ${{ secrets.BENCHMARK_CLOUD_PASSWORD }}
          cloud_gateway: ${{ secrets.BENCHMARK_CLOUD_GATEWAY }}
+          database: load
      - name: clean
        if: always()
        continue-on-error: true
@@ -136,10 +137,11 @@ jobs:
    strategy:
      matrix:
        include:
-          - { dataset: hits, size: Small }
-          - { dataset: hits, size: Large }
-          - { dataset: tpch, size: Small }
-          - { dataset: tpch, size: Large }
+          - { dataset: hits, size: Small, database: clickbench, timeout: 10 }
+          - { dataset: hits, size: Large, database: clickbench, timeout: 10 }
+          - { dataset: tpch, size: Small, database: clickbench, timeout: 20 }
+          - { dataset: tpch, size: Large, database: clickbench, timeout: 20 }
+          - { dataset: tpch1000, size: Large, database: tpch_1000, timeout: 60 }
      fail-fast: true
      max-parallel: 1
    steps:
@@ -154,7 +156,7 @@ jobs:
      - uses: ./.github/actions/setup_bendsql
      - uses: ./.github/actions/benchmark_cloud
        if: inputs.target == 'cloud' || inputs.target == 'all'
-        timeout-minutes: 20
+        timeout-minutes: ${{ matrix.timeout }}
        id: benchmark
        with:
          sha: ${{ inputs.sha }}
@@ -167,6 +169,7 @@ jobs:
          cloud_user: ${{ secrets.BENCHMARK_CLOUD_USER }}
          cloud_password: ${{ secrets.BENCHMARK_CLOUD_PASSWORD }}
          cloud_gateway: ${{ secrets.BENCHMARK_CLOUD_GATEWAY }}
+          database: ${{ matrix.database }}
      - name: clean
        if: always()
        continue-on-error: true
@@ -191,11 +194,18 @@ jobs:
          pattern: benchmark-*
          merge-multiple: true
      - name: Get Report Prefix
+        working-directory: benchmark/clickbench
        run: |
-          for result in benchmark/clickbench/results/*.json; do
+          shopt -s nullglob
+          for result in results/*.json; do
            dataset=$(echo $result | sed -E 's/.*result-(\w+)-.*\.json/\1/')
-            mkdir -p benchmark/clickbench/results/${dataset}/
-            mv $result benchmark/clickbench/results/${dataset}/$(basename $result)
+            mkdir -p results/${dataset}/
+            mv $result results/${dataset}/$(basename $result)
+          done
+          for ndjson in results/*.ndjson; do
+            dataset=$(echo $ndjson | sed -E 's/.*result-(\w+)-.*\.ndjson/\1/')
+            mkdir -p results/${dataset}/
+            mv $ndjson results/${dataset}/$(basename $ndjson)
          done
          echo "REPORT_S3_PREFIX=s3://benchmark/clickbench/pr/${{ inputs.source_id }}/${{ inputs.run_id }}" >> $GITHUB_ENV
      - name: Upload PR clickbench result to R2
@@ -208,10 +218,12 @@ jobs:
        working-directory: benchmark/clickbench
        run: |
          echo -e "## ClickBench Report\n" > /tmp/body
+          shopt -s nullglob
          for p in results/*; do
+            [ -d "$p" ] || continue
            dataset=$(basename $p)
            aws s3 sync results/$dataset/ ${REPORT_S3_PREFIX}/ --include "*.json" --no-progress --checksum-algorithm=CRC32
-            aws s3 sync "s3://benchmark/clickbench/release/${dataset}/latest/" ./results/${dataset}/ --exclude "*" --include "*.json"
+            aws s3 sync "s3://benchmark/clickbench/release/${dataset}/latest/" ./results/${dataset}/ --exclude "*" --include "*.json" || true
            ./update_results.py --dataset $dataset --pr ${{ inputs.source_id }}
            aws s3 cp ./results/${dataset}.html ${REPORT_S3_PREFIX}/${dataset}.html --no-progress --checksum-algorithm=CRC32
            echo "* **${dataset}**: https://benchmark.databend.com/clickbench/pr/${{ inputs.source_id }}/${{ inputs.run_id }}/${dataset}.html" >> /tmp/body
@@ -233,6 +245,7 @@ jobs:
          - "tpch"
          - "hits"
          - "load"
+          - "tpch1000"
          # - "internal"
    steps:
      - uses: actions/checkout@v4
@@ -245,6 +258,20 @@ jobs:
          path: benchmark/clickbench/results
          pattern: benchmark-${{ matrix.dataset }}-*
          merge-multiple: true
+      - name: Prepare results directory
+        working-directory: benchmark/clickbench
+        run: |
+          shopt -s nullglob
+          for result in results/*.json; do
+            dataset=$(echo $result | sed -E 's/.*result-(\w+)-.*\.json/\1/')
+            mkdir -p results/${dataset}/
+            mv $result results/${dataset}/$(basename $result)
+          done
+          for ndjson in results/*.ndjson; do
+            dataset=$(echo $ndjson | sed -E 's/.*result-(\w+)-.*\.ndjson/\1/')
+            mkdir -p results/${dataset}/
+            mv $ndjson results/${dataset}/$(basename $ndjson)
+          done
      - name: Generate report and upload to R2
        working-directory: benchmark/clickbench
        env:
@@ -253,12 +280,6 @@ jobs:
          AWS_DEFAULT_REGION: auto
          AWS_ENDPOINT_URL: ${{ secrets.R2_ENDPOINT_URL }}
        run: |
-          for result in results/*.json; do
-            dataset=$(echo $result | sed -E 's/.*result-(\w+)-.*\.json/\1/')
-            mkdir -p results/${dataset}/
-            mv $result results/${dataset}/$(basename $result)
-          done
-
          aws s3 sync s3://benchmark/clickbench/release/${{ matrix.dataset }}/$(date --date='-1 month' -u +%Y)/$(date --date='-1 month' -u +%m)/ ./results/${{ matrix.dataset }}/
          aws s3 sync s3://benchmark/clickbench/release/${{ matrix.dataset }}/$(date -u +%Y)/$(date -u +%m)/ ./results/${{ matrix.dataset }}/
          ./update_results.py --dataset ${{ matrix.dataset }} --release ${{ inputs.source_id }}
@@ -271,3 +292,20 @@ jobs:
          done

          aws s3 cp ./results/${{ matrix.dataset }}.html s3://benchmark/clickbench/release/${{ matrix.dataset }}.html --no-progress --checksum-algorithm=CRC32
+      - name: Upload NDJSON archives to R2
+        working-directory: benchmark/clickbench
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
+          AWS_DEFAULT_REGION: auto
+          AWS_ENDPOINT_URL: ${{ secrets.R2_ENDPOINT_URL }}
+          DATASET: ${{ matrix.dataset }}
+        run: |
+          shopt -s nullglob
+          MONTH=$(date -u +%Y-%m)
+          ARCHIVE_PREFIX="s3://benchmark/results/${DATASET}/${MONTH}/"
+          IMPORT_PREFIX="s3://benchmark/results/import/"
+          for file in ./results/${DATASET}/*.ndjson; do
+            aws s3 cp $file "${ARCHIVE_PREFIX}$(basename $file)" --no-progress --checksum-algorithm=CRC32
+            aws s3 cp $file "${IMPORT_PREFIX}$(basename $file)" --no-progress --checksum-algorithm=CRC32
+          done
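
The result-sorting steps above key on the dataset name embedded in the artifact filename (result-<dataset>-cloud-<size>.json and the new result-<dataset>-cloud-<size>-*.ndjson), extracted with sed -E 's/.*result-(\w+)-.*\.json/\1/'. The same extraction, illustrated in Python with assumed example filenames:

# Illustration of the filename-to-dataset mapping done by the workflow's sed calls.
import re

def dataset_of(path: str) -> str:
    # Equivalent of: sed -E 's/.*result-(\w+)-.*\.(json|ndjson)/\1/'
    m = re.search(r"result-(\w+)-.*\.(?:json|ndjson)$", path)
    if m is None:
        raise ValueError(f"unexpected result filename: {path}")
    return m.group(1)

# Example filenames are assumptions following the pattern above.
assert dataset_of("results/result-hits-cloud-Small.json") == "hits"
assert dataset_of("results/result-tpch1000-cloud-Large-0.ndjson") == "tpch1000"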

Cargo.lock

Lines changed: 6 additions & 3 deletions
Some generated files are not rendered by default.

Cargo.toml

Lines changed: 2 additions & 0 deletions
@@ -487,6 +487,7 @@ simple_hll = { version = "0.0.4", features = ["serde_borsh"] }
 simsearch = "0.2"
 siphasher = "0.3"
 sled = { version = "0.34", default-features = false }
+smallvec = "1.13.2"
 snailquote = "0.3.1"
 snap = "1"
 socket2 = "0.5.3"
@@ -548,6 +549,7 @@ cargo_metadata = "0.19"
 fast-float2 = "0.2.3"
 gix = "0.71.0"
 indent = "0.1.1"
+inventory = "0.3.15"
 logos = "0.12.1"
 nom = "8.0.0"
 nom-rule = "0.5.1"
