Skip to content

Commit 21dcf46

Browse files
committed
Run PR gpu utests/relvals on both CUDA and ROCm GPUs
1 parent e981d12 commit 21dcf46

File tree

6 files changed

+65
-36
lines changed

6 files changed

+65
-36
lines changed

cleanup-cmssdt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ DIRS="lizard flawfinder invalid-includes cmssw-afs-eos-comparison ubsan_logs ib-
5151
DIRS="${DIRS} check_headers valgrind HLT-Validation ib-static-analysis ib-baseline-tests ib-dqm-tests profiling igprof"
5252
DIRS="${DIRS} iwyu material-budget das_query build-any-ib check-unused-cmsdist-packages class_versions"
5353
DIRS="${DIRS} test-os-alma8 test-os-cs8 test-os-ubi8 test-os-lxplus8 test-os-rhel8 test-os-rocky8 test-os-el8 cms-containers-run-cmssw-test"
54+
DIRS="${DIRS} baseLineComparisonsCUDA baseLineComparisonsROCM"
5455
for dir in ${DIRS}; do
5556
[ -d ${JENKINS_ARTIFACTS}/$dir ] || continue
5657
DIRS_PROCESSED="${DIRS_PROCESSED} ${dir}"

pr_testing/run-pr-relvals.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ echo "${MATRIX_ARGS}" | tr ';' '\n' | while IFS= read -r args; do
1717
if [ $(echo "${args}" | sed 's|.*-l ||;s| .*||' | tr ',' '\n' | grep '^all$' | wc -l) -gt 0 ] ; then
1818
OPTS=""
1919
case "${TEST_FLAVOR}" in
20-
gpu ) OPTS="-w gpu" ;;
20+
cuda | rocm ) OPTS="-w gpu" ;;
2121
high_stats ) ;;
2222
nano ) OPTS="-w nano" ;;
2323
* ) ;;

pr_testing/run-pr-unittests.sh

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -10,57 +10,58 @@ cd $WORKSPACE/${CMSSW_VERSION}
1010
CMSSW_PKG_COUNT=$(ls -d $LOCALRT/src/*/* | wc -l)
1111
REPORT_OPTS="--report-url ${PR_RESULT_URL} $NO_POST"
1212

13-
rm -f ${RESULTS_DIR}/unittestGPU.txt
14-
mark_commit_status_all_prs 'unittests/gpu' 'pending' -u "${BUILD_URL}" -d "Running tests" || true
13+
rm -f ${RESULTS_DIR}/unittest${GPU_FLAVOR}.txt
14+
mark_commit_status_all_prs "unittests/${GPU_FLAVOR}" 'pending' -u "${BUILD_URL}" -d "Running tests" || true
1515
echo '--------------------------------------'
16-
mkdir -p $WORKSPACE/gpuUnitTests
16+
mkdir -p $WORKSPACE/${GPU_FLAVOR}UnitTests
1717
let UT_TIMEOUT=7200+${CMSSW_PKG_COUNT}*20
18-
UTESTS_CMD="USER_UNIT_TESTS=cuda timeout ${UT_TIMEOUT} scram b -v -k -j ${NCPU} unittests "
18+
# Lower-case GPU type used as the USER_UNIT_TESTS selector below.
# NOTE(review): the rest of this script keys everything on GPU_FLAVOR, while
# GPU_T is only visibly set as a loop variable in test_multiple_prs.sh —
# confirm which one the Jenkins job exports. Until then fall back to
# GPU_FLAVOR when GPU_T is unset or empty, so the selector is never blank.
gpu_t_lc=$(echo "${GPU_T:-${GPU_FLAVOR}}" | tr '[A-Z]' '[a-z]')
19+
UTESTS_CMD="USER_UNIT_TESTS=${gpu_t_lc} timeout ${UT_TIMEOUT} scram b -v -k -j ${NCPU} unittests "
1920
echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}"
2021
scram build echo_LD_LIBRARY_PATH || true
2122
scram build -r echo_CXX || true
2223
cms_major=$(echo ${CMSSW_IB} | cut -d_ -f2)
2324
cms_minor=$(echo ${CMSSW_IB} | cut -d_ -f3)
2425
cms_ver="$(echo 00${cms_major} | sed -E 's|^.*(..)$|\1|')$(echo 00${cms_minor} | sed -E 's|^.*(..)$|\1|')"
25-
echo $UTESTS_CMD > $WORKSPACE/gpuUnitTests/log.txt
26-
(eval $UTESTS_CMD && echo 'ALL_OK') > $WORKSPACE/gpuUnitTests/log.txt 2>&1 || true
26+
echo $UTESTS_CMD > $WORKSPACE/${GPU_FLAVOR}UnitTests/log.txt
27+
# Append (>>) rather than truncate (>): the preceding statement has just
# written the command line to this same log file, and reopening it with '>'
# would discard that record. Failures are tolerated here (|| true); the
# presence of the 'ALL_OK' marker in the log is what is checked afterwards.
(eval $UTESTS_CMD && echo 'ALL_OK') >> $WORKSPACE/${GPU_FLAVOR}UnitTests/log.txt 2>&1 || true
2728
echo 'END OF UNIT TESTS'
2829
echo '--------------------------------------'
2930

30-
TEST_ERRORS=$(grep -ai 'had errors\|recipe for target' $WORKSPACE/gpuUnitTests/log.txt | sed "s|'||g;s|.*recipe for target *||;s|.*unittests_|---> test |;s| failed$| timeout|" || true)
31-
TEST_ERRORS=`grep -ai "had errors" $WORKSPACE/gpuUnitTests/log.txt` || true
32-
GENERAL_ERRORS=`grep -a "ALL_OK" $WORKSPACE/gpuUnitTests/log.txt` || true
31+
TEST_ERRORS=$(grep -ai 'had errors\|recipe for target' $WORKSPACE/${GPU_FLAVOR}UnitTests/log.txt | sed "s|'||g;s|.*recipe for target *||;s|.*unittests_|---> test |;s| failed$| timeout|" || true)
32+
TEST_ERRORS=`grep -ai "had errors" $WORKSPACE/${GPU_FLAVOR}UnitTests/log.txt` || true
33+
GENERAL_ERRORS=`grep -a "ALL_OK" $WORKSPACE/${GPU_FLAVOR}UnitTests/log.txt` || true
3334

3435
if [ "X$TEST_ERRORS" != "X" -o "X$GENERAL_ERRORS" = "X" ]; then
35-
echo "Errors in the gpu unit tests"
36-
echo 'GPU_UNIT_TEST_RESULTS;ERROR,GPU Unit Tests,See Log,gpuUnitTests' >> ${RESULTS_DIR}/unittestGPU.txt
36+
echo "Errors in the ${GPU_FLAVOR} unit tests"
37+
echo "${GPU_FLAVOR}_UNIT_TEST_RESULTS;ERROR,GPU Unit Tests,See Log,${GPU_FLAVOR}UnitTests" >> ${RESULTS_DIR}/unittest${GPU_FLAVOR}.txt
3738
ALL_OK=false
3839
UNIT_TESTS_OK=false
39-
$CMS_BOT_DIR/report-pull-request-results PARSE_GPU_UNIT_TESTS_FAIL -f $WORKSPACE/gpuUnitTests/log.txt --report-file ${RESULTS_DIR}/14-unittestGPU-report.res ${REPORT_OPTS}
40-
echo "GpuUnitTests" > ${RESULTS_DIR}/14-failed.res
40+
$CMS_BOT_DIR/report-pull-request-results PARSE_${GPU_FLAVOR}_UNIT_TESTS_FAIL -f $WORKSPACE/${GPU_FLAVOR}UnitTests/log.txt --report-file ${RESULTS_DIR}/14-unittest${GPU_FLAVOR}-report.res ${REPORT_OPTS}
41+
echo "${GPU_FLAVOR}UnitTests" > ${RESULTS_DIR}/14-failed.res
4142
else
42-
echo 'GPU_UNIT_TEST_RESULTS;OK,GPU Unit Tests,See Log,gpuUnitTests' >> ${RESULTS_DIR}/unittestGPU.txt
43+
echo "${GPU_FLAVOR}_UNIT_TEST_RESULTS;OK,GPU Unit Tests,See Log,${GPU_FLAVOR}UnitTests" >> ${RESULTS_DIR}/unittest${GPU_FLAVOR}.txt
4344
fi
44-
echo "<html><head></head><body>" > $WORKSPACE/gpuUnitTests/success.html
45-
cp $WORKSPACE/gpuUnitTests/success.html $WORKSPACE/gpuUnitTests/failed.html
45+
echo "<html><head></head><body>" > $WORKSPACE/${GPU_FLAVOR}UnitTests/success.html
46+
cp $WORKSPACE/${GPU_FLAVOR}UnitTests/success.html $WORKSPACE/${GPU_FLAVOR}UnitTests/failed.html
4647
UT_ERR=false
4748
utlog="testing.log"
4849
for t in $(find $WORKSPACE/$CMSSW_IB/tmp/${SCRAM_ARCH}/src -name ${utlog} -type f | sed "s|$WORKSPACE/$CMSSW_IB/tmp/${SCRAM_ARCH}/||;s|/${utlog}$||") ; do
49-
mkdir -p $WORKSPACE/gpuUnitTests/${t}
50-
mv $WORKSPACE/$CMSSW_IB/tmp/${SCRAM_ARCH}/${t}/${utlog} $WORKSPACE/gpuUnitTests/${t}/
51-
if [ $(grep -a '^\-\-\-> test *[^ ]* *succeeded$' $WORKSPACE/gpuUnitTests/${t}/${utlog} | wc -l) -gt 0 ] ; then
52-
echo "<a href='${t}/${utlog}'>${t}</a><br/>" >> $WORKSPACE/gpuUnitTests/success.html
50+
mkdir -p $WORKSPACE/${GPU_FLAVOR}UnitTests/${t}
51+
mv $WORKSPACE/$CMSSW_IB/tmp/${SCRAM_ARCH}/${t}/${utlog} $WORKSPACE/${GPU_FLAVOR}UnitTests/${t}/
52+
if [ $(grep -a '^\-\-\-> test *[^ ]* *succeeded$' $WORKSPACE/${GPU_FLAVOR}UnitTests/${t}/${utlog} | wc -l) -gt 0 ] ; then
53+
echo "<a href='${t}/${utlog}'>${t}</a><br/>" >> $WORKSPACE/${GPU_FLAVOR}UnitTests/success.html
5354
else
54-
echo "<a href='${t}/${utlog}'>${t}</a><br/>" >> $WORKSPACE/gpuUnitTests/failed.html
55+
echo "<a href='${t}/${utlog}'>${t}</a><br/>" >> $WORKSPACE/${GPU_FLAVOR}UnitTests/failed.html
5556
UT_ERR=true
5657
fi
5758
done
58-
if ! $UT_ERR ; then echo "No unit test failed" >> $WORKSPACE/gpuUnitTests/failed.html ; fi
59-
echo "</body></html>" >> $WORKSPACE/gpuUnitTests/success.html
60-
echo "</body></html>" >> $WORKSPACE/gpuUnitTests/failed.html
59+
if ! $UT_ERR ; then echo "No unit test failed" >> $WORKSPACE/${GPU_FLAVOR}UnitTests/failed.html ; fi
60+
echo "</body></html>" >> $WORKSPACE/${GPU_FLAVOR}UnitTests/success.html
61+
echo "</body></html>" >> $WORKSPACE/${GPU_FLAVOR}UnitTests/failed.html
6162
prepare_upload_results
6263
if $UNIT_TESTS_OK ; then
63-
mark_commit_status_all_prs 'unittests/gpu' 'success' -u "${BUILD_URL}" -d "Passed"
64+
mark_commit_status_all_prs "unittests/${GPU_FLAVOR}" 'success' -u "${BUILD_URL}" -d "Passed"
6465
else
65-
mark_commit_status_all_prs 'unittests/gpu' 'error' -u "${BUILD_URL}" -d "Some unit tests were failed."
66+
mark_commit_status_all_prs "unittests/${GPU_FLAVOR}" 'error' -u "${BUILD_URL}" -d "Some unit tests were failed."
6667
fi

pr_testing/test_multiple_prs.sh

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/bin/bash -ex
1+
#!/bin/bash -ex
22
# This script will be called by Jenkins job 'ib-run-pr-tests'
33
# and
44
# 1) will merge multiple PRs for multiple repos
@@ -161,6 +161,7 @@ if [ $(echo "${CONFIG_LINE}" | grep "PROD_ARCH=1" | wc -l) -gt 0 ] ; then
161161
fi
162162
fi
163163
fi
164+
ALL_GPU_TYPES=("cuda" "rocm")
164165

165166
# ----------
166167
# -- MAIN --
@@ -379,6 +380,16 @@ if $DO_COMPARISON ; then
379380
grep -v '^\(WORKFLOWS\|MATRIX_ARGS\)=' run-baseline-${BUILD_ID}-01.${ex_type_lc} > run-baseline-${BUILD_ID}-02.${ex_type_lc}
380381
echo "WORKFLOWS=-l ${WF_LIST}" >> run-baseline-${BUILD_ID}-02.${ex_type_lc}
381382
echo "MATRIX_ARGS=${WF_ARGS}" >> run-baseline-${BUILD_ID}-02.${ex_type_lc}
383+
if [ X"${ex_type_lc}" = X"gpu" ]; then
384+
for GPU_T in ${ALL_GPU_TYPES[@]}; do
385+
cp run-baseline-${BUILD_ID}-01.${ex_type_lc} run-baseline-${BUILD_ID}-01.${GPU_T}
386+
sed -i -e "s/TEST_FLAVOR=gpu/TEST_FLAVOR=${GPU_T}/g" run-baseline-${BUILD_ID}-01.${GPU_T}
387+
388+
cp run-baseline-${BUILD_ID}-02.${ex_type_lc} run-baseline-${BUILD_ID}-02.${GPU_T}
389+
sed -i -e "s/TEST_FLAVOR=gpu/TEST_FLAVOR=${GPU_T}/g" run-baseline-${BUILD_ID}-02.${GPU_T}
390+
done
391+
rm run-baseline-${BUILD_ID}-01.${ex_type_lc} run-baseline-${BUILD_ID}-02.${ex_type_lc}
392+
fi
382393
done
383394
popd
384395
send_jenkins_artifacts $WORKSPACE/ib-baseline-tests/ ib-baseline-tests/
@@ -1314,7 +1325,9 @@ if [ "X$BUILD_OK" = Xtrue -a "$RUN_TESTS" = "true" ]; then
13141325
fi
13151326
if [ $(echo ${ENABLE_BOT_TESTS} | tr ',' ' ' | tr ' ' '\n' | grep '^GPU$' | wc -l) -gt 0 -a X"${DISABLE_GPU_TESTS}" != X"true" ] ; then
13161327
DO_GPU_TESTS=true
1317-
mark_commit_status_all_prs 'unittests/gpu' 'pending' -u "${BUILD_URL}" -d "Waiting for tests to start"
1328+
for GPU_T in ${ALL_GPU_TYPES[@]} ; do
1329+
# The status context must be double-quoted so that ${GPU_T} expands to the
# current entry of ALL_GPU_TYPES; inside single quotes the shell passes the
# literal text 'unittests/${GPU_T}' for every iteration of the loop.
mark_commit_status_all_prs "unittests/${GPU_T}" 'pending' -u "${BUILD_URL}" -d "Waiting for tests to start"
1330+
done
13181331
fi
13191332
if [ $(echo ${ENABLE_BOT_TESTS} | tr ',' ' ' | tr ' ' '\n' | grep '^HLT_P2_TIMING$' | wc -l) -gt 0 ] ; then
13201333
if [ $(echo ${ARCHITECTURE} | grep "_amd64_" | wc -l) -gt 0 ] ; then
@@ -1456,6 +1469,12 @@ if [ "X$DO_SHORT_MATRIX" = Xtrue ]; then
14561469
ex_type_lc=$(echo ${ex_type} | tr '[A-Z]' '[a-z]')
14571470
grep -v '^MATRIX_ARGS=' $WORKSPACE/run-relvals.prop > $WORKSPACE/run-relvals-${ex_type_lc}.prop
14581471
echo "MATRIX_ARGS=$(get_pr_relval_args $DO_COMPARISON _${ex_type})" >> $WORKSPACE/run-relvals-${ex_type_lc}.prop
1472+
if [ "${ex_type_lc}" = "gpu" ]; then
1473+
for GPU_T in ${ALL_GPU_TYPES[@]}; do
1474+
cp $WORKSPACE/run-relvals-${ex_type_lc}.prop $WORKSPACE/run-relvals-${GPU_T}.prop
1475+
done
1476+
rm $WORKSPACE/run-relvals-${ex_type_lc}.prop
1477+
fi
14591478
done
14601479
if [ $(runTheMatrix.py --help | grep '^ *--maxSteps' | wc -l) -eq 0 ] ; then
14611480
mark_commit_status_all_prs "relvals/input" 'success' -u "${BUILD_URL}" -d "Not ran, runTheMatrix does not support --maxSteps flag" -e
@@ -1487,7 +1506,9 @@ if [ "X$DO_ADDON_TESTS" = Xtrue ]; then
14871506
fi
14881507

14891508
if [ "X$DO_GPU_TESTS" = Xtrue ]; then
1490-
cp $WORKSPACE/test-env.txt $WORKSPACE/run-unittests.prop
1509+
for GPU_T in ${ALL_GPU_TYPES[@]}; do
1510+
cp $WORKSPACE/test-env.txt $WORKSPACE/run-unittests-${GPU_T}.prop
1511+
done
14911512
fi
14921513

14931514
if ${BUILD_EXTERNAL} ; then
@@ -1498,7 +1519,7 @@ fi
14981519

14991520
if [ "${DO_PROFILING}" = "true" ] ; then
15001521
PROFILING_WORKFLOWS=$($CMS_BOT_DIR/cmssw-pr-test-config _PROFILING | tr ',' ' ')
1501-
for wf in ${PROFILING_WORKFLOWS};do
1522+
for wf in ${PROFILING_WORKFLOWS}; do
15021523
cp $WORKSPACE/test-env.txt $WORKSPACE/run-profiling-$wf.prop
15031524
echo "PROFILING_WORKFLOWS=${wf}" >> $WORKSPACE/run-profiling-$wf.prop
15041525
done

report-pull-request-results.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ def read_unit_tests_file(unit_tests_file):
411411
send_message_pr(message)
412412

413413

414-
def read_gpu_tests_file(unit_tests_file):
414+
def read_gpu_tests_file(unit_tests_file, gpu_flavor="GPU"):
415415
errors_found = ""
416416
err_cnt = 0
417417
for line in openlog(unit_tests_file):
@@ -423,8 +423,8 @@ def read_gpu_tests_file(unit_tests_file):
423423
continue
424424
errors_found += line
425425
message = (
426-
"\n## GPU Unit Tests\n\nI found %s errors in the following unit tests:\n\n<pre>%s</pre>"
427-
% (err_cnt, errors_found)
426+
"\n## %s Unit Tests\n\nI found %s errors in the following unit tests:\n\n<pre>%s</pre>"
427+
% (gpu_flavor, err_cnt, errors_found)
428428
)
429429
send_message_pr(message)
430430

@@ -606,6 +606,8 @@ def complain_missing_param(param_name):
606606
GITLOG_FILE_BASE_URL = "%s/git-recent-commits.json" % options.report_url
607607
GIT_CMS_MERGE_TOPIC_BASE_URL = "%s/git-merge-result" % options.report_url
608608

609+
ACTION = ACTION.upper()
610+
609611
if ACTION == "GET_BASE_MESSAGE":
610612
get_base_message()
611613
elif ACTION == "PARSE_UNIT_TESTS_FAIL":
@@ -630,6 +632,10 @@ def complain_missing_param(param_name):
630632
read_material_budget_log_file(options.unit_tests_file)
631633
elif ACTION == "MERGE_COMMITS":
632634
add_to_report(get_recent_merges_message())
635+
elif ACTION == "PARSE_CUDA_UNIT_TESTS_FAIL":
636+
read_gpu_tests_file(options.unit_tests_file, "CUDA")
637+
elif ACTION == "PARSE_ROCM_UNIT_TESTS_FAIL":
638+
read_gpu_tests_file(options.unit_tests_file, "ROCm")
633639
elif ACTION == "PARSE_GPU_UNIT_TESTS_FAIL":
634640
read_gpu_tests_file(options.unit_tests_file)
635641
else:

run-ib-pr-matrix.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ if [ "${CHECK_WORKFLOWS}" = "true" ] ; then
99
send_jenkins_artifacts ${WORKSPACE}/workflows-${BUILD_ID}.log ${ARTIFACT_DIR}/workflows-${BUILD_ID}.log
1010
OPTS=""
1111
case "${TEST_FLAVOR}" in
12-
gpu ) OPTS="-w gpu" ;;
12+
cuda | rocm ) OPTS="-w gpu" ;;
1313
high_stats ) ;;
1414
nano ) OPTS="-w nano" ;;
1515
* ) ;;
@@ -49,7 +49,7 @@ pushd "$WORKSPACE/matrix-results"
4949
CMD_OPTS=""
5050
if ${PRODUCTION_RELEASE} && cmsDriver.py --help | grep -q '\-\-maxmem_profile' ; then CMD_OPTS="--maxmem_profile" ; fi
5151
case "${TEST_FLAVOR}" in
52-
gpu ) MATRIX_ARGS="-w gpu ${MATRIX_ARGS}" ;;
52+
cuda | rocm ) MATRIX_ARGS="-w gpu ${MATRIX_ARGS}" ;;
5353
high_stats ) CMD_OPTS="-n 500" ; MATRIX_ARGS="-i all ${MATRIX_ARGS}" ;;
5454
threading ) MATRIX_ARGS="-i all -t 4 ${MATRIX_ARGS}" ; let NJOBS=(${NJOBS}/4)+1 ;;
5555
nano ) MATRIX_ARGS="-w nano -i all ${MATRIX_ARGS}" ;;

0 commit comments

Comments
 (0)