
Commit d70c269

Fixed comparison ERROR on coordinator, and extended hash, bitmap, and list benchmarks (#270)

* Enabled running forks source built benchmarks
* Fixed server_name Null check
* Enabled passing baseline/comparison hash and github_repo to ensure proper data filtering on compare. Removed refs/heads/ usage from builder
* Skipping CLI builder test on CI
* Added --baseline-target-branch and --comparison-target-branch to the compare tool
* Added GEOPOS and GEOSEARCH WITHCOORD new benchmarks
* Included the connection setup benchmark using HELLO
* Bumping version from 0.1.218 to 0.1.219
* Added APPEND/INCRBY/INCRBYFLOAT/SETRANGE benchmarks
* Included APPEND, INCRBY, INCRBYFLOAT, SETRANGE pipeline 10 benchmarks
* Added SETEX benchmark
* Added ZUNION, ZUNIONSTORE, and extra ZADD benchmarks. Included SADD benchmarks with intset underlying encoding
* Included extra pipeline 10 and lrange with longs benchmarks
* Included HGETALL 50 fields use-case
* Added HGETALL 50 fields use-case. 10Bytes and 100Bytes
* Fixed comparison on CI
* Added BITCOUNT benchmarks
1 parent e31755f commit d70c269

15 files changed (+401 / -86 lines)

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "redis-benchmarks-specification"
-version = "0.1.226"
+version = "0.1.234"
 description = "The Redis benchmarks specification describes the cross-language/tools requirements and expectations to foster performance and observability standards around redis related technologies. Members from both industry and academia, including organizations and individuals are encouraged to contribute."
 authors = ["filipecosta90 <[email protected]>","Redis Performance Group <[email protected]>"]
 readme = "Readme.md"

redis_benchmarks_specification/__builder__/builder.py

Lines changed: 11 additions & 10 deletions
@@ -370,16 +370,17 @@ def builder_process_stream(
         deps_list.append("fpconv")
         redis_temporary_dir = temporary_dir + "/" + redis_dir + "/"
         logging.info("Using redis temporary dir {}".format(redis_temporary_dir))
-        build_command = "bash -c 'make Makefile.dep && cd ./deps && CXX={} CC={} make {} {} -j && cd .. && CXX={} CC={} make {} {} -j'".format(
-            cpp_compiler,
-            compiler,
-            " ".join(deps_list),
-            build_vars_str,
-            cpp_compiler,
-            compiler,
-            "redis-server",
-            build_vars_str,
-        )
+        # build_command = "bash -c 'make Makefile.dep && cd ./deps && CXX={} CC={} make {} {} -j && cd .. && CXX={} CC={} make {} {} -j'".format(
+        #     cpp_compiler,
+        #     compiler,
+        #     " ".join(deps_list),
+        #     build_vars_str,
+        #     cpp_compiler,
+        #     compiler,
+        #     "redis-server",
+        #     build_vars_str,
+        # )
+        build_command = "sh -c 'make -j'"
         if b"build_command" in testDetails:
             build_command = testDetails[b"build_command"].decode()
         server_name = "redis"
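Net effect of the hunk above: the builder now defaults to a plain parallel `make`, while an explicit `build_command` carried in the stream entry still takes precedence. A minimal sketch of that default-plus-override lookup, assuming only that `testDetails` is the bytes-keyed dict shown in the hunk:

    # Minimal sketch (assumption: testDetails mirrors the bytes-keyed dict used above).
    def resolve_build_command(testDetails):
        build_command = "sh -c 'make -j'"  # new default: plain parallel make
        if b"build_command" in testDetails:
            # an explicit build command supplied with the work item still wins
            build_command = testDetails[b"build_command"].decode()
        return build_command


    print(resolve_build_command({}))  # sh -c 'make -j'
    print(resolve_build_command({b"build_command": b"make -j MALLOC=libc"}))  # make -j MALLOC=libc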

redis_benchmarks_specification/__compare__/compare.py

Lines changed: 55 additions & 32 deletions
@@ -284,7 +284,7 @@ def compare_command_logic(args, project_name, project_version):
     (
         detected_regressions,
         table_output,
-        total_improvements,
+        improvements_list,
         regressions_list,
         total_stable,
         total_unstable,
@@ -332,6 +332,7 @@ def compare_command_logic(args, project_name, project_version):
         args.improvement_str,
     )
     total_regressions = len(regressions_list)
+    total_improvements = len(improvements_list)
     prepare_regression_comment(
         auto_approve,
         baseline_branch,
@@ -359,6 +360,7 @@ def compare_command_logic(args, project_name, project_version):
         verbose,
         args.regressions_percent_lower_limit,
         regressions_list,
+        improvements_list,
     )
     return (
         detected_regressions,
@@ -398,6 +400,7 @@ def prepare_regression_comment(
     verbose,
     regressions_percent_lower_limit,
     regressions_list=[],
+    improvements_list=[],
 ):
     if total_comparison_points > 0:
         comment_body = "### Automated performance analysis summary\n\n"
@@ -424,18 +427,27 @@
             comparison_summary += "- Detected a total of {} improvements above the improvement water line.\n".format(
                 total_improvements
             )
+            if len(improvements_list) > 0:
+                regression_values = [l[1] for l in improvements_list]
+                regression_df = pd.DataFrame(regression_values)
+                median_regression = round(float(regression_df.median().iloc[0]), 1)
+                max_regression = round(float(regression_df.max().iloc[0]), 1)
+                min_regression = round(float(regression_df.min().iloc[0]), 1)
+
+                comparison_summary += f" - Median/Common-Case improvement was {median_regression}% and ranged from [{min_regression}%,{max_regression}%].\n"
+
         if total_regressions > 0:
             comparison_summary += "- Detected a total of {} regressions bellow the regression water line {}.\n".format(
                 total_regressions, regressions_percent_lower_limit
             )
             if len(regressions_list) > 0:
                 regression_values = [l[1] for l in regressions_list]
                 regression_df = pd.DataFrame(regression_values)
-                median_regression = round(float(regression_df.median().iloc[0]), 2)
-                max_regression = round(float(regression_df.max().iloc[0]), 2)
-                min_regression = round(float(regression_df.min().iloc[0]), 2)
+                median_regression = round(float(regression_df.median().iloc[0]), 1)
+                max_regression = round(float(regression_df.max().iloc[0]), 1)
+                min_regression = round(float(regression_df.min().iloc[0]), 1)

-                comparison_summary += f" - Median/Common-Case regression was {median_regression}%% and ranged from [{min_regression},{max_regression}] %%.\n"
+                comparison_summary += f" - Median/Common-Case regression was {median_regression}% and ranged from [{min_regression}%,{max_regression}%].\n"

         comment_body += comparison_summary
         comment_body += "\n"
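Two things happen in the summary hunk above: improvement statistics are now reported (median/min/max of the improvement percentages, computed with pandas just like the regression ones), and the regression line drops the stray `%%` escapes, which are only needed for printf-style `%` formatting, not inside f-strings. A self-contained sketch of both points, using made-up percentages:

    # Sketch: summarizing [test_name, pct_change] pairs the way the hunk does.
    import pandas as pd

    improvements_list = [["test-a", 5.0], ["test-b", 12.5], ["test-c", 8.0]]  # illustrative data
    values = [entry[1] for entry in improvements_list]
    df = pd.DataFrame(values)
    median_v = round(float(df.median().iloc[0]), 1)
    max_v = round(float(df.max().iloc[0]), 1)
    min_v = round(float(df.min().iloc[0]), 1)

    # Inside an f-string, "%" is already a literal character; "%%" would be printed as-is.
    print(f" - Median/Common-Case improvement was {median_v}% and ranged from [{min_v}%,{max_v}%].")
    # -> - Median/Common-Case improvement was 8.0% and ranged from [5.0%,12.5%].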
@@ -542,7 +554,7 @@ def compute_regression_table(
     tf_triggering_env,
     metric_name,
     comparison_branch,
-    baseline_branch="master",
+    baseline_branch="unstable",
     baseline_tag=None,
     comparison_tag=None,
     baseline_deployment_name="oss-standalone",
@@ -704,8 +716,8 @@
         table_name="",
         headers=[
             "Test Case",
-            "Baseline {} (median obs. +- std.dev)".format(baseline_str),
-            "Comparison {} (median obs. +- std.dev)".format(comparison_str),
+            f"Baseline {baseline_github_org}/{baseline_github_repo} {baseline_str} (median obs. +- std.dev)",
+            f"Comparison {comparison_github_org}/{comparison_github_repo} {comparison_str} (median obs. +- std.dev)",
             "% change ({})".format(metric_mode),
             "Note",
         ],
@@ -727,8 +739,8 @@
         table_name="",
         headers=[
             "Test Case",
-            "Baseline {} (median obs. +- std.dev)".format(baseline_str),
-            "Comparison {} (median obs. +- std.dev)".format(comparison_str),
+            f"Baseline {baseline_github_org}/{baseline_github_repo} {baseline_str} (median obs. +- std.dev)",
+            f"Comparison {comparison_github_org}/{comparison_github_repo} {comparison_str} (median obs. +- std.dev)",
             "% change ({})".format(metric_mode),
             "Note",
         ],
@@ -737,7 +749,7 @@
         writer_regressions.dump(mystdout, False)
         table_output += mystdout.getvalue()
         table_output += "\n\n"
-        test_names_str = "|".join(improvements_list)
+        test_names_str = "|".join([l[0] for l in improvements_list])
         table_output += f"Improvements test regexp names: {test_names_str}\n\n"
         mystdout.close()
         sys.stdout = old_stdout
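With `improvements_list` now carrying `[test_name, percentage_change]` pairs instead of bare strings, joining the list directly would raise a `TypeError`; the hunk therefore joins only the name component. A tiny reproduction with illustrative data:

    # Sketch: why the "|".join(...) needed the [l[0] for l in ...] projection.
    improvements_list = [["test-a", 5.0], ["test-b", 12.5]]  # illustrative data

    # Old form, valid only for a list of strings:
    #   "|".join(improvements_list)
    #   -> TypeError: sequence item 0: expected str instance, list found

    test_names_str = "|".join([entry[0] for entry in improvements_list])
    print(f"Improvements test regexp names: {test_names_str}")
    # -> Improvements test regexp names: test-a|test-b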
@@ -748,8 +760,8 @@
         table_name="",
         headers=[
             "Test Case",
-            "Baseline {} (median obs. +- std.dev)".format(baseline_str),
-            "Comparison {} (median obs. +- std.dev)".format(comparison_str),
+            f"Baseline {baseline_github_org}/{baseline_github_repo} {baseline_str} (median obs. +- std.dev)",
+            f"Comparison {comparison_github_org}/{comparison_github_repo} {comparison_str} (median obs. +- std.dev)",
             "% change ({})".format(metric_mode),
             "Note",
         ],
@@ -766,7 +778,7 @@
     return (
         detected_regressions,
         table_output,
-        total_improvements,
+        improvements_list,
         regressions_list,
         total_stable,
         total_unstable,
@@ -1098,7 +1110,12 @@ def from_rts_to_regression_table(
             logging.error("Detected a ZeroDivisionError. {}".format(e.__str__()))
             pass
         unstable = False
-        if baseline_v != "N/A" and comparison_v != "N/A":
+        if (
+            baseline_v != "N/A"
+            and comparison_pct_change != "N/A"
+            and comparison_v != "N/A"
+            and baseline_pct_change != "N/A"
+        ):
             if comparison_pct_change > 10.0 or baseline_pct_change > 10.0:
                 note = "UNSTABLE (very high variance)"
                 unstable = True
@@ -1119,6 +1136,10 @@
                 percentage_change = (
                     float(baseline_v) / float(comparison_v) - 1
                 ) * 100.0
+        else:
+            logging.warn(
+                f"Missing data for test {test_name}. baseline_v={baseline_v} (pct_change={baseline_pct_change}), comparison_v={comparison_v} (pct_change={comparison_pct_change}) "
+            )
         if baseline_v != "N/A" or comparison_v != "N/A":
             detected_regression = False
             detected_improvement = False
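The widened guard above keeps the variance check from comparing the "N/A" placeholder against a float, which Python 3 rejects with a TypeError; incomplete results are now logged and skipped instead of aborting the comparison. A minimal illustration of the failure mode and the guard (values are illustrative and the helper is hypothetical):

    # Sketch: "N/A" > 10.0 raises in Python 3, so every operand is checked first.
    import logging


    def is_unstable(baseline_v, comparison_v, baseline_pct_change, comparison_pct_change):
        if (
            baseline_v != "N/A"
            and comparison_pct_change != "N/A"
            and comparison_v != "N/A"
            and baseline_pct_change != "N/A"
        ):
            return comparison_pct_change > 10.0 or baseline_pct_change > 10.0
        logging.warning("Missing data: baseline=%s comparison=%s", baseline_v, comparison_v)
        return False


    print(is_unstable(100.0, 105.0, 2.0, 15.0))   # True: very high variance on the comparison side
    print(is_unstable(100.0, "N/A", 2.0, "N/A"))  # False, logged instead of raising
    # Unguarded: "N/A" > 10.0 -> TypeError: '>' not supported between instances of 'str' and 'float'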
@@ -1170,7 +1191,7 @@
             table_regressions.append(line)

         if detected_improvement:
-            improvements_list.append(test_name)
+            improvements_list.append([test_name, percentage_change])
             table_improvements.append(line)

         if unstable:
@@ -1325,23 +1346,25 @@ def get_v_pct_change_and_largest_var(
         _, comparison_v = comparison_datapoints[0]
         for tuple in comparison_datapoints:
             if last_n < 0 or (last_n > 0 and len(comparison_values) < last_n):
-                comparison_values.append(tuple[1])
-        comparison_df = pd.DataFrame(comparison_values)
-        comparison_median = float(comparison_df.median().iloc[0])
-        comparison_v = comparison_median
-        comparison_std = float(comparison_df.std().iloc[0])
-        if verbose:
-            logging.info(
-                "comparison_datapoints: {} value: {}; std-dev: {}; median: {}".format(
-                    comparison_datapoints,
-                    comparison_v,
-                    comparison_std,
-                    comparison_median,
+                if tuple[1] > 0.0:
+                    comparison_values.append(tuple[1])
+        if len(comparison_values) > 0:
+            comparison_df = pd.DataFrame(comparison_values)
+            comparison_median = float(comparison_df.median().iloc[0])
+            comparison_v = comparison_median
+            comparison_std = float(comparison_df.std().iloc[0])
+            if verbose:
+                logging.info(
+                    "comparison_datapoints: {} value: {}; std-dev: {}; median: {}".format(
+                        comparison_datapoints,
+                        comparison_v,
+                        comparison_std,
+                        comparison_median,
+                    )
                 )
-            )
-        comparison_pct_change = (comparison_std / comparison_median) * 100.0
-        if comparison_pct_change > largest_variance:
-            largest_variance = comparison_pct_change
+            comparison_pct_change = (comparison_std / comparison_median) * 100.0
+            if comparison_pct_change > largest_variance:
+                largest_variance = comparison_pct_change
     return comparison_pct_change, comparison_v, largest_variance
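Same defensive theme in get_v_pct_change_and_largest_var: zero-valued datapoints are discarded, and the median/std-dev (and the std-dev/median variance ratio) are only computed when at least one positive value remains, so an empty or all-zero series can no longer trigger a division by zero. A reduced sketch of the guarded computation with illustrative datapoints (the fallback values here are simplified, not the function's exact ones):

    # Sketch: guarded variance computation over (timestamp, value) datapoints.
    import pandas as pd


    def summarize(datapoints, largest_variance=0.0):
        values = [v for _, v in datapoints if v > 0.0]  # drop zero/empty samples
        pct_change, median_v = "N/A", "N/A"
        if len(values) > 0:
            df = pd.DataFrame(values)
            median_v = float(df.median().iloc[0])
            std_v = float(df.std().iloc[0])
            pct_change = (std_v / median_v) * 100.0  # std-dev as a % of the median
            largest_variance = max(largest_variance, pct_change)
        return pct_change, median_v, largest_variance


    print(summarize([(1, 1000.0), (2, 0.0), (3, 1100.0)]))  # the zero sample is ignored
    print(summarize([(1, 0.0), (2, 0.0)]))  # ('N/A', 'N/A', 0.0) instead of a ZeroDivisionError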

redis_benchmarks_specification/__self_contained_coordinator__/self_contained_coordinator.py

Lines changed: 39 additions & 32 deletions
@@ -1374,7 +1374,7 @@ def process_self_contained_coordinator_stream(
     (
         detected_regressions,
         table_output,
-        total_improvements,
+        improvement_list,
         regressions_list,
         total_stable,
         total_unstable,
@@ -1387,8 +1387,8 @@
         metric_name,
         comparison_branch,
         baseline_branch,
-        baseline_tag,
-        comparison_tag,
+        None, # we only compare by branch on CI automation
+        None, # we only compare by branch on CI automation
         baseline_deployment_name,
         comparison_deployment_name,
         print_improvements_only,
@@ -1410,37 +1410,44 @@
         running_platform,
     )
     total_regressions = len(regressions_list)
+    total_improvements = len(improvement_list)
     auto_approve = True
     grafana_link_base = "https://benchmarksredisio.grafana.net/d/1fWbtb7nz/experimental-oss-spec-benchmarks"
-
-    prepare_regression_comment(
-        auto_approve,
-        baseline_branch,
-        baseline_tag,
-        comparison_branch,
-        comparison_tag,
-        contains_regression_comment,
-        github_pr,
-        grafana_link_base,
-        is_actionable_pr,
-        old_regression_comment_body,
-        pr_link,
-        regression_comment,
-        datasink_conn,
-        running_platform,
-        table_output,
-        tf_github_org,
-        tf_github_repo,
-        tf_triggering_env,
-        total_comparison_points,
-        total_improvements,
-        total_regressions,
-        total_stable,
-        total_unstable,
-        verbose,
-        regressions_percent_lower_limit,
-        regressions_list,
-    )
+    try:
+        prepare_regression_comment(
+            auto_approve,
+            baseline_branch,
+            baseline_tag,
+            comparison_branch,
+            comparison_tag,
+            contains_regression_comment,
+            github_pr,
+            grafana_link_base,
+            is_actionable_pr,
+            old_regression_comment_body,
+            pr_link,
+            regression_comment,
+            datasink_conn,
+            running_platform,
+            table_output,
+            tf_github_org,
+            tf_github_repo,
+            tf_triggering_env,
+            total_comparison_points,
+            total_improvements,
+            total_regressions,
+            total_stable,
+            total_unstable,
+            verbose,
+            regressions_percent_lower_limit,
+            regressions_list,
+        )
+    except Exception as e:
+        logging.error(
+            "Failed to produce regression comment but continuing... Error: {}".format(
+                e.__str__()
+            )
+        )
     logging.info(
         f"Added test named {test_name} to the completed test list in key {stream_test_list_completed}"
     )
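Posting the comparison comment is now treated as best-effort: any exception raised by prepare_regression_comment is logged and the coordinator proceeds to mark the test as completed rather than failing the whole stream entry. The pattern, reduced to its essentials (the failing helper below is a stand-in, not the real call):

    # Sketch: report generation is logged-and-skipped on failure, never fatal.
    import logging


    def post_comparison_comment():
        raise RuntimeError("datasink unavailable")  # stand-in for prepare_regression_comment(...)


    try:
        post_comparison_comment()
    except Exception as e:
        logging.error(
            "Failed to produce regression comment but continuing... Error: {}".format(e.__str__())
        )
    logging.info("Marking test as completed")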
New file

Lines changed: 34 additions & 0 deletions

@@ -0,0 +1,34 @@
+version: 0.4
+name: memtier_benchmark-100Kkeys-hash-hgetall-50-fields-100B-values
+description: Runs memtier_benchmark, for a keyspace length of 1M keys pre-loading HASHes in which the value has a data size of 10 Bytes. After pre-loading the data it issues HGETALL command.
+dbconfig:
+  configuration-parameters:
+    save: '""'
+  check:
+    keyspacelen: 100000
+  preload_tool:
+    run_image: redislabs/memtier_benchmark:edge
+    tool: memtier_benchmark
+    arguments: '"--data-size" "100" --command "HSET __key__ field:1 __data__ field:2 __data__ field:3 __data__ field:4 __data__ field:5 __data__ field:6 __data__ field:7 __data__ field:8 __data__ field:9 __data__ field:10 __data__ field:11 __data__ field:12 __data__ field:13 __data__ field:14 __data__ field:15 __data__ field:16 __data__ field:17 __data__ field:18 __data__ field:19 __data__ field:20 __data__ field:21 __data__ field:22 __data__ field:23 __data__ field:24 __data__ field:25 __data__ field:26 __data__ field:27 __data__ field:28 __data__ field:29 __data__ field:30 __data__ field:31 __data__ field:32 __data__ field:33 __data__ field:34 __data__ field:35 __data__ field:36 __data__ field:37 __data__ field:38 __data__ field:39 __data__ field:40 __data__ field:41 __data__ field:42 __data__ field:43 __data__ field:44 __data__ field:45 __data__ field:46 __data__ field:47 __data__ field:48 __data__ field:49 __data__ field:50 __data__" --command-key-pattern="P" --key-minimum=1 --key-maximum 1000000 -n 500 -c 50 -t 4 --hide-histogram'
+    resources:
+      requests:
+        memory: 2g
+tested-groups:
+- hash
+tested-commands:
+- hgetall
+redis-topologies:
+- oss-standalone
+build-variants:
+- gcc:8.5.0-amd64-debian-buster-default
+- dockerhub
+clientconfig:
+  run_image: redislabs/memtier_benchmark:edge
+  tool: memtier_benchmark
+  arguments: ' --command "HGETALL __key__" --command-key-pattern="R" --key-minimum=1 --key-maximum 100000 --test-time 120 -c 50 -t 4 --hide-histogram'
+  resources:
+    requests:
+      cpus: '4'
+      memory: 2g
+
+priority: 96
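For reference, the keyspace this suite exercises: roughly 100K hash keys (per the keyspacelen check), each holding 50 fields with 100-byte values, read back with HGETALL under a random key pattern for 120 seconds. A rough redis-py equivalent of one preloaded key and the benchmarked read (the key name and client are illustrative; the real load and read are driven by memtier_benchmark as configured above):

    # Sketch: the data shape behind memtier_benchmark-100Kkeys-hash-hgetall-50-fields-100B-values.
    import redis

    r = redis.Redis()  # assumes a local redis-server on the default port

    value = b"x" * 100  # --data-size 100
    fields = {f"field:{i}": value for i in range(1, 51)}  # 50 fields per hash

    # preload phase (memtier walks the keyspace with a parallel key pattern)
    r.hset("key:1", mapping=fields)

    # benchmark phase: HGETALL against randomly selected keys
    reply = r.hgetall("key:1")
    print(len(reply))  # 50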
