Skip to content

Commit f5812d8

Browse files
committed
Update benchmark annotations
And stop benchmarking the Generator::State re-use, as it no longer makes a sizeable difference.
1 parent 3f950f2 commit f5812d8

File tree

2 files changed

+21
-26
lines changed

2 files changed

+21
-26
lines changed

benchmark/encoder.rb

+11-15
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
def implementations(ruby_obj)
1818
state = JSON::State.new(JSON.dump_default_options)
1919
{
20-
json_state: ["json (reuse)", proc { state.generate(ruby_obj) }],
2120
json: ["json", proc { JSON.generate(ruby_obj) }],
2221
oj: ["oj", proc { Oj.dump(ruby_obj) }],
2322
}
@@ -58,27 +57,24 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [
5857
# NB: Notes are based on ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
5958

6059
# On the first two micro benchmarks, the limiting factor is the fixed cost of initializing the
61-
# generator state. Since `JSON.generate` now lazily allocate the `State` object we're now ~10% faster
60+
# generator state. Since `JSON.generate` now lazily allocates the `State` object, we're now ~10-20% faster
6261
# than `Oj.dump`.
6362
benchmark_encoding "small mixed", [1, "string", { a: 1, b: 2 }, [3, 4, 5]]
6463
benchmark_encoding "small nested array", [[1,2,3,4,5]]*10
65-
66-
# On small hash specifically, we're just on par with `Oj.dump`. Would be worth investigating why
67-
# Hash serialization doesn't perform as well as other types.
6864
benchmark_encoding "small hash", { "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" }
6965

70-
# On string encoding we're ~20% faster when dealing with mostly ASCII, but ~10% slower when dealing
71-
# with mostly multi-byte characters. This is a tradeoff.
72-
benchmark_encoding "mixed utf8", ([("a" * 5000) + "€" + ("a" * 5000)] * 500), except: %i(json_state)
73-
benchmark_encoding "mostly utf8", ([("€" * 3333)] * 500), except: %i(json_state)
66+
# On string encoding we're ~20% faster when dealing with mostly ASCII, but ~50% slower when dealing
67+
# with mostly multi-byte characters. There's likely some gains left to be had in multi-byte handling.
68+
benchmark_encoding "mixed utf8", ([("a" * 5000) + "€" + ("a" * 5000)] * 500)
69+
benchmark_encoding "mostly utf8", ([("€" * 3333)] * 500)
7470

7571
# On these benchmarks we perform well, we're on par or better.
7672
benchmark_encoding "integers", (1_000_000..1_001_000).to_a, except: %i(json_state)
77-
benchmark_encoding "activitypub.json", JSON.load_file("#{__dir__}/data/activitypub.json"), except: %i(json_state)
78-
benchmark_encoding "citm_catalog.json", JSON.load_file("#{__dir__}/data/citm_catalog.json"), except: %i(json_state)
73+
benchmark_encoding "activitypub.json", JSON.load_file("#{__dir__}/data/activitypub.json")
74+
benchmark_encoding "citm_catalog.json", JSON.load_file("#{__dir__}/data/citm_catalog.json")
7975

80-
# On twitter.json we're still about 10% slower, this is worth investigating.
81-
benchmark_encoding "twitter.json", JSON.load_file("#{__dir__}/data/twitter.json"), except: %i(json_state)
76+
# On twitter.json we're still about 6% slower, this is worth investigating.
77+
benchmark_encoding "twitter.json", JSON.load_file("#{__dir__}/data/twitter.json")
8278

8379
# This benchmark spent the overwhelming majority of its time in `ruby_dtoa`. We rely on Ruby's implementation
8480
# which uses a relatively old version of dtoa.c from David M. Gay.
@@ -89,8 +85,8 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [
8985
# but all these are implemented in C++11 or newer, making it hard if not impossible to include them.
9086
# Short of a pure C99 implementation of these newer algorithms, there isn't much that can be done to match
9187
# Oj speed without losing precision.
92-
benchmark_encoding "canada.json", JSON.load_file("#{__dir__}/data/canada.json"), check_expected: false, except: %i(json_state)
88+
benchmark_encoding "canada.json", JSON.load_file("#{__dir__}/data/canada.json"), check_expected: false
9389

9490
# We're about 10% faster when `to_json` calls are involved, but this wasn't particularly optimized, there might be
9591
# opportunities here.
96-
benchmark_encoding "many #to_json calls", [{object: Object.new, int: 12, float: 54.3, class: Float, time: Time.now, date: Date.today}] * 20, except: %i(json_state)
92+
benchmark_encoding "many #to_json calls", [{object: Object.new, int: 12, float: 54.3, class: Float, time: Time.now, date: Date.today}] * 20

benchmark/parser.rb

+10-11
Original file line numberDiff line numberDiff line change
@@ -28,27 +28,26 @@ def benchmark_parsing(name, json_output)
2828

2929
# NB: Notes are based on ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
3030

31-
# Oj::Parser is very significanly faster (1.80x) on the nested array benchmark.
32-
benchmark_parsing "small nested array", JSON.dump([[1,2,3,4,5]]*10)
33-
34-
# Oj::Parser is significanly faster (~1.5x) on the next 4 benchmarks in large part because its
31+
# Oj::Parser is significantly faster (~1.3x) on the next 3 micro-benchmarks in large part because its
3532
# cache is persisted across calls. That's not something we can do with the current API, we'd
3633
# need to expose a stateful API as well, but that's not really desirable.
37-
# Other than that we're faster than regular `Oj.load` by a good margin.
34+
# Other than that we're faster than regular `Oj.load` by a good margin (between 1.3x and 2.4x).
35+
benchmark_parsing "small nested array", JSON.dump([[1,2,3,4,5]]*10)
3836
benchmark_parsing "small hash", JSON.dump({ "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" })
39-
4037
benchmark_parsing "test from oj", <<JSON
41-
{"a":"Alpha","b":true,"c":12345,"d":[true,[false,[-123456789,null],3.9676,["Something else.",false],null]],"e":{"zero":null,"one":1,"two":2,"three":[3],"four":[0,1,2,3,4]},"f":null,"h":{"a":{"b":{"c":{"d":{"e":{"f":{"g":null}}}}}}},"i":[[[[[[[null]]]]]]]}
38+
{"a":"Alpha","b":true,"c":12345,"d":[true,[false,[-123456789,null],3.9676,["Something else.",false],null]],
39+
"e":{"zero":null,"one":1,"two":2,"three":[3],"four":[0,1,2,3,4]},"f":null,
40+
"h":{"a":{"b":{"c":{"d":{"e":{"f":{"g":null}}}}}}},"i":[[[[[[[null]]]]]]]}
4241
JSON
4342

44-
# On these macro-benchmarks, we're on par with `Oj::Parser` and significantly
45-
# faster than `Oj.load`.
43+
# On these macro-benchmarks, we're on par with `Oj::Parser`, except `twitter.json` where we're `1.14x` faster,
44+
# and between 1.3x and 1.5x faster than `Oj.load`.
4645
benchmark_parsing "activitypub.json", File.read("#{__dir__}/data/activitypub.json")
4746
benchmark_parsing "twitter.json", File.read("#{__dir__}/data/twitter.json")
4847
benchmark_parsing "citm_catalog.json", File.read("#{__dir__}/data/citm_catalog.json")
4948

50-
# rapidjson is 8x faster thanks to it's much more performant float parser.
49+
# rapidjson is 8x faster thanks to its much more performant float parser.
5150
# Unfortunately, there isn't a lot of existing fast float parsers in pure C,
5251
# and including C++ is problematic.
53-
# Aside from that, we're much faster than other alternatives here.
52+
# Aside from that, we're close to the alternatives here.
5453
benchmark_parsing "float parsing", File.read("#{__dir__}/data/canada.json")

0 commit comments

Comments
 (0)