Skip to content

Commit 646b913

Browse files
committed
[GR-60623] Halve branch profile counter values on overflow.
PullRequest: graal/19858
2 parents 54ae00e + 915367f commit 646b913

File tree

4 files changed

+188
-59
lines changed

4 files changed

+188
-59
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
int 986
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
;;
2+
;; Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
3+
;; DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
;;
5+
;; The Universal Permissive License (UPL), Version 1.0
6+
;;
7+
;; Subject to the condition set forth below, permission is hereby granted to any
8+
;; person obtaining a copy of this software, associated documentation and/or
9+
;; data (collectively the "Software"), free of charge and under any and all
10+
;; copyright rights in the Software, and any and all patent rights owned or
11+
;; freely licensable by each licensor hereunder covering either (i) the
12+
;; unmodified Software as contributed to or provided by such licensor, or (ii)
13+
;; the Larger Works (as defined below), to deal in both
14+
;;
15+
;; (a) the Software, and
16+
;;
17+
;; (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
18+
;; one is included with the Software each a "Larger Work" to which the Software
19+
;; is contributed by such licensors),
20+
;;
21+
;; without restriction, including without limitation the rights to copy, create
22+
;; derivative works of, display, perform, and distribute the Software and make,
23+
;; use, sell, offer for sale, import, export, have made, and have sold the
24+
;; Software and the Larger Work(s), and to sublicense the foregoing rights on
25+
;; either these or other terms.
26+
;;
27+
;; This license is subject to the following condition:
28+
;;
29+
;; The above copyright notice and either this complete permission notice or at a
30+
;; minimum a reference to the UPL must be included in all copies or substantial
31+
;; portions of the Software.
32+
;;
33+
;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34+
;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35+
;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
36+
;; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
37+
;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
38+
;; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
39+
;; SOFTWARE.
40+
;;
41+
42+
(;
43+
pub fn collatz_steps(mut n: u64) -> u64 {
44+
let mut counter = 0;
45+
while n != 1 {
46+
if n % 2 == 0 {
47+
n /= 2;
48+
} else {
49+
n = 3 * n + 1;
50+
}
51+
counter += 1;
52+
}
53+
counter
54+
}
55+
;)
56+
57+
(module
58+
(type $int_func (func (result i32)))
59+
(type $setup_func (func))
60+
(type $teardown_func (func (param i32)))
61+
62+
(global $iterations i64 (i64.const 670617279))
63+
64+
(memory $memory (export "memory") 0)
65+
66+
(func (export "benchmarkSetupEach") (type $setup_func))
67+
68+
(func (export "benchmarkTeardownEach") (type $teardown_func))
69+
70+
(func (export "benchmarkRun") (type $int_func)
71+
global.get $iterations
72+
call $collatz_steps
73+
i32.wrap_i64
74+
)
75+
76+
(func $collatz_steps (export "collatz_steps") (param i64) (result i64)
77+
(local i64)
78+
i64.const 0
79+
local.set 1
80+
block
81+
local.get 0
82+
i64.const 1
83+
i64.eq
84+
br_if 0
85+
i64.const 0
86+
local.set 1
87+
loop $continue
88+
local.get 1
89+
i64.const 1
90+
i64.add
91+
local.set 1
92+
local.get 0
93+
i64.const 1
94+
i64.shr_u
95+
local.get 0
96+
i64.const 3
97+
i64.mul
98+
i64.const 1
99+
i64.add
100+
local.get 0
101+
i64.const 1
102+
i64.and
103+
i64.eqz
104+
select
105+
local.tee 0
106+
i64.const 1
107+
i64.ne
108+
br_if $continue
109+
end
110+
end
111+
local.get 1
112+
)
113+
)

wasm/src/org.graalvm.wasm/src/org/graalvm/wasm/BinaryStreamParser.java

Lines changed: 45 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* The Universal Permissive License (UPL), Version 1.0
@@ -42,19 +42,28 @@
4242

4343
import static com.oracle.truffle.api.nodes.ExplodeLoop.LoopExplosionKind.FULL_EXPLODE_UNTIL_RETURN;
4444

45+
import java.lang.invoke.MethodHandles;
46+
import java.lang.invoke.VarHandle;
47+
import java.nio.ByteOrder;
48+
import java.util.Arrays;
49+
4550
import org.graalvm.wasm.constants.GlobalModifier;
4651
import org.graalvm.wasm.exception.Failure;
4752
import org.graalvm.wasm.exception.WasmException;
4853

54+
import com.oracle.truffle.api.CompilerDirectives;
4955
import com.oracle.truffle.api.CompilerDirectives.CompilationFinal;
56+
import com.oracle.truffle.api.memory.ByteArraySupport;
5057
import com.oracle.truffle.api.nodes.ExplodeLoop;
5158

52-
import java.util.Arrays;
53-
5459
public abstract class BinaryStreamParser {
5560
protected static final int SINGLE_RESULT_VALUE = 0;
5661
protected static final int MULTI_RESULT_VALUE = 1;
5762

63+
private static final VarHandle I16LE = MethodHandles.byteArrayViewVarHandle(short[].class, ByteOrder.LITTLE_ENDIAN);
64+
private static final VarHandle I32LE = MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.LITTLE_ENDIAN);
65+
private static final VarHandle I64LE = MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.LITTLE_ENDIAN);
66+
5867
@CompilationFinal(dimensions = 1) protected byte[] data;
5968
protected int offset;
6069

@@ -353,7 +362,7 @@ public static byte peekLeb128Length(byte[] data, int initialOffset) {
353362

354363
/**
355364
* Reads the unsigned byte value at the given bytecode offset.
356-
*
365+
*
357366
* @param bytecode The bytecode
358367
* @param offset The offset in the bytecode
359368
* @return the unsigned byte value at the given bytecode offset.
@@ -364,7 +373,7 @@ public static int rawPeekU8(byte[] bytecode, int offset) {
364373

365374
/**
366375
* Reads the signed byte value at the given bytecode offset.
367-
*
376+
*
368377
* @param bytecode The bytecode
369378
* @param offset The offset in the bytecode
370379
* @return The signed byte value at the given bytecode offset.
@@ -375,27 +384,38 @@ public static byte rawPeekI8(byte[] bytecode, int offset) {
375384

376385
/**
377386
* Reads the unsigned short value at the given bytecode offset.
378-
*
387+
*
379388
* @param bytecode The bytecode
380389
* @param offset The offset in the bytecode
381390
* @return The unsigned short value at the given bytecode offset.
382391
*/
383392
public static int rawPeekU16(byte[] bytecode, int offset) {
384-
return ((bytecode[offset] & 0xFF) | ((bytecode[offset + 1] & 0xFF) << 8));
393+
return Short.toUnsignedInt(rawPeekI16(bytecode, offset));
394+
}
395+
396+
/**
397+
* Reads the signed short value at the given bytecode offset.
398+
*
399+
* @param bytecode The bytecode
400+
* @param offset The offset in the bytecode
401+
* @return The signed short value at the given bytecode offset.
402+
*/
403+
public static short rawPeekI16(byte[] bytecode, int offset) {
404+
if (CompilerDirectives.inCompiledCode()) {
405+
return ByteArraySupport.littleEndian().getShortUnaligned(bytecode, offset);
406+
}
407+
return (short) I16LE.get(bytecode, offset);
385408
}
386409

387410
/**
388411
* Writes the unsigned short value to the given bytecode offset.
389-
*
412+
*
390413
* @param bytecode The bytecode
391414
* @param offset The offset in the bytecode
392415
* @param value The value that should be written
393416
*/
394417
public static void writeU16(byte[] bytecode, int offset, int value) {
395-
final byte low = (byte) (value & 0xFF);
396-
final byte high = (byte) ((value >> 8) & 0xFF);
397-
bytecode[offset] = low;
398-
bytecode[offset + 1] = high;
418+
I16LE.set(bytecode, offset, (short) value);
399419
}
400420

401421
/**
@@ -406,60 +426,46 @@ public static void writeU16(byte[] bytecode, int offset, int value) {
406426
* @return The unsigned integer value at the given bytecode offset.
407427
*/
408428
public static long rawPeekU32(byte[] bytecode, int offset) {
409-
return (bytecode[offset] & 0xFFL) |
410-
((bytecode[offset + 1] & 0xFFL) << 8) |
411-
((bytecode[offset + 2] & 0xFFL) << 16) |
412-
((bytecode[offset + 3] & 0xFFL) << 24);
429+
return Integer.toUnsignedLong(rawPeekI32(bytecode, offset));
413430
}
414431

415432
/**
416433
* Reads the signed integer value at the given bytecode offset.
417-
*
434+
*
418435
* @param bytecode The bytecode
419436
* @param offset The offset in the bytecode.
420437
* @return The signed integer value at the given bytecode offset.
421438
*/
422439
public static int rawPeekI32(byte[] bytecode, int offset) {
423-
return (bytecode[offset] & 0xFF) |
424-
((bytecode[offset + 1] & 0xFF) << 8) |
425-
((bytecode[offset + 2] & 0xFF) << 16) |
426-
((bytecode[offset + 3] & 0xFF) << 24);
440+
if (CompilerDirectives.inCompiledCode()) {
441+
return ByteArraySupport.littleEndian().getIntUnaligned(bytecode, offset);
442+
}
443+
return (int) I32LE.get(bytecode, offset);
427444
}
428445

429446
/**
430447
* Reads the signed long value at the given bytecode offset.
431-
*
448+
*
432449
* @param bytecode The bytecode
433450
* @param offset The offset in the bytecode.
434451
* @return The signed long value at the given bytecode offset.
435452
*/
436453
public static long rawPeekI64(byte[] bytecode, int offset) {
437-
return (bytecode[offset] & 0xFFL) |
438-
((bytecode[offset + 1] & 0xFFL) << 8) |
439-
((bytecode[offset + 2] & 0xFFL) << 16) |
440-
((bytecode[offset + 3] & 0xFFL) << 24) |
441-
((bytecode[offset + 4] & 0xFFL) << 32) |
442-
((bytecode[offset + 5] & 0xFFL) << 40) |
443-
((bytecode[offset + 6] & 0xFFL) << 48) |
444-
((bytecode[offset + 7] & 0xFFL) << 56);
454+
if (CompilerDirectives.inCompiledCode()) {
455+
return ByteArraySupport.littleEndian().getLongUnaligned(bytecode, offset);
456+
}
457+
return (long) I64LE.get(bytecode, offset);
445458
}
446459

447460
/**
448461
* Writes the signed long value to the given bytecode offset.
449-
*
462+
*
450463
* @param bytecode The bytecode
451464
* @param offset The offset in the bytecode
452465
* @param value The value that should be written
453466
*/
454467
public static void writeI64(byte[] bytecode, int offset, long value) {
455-
bytecode[offset] = (byte) (value & 0xFF);
456-
bytecode[offset + 1] = (byte) ((value >> 8) & 0xFF);
457-
bytecode[offset + 2] = (byte) ((value >> 16) & 0xFF);
458-
bytecode[offset + 3] = (byte) ((value >> 24) & 0xFF);
459-
bytecode[offset + 4] = (byte) ((value >> 32) & 0xFF);
460-
bytecode[offset + 5] = (byte) ((value >> 40) & 0xFF);
461-
bytecode[offset + 6] = (byte) ((value >> 48) & 0xFF);
462-
bytecode[offset + 7] = (byte) ((value >> 56) & 0xFF);
468+
I64LE.set(bytecode, offset, value);
463469
}
464470

465471
/**

wasm/src/org.graalvm.wasm/src/org/graalvm/wasm/nodes/WasmFunctionNode.java

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4646,45 +4646,54 @@ private static void dropStack(VirtualFrame frame, int stackPointer, int targetSt
46464646
private static final int MAX_PROFILE_VALUE = 0x0000_00ff;
46474647
private static final int MAX_TABLE_PROFILE_VALUE = 0x0000_ffff;
46484648

4649+
@SuppressWarnings("all") // "The parameter condition should not be assigned."
46494650
private static boolean profileCondition(byte[] data, final int profileOffset, boolean condition) {
46504651
int t = rawPeekU8(data, profileOffset);
46514652
int f = rawPeekU8(data, profileOffset + 1);
4652-
boolean val = condition;
4653-
if (val) {
4653+
if (condition) {
46544654
if (t == 0) {
46554655
CompilerDirectives.transferToInterpreterAndInvalidate();
46564656
}
4657-
if (!CompilerDirectives.inInterpreter()) {
4658-
if (f == 0) {
4659-
// Make this branch fold during PE
4660-
val = true;
4657+
if (CompilerDirectives.inInterpreter()) {
4658+
if (t < MAX_PROFILE_VALUE) {
4659+
t++;
4660+
} else {
4661+
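// t is saturated: restart it at (MAX_PROFILE_VALUE + 1) / 2 and halve f, rounding up so that
// a non-zero f never becomes 0 (a zero count is treated as "this outcome was never seen").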
4662+
f = (f >>> 1) + (f & 0x1);
4663+
t = (MAX_PROFILE_VALUE >>> 1) + 1;
4664+
data[profileOffset + 1] = (byte) f;
46614665
}
4666+
data[profileOffset] = (byte) t;
4667+
return condition;
46624668
} else {
4663-
if (t < MAX_PROFILE_VALUE) {
4664-
data[profileOffset] = (byte) (t + 1);
4669+
if (f == 0) {
4670+
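// The false outcome was never recorded (f == 0): pin the condition to a constant so
// that this branch folds during partial evaluation (PE).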
4671+
condition = true;
46654672
}
46664673
}
46674674
} else {
46684675
if (f == 0) {
46694676
CompilerDirectives.transferToInterpreterAndInvalidate();
46704677
}
4671-
if (!CompilerDirectives.inInterpreter()) {
4672-
if (t == 0) {
4673-
// Make this branch fold during PE
4674-
val = false;
4678+
if (CompilerDirectives.inInterpreter()) {
4679+
if (f < MAX_PROFILE_VALUE) {
4680+
f++;
4681+
} else {
4682+
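// f is saturated: restart it at (MAX_PROFILE_VALUE + 1) / 2 and halve t, rounding up so that
// a non-zero t never becomes 0 (a zero count is treated as "this outcome was never seen").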
4683+
t = (t >>> 1) + (t & 0x1);
4684+
f = (MAX_PROFILE_VALUE >>> 1) + 1;
4685+
data[profileOffset] = (byte) t;
46754686
}
4687+
data[profileOffset + 1] = (byte) f;
4688+
return condition;
46764689
} else {
4677-
if (f < MAX_PROFILE_VALUE) {
4678-
data[profileOffset + 1] = (byte) (f + 1);
4690+
if (t == 0) {
4691+
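// The true outcome was never recorded (t == 0): pin the condition to a constant so
// that this branch folds during partial evaluation (PE).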
4692+
condition = false;
46794693
}
46804694
}
46814695
}
4682-
if (CompilerDirectives.inInterpreter()) {
4683-
return val;
4684-
} else {
4685-
int sum = t + f;
4686-
return CompilerDirectives.injectBranchProbability((double) t / (double) sum, val);
4687-
}
4696+
return CompilerDirectives.injectBranchProbability((double) t / (double) (t + f), condition);
46884697
}
46894698

46904699
private static void updateBranchTableProfile(byte[] data, final int counterOffset, final int profileOffset) {

0 commit comments

Comments
 (0)