Skip to content

Commit bf366ef

Browse files
Bryan C. Millsdmitshur
Bryan C. Mills
authored andcommitted
[release-branch.go1.18] internal/fuzz: fix encoding for out-of-range ints and runes
Also switch float64 NaN encoding to use hexadecimal, and accept hexadecimal encoding for all other integer types too. (That gives us the flexibility to change the encodings in either direction in the future without breaking earlier Go versions.) Out-of-range runes encoded using "%q" were previously replaced with the Unicode replacement charecter, losing their values. Out-of-range ints and uints on 32-bit platforms were previously rejected. Now they are wrapped instead: an “interesting” case with a large int or uint found on a 64-bit platform likely remains interesting on a 32-bit platform, even if the specific values differ. To verify the above changes, I have made TestMarshalUnmarshal accept (and check for) arbitrary differences between input and output, and added tests cases that include values in valid but non-canonical encodings. I have also added round-trip fuzz tests in the opposite direction for most of the types affected by this change, verifying that a marshaled value unmarshals to the same bitwise value. Updates #51258 Updates #51526 Fixes #51528 Change-Id: I7727a9d0582d81be0d954529545678a4374e88ed Reviewed-on: https://go-review.googlesource.com/c/go/+/390424 Trust: Bryan Mills <[email protected]> Run-TryBot: Bryan Mills <[email protected]> Reviewed-by: Roland Shoemaker <[email protected]> TryBot-Result: Gopher Robot <[email protected]> (cherry picked from commit 7419bb3) Reviewed-on: https://go-review.googlesource.com/c/go/+/390816 Trust: Dmitri Shuralyov <[email protected]> Run-TryBot: Dmitri Shuralyov <[email protected]> Reviewed-by: Bryan Mills <[email protected]>
1 parent 92644ff commit bf366ef

File tree

2 files changed

+311
-45
lines changed

2 files changed

+311
-45
lines changed

src/internal/fuzz/encoding.go

+69-15
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"go/token"
1313
"math"
1414
"strconv"
15+
"unicode/utf8"
1516
)
1617

1718
// encVersion1 will be the first line of a file with version 1 encoding.
@@ -32,21 +33,60 @@ func marshalCorpusFile(vals ...any) []byte {
3233
fmt.Fprintf(b, "%T(%v)\n", t, t)
3334
case float32:
3435
if math.IsNaN(float64(t)) && math.Float32bits(t) != math.Float32bits(float32(math.NaN())) {
35-
fmt.Fprintf(b, "math.Float32frombits(%v)\n", math.Float32bits(t))
36+
// We encode unusual NaNs as hex values, because that is how users are
37+
// likely to encounter them in literature about floating-point encoding.
38+
// This allows us to reproduce fuzz failures that depend on the specific
39+
// NaN representation (for float32 there are about 2^24 possibilities!),
40+
// not just the fact that the value is *a* NaN.
41+
//
42+
// Note that the specific value of float32(math.NaN()) can vary based on
43+
// whether the architecture represents signaling NaNs using a low bit
44+
// (as is common) or a high bit (as commonly implemented on MIPS
45+
// hardware before around 2012). We believe that the increase in clarity
46+
// from identifying "NaN" with math.NaN() is worth the slight ambiguity
47+
// from a platform-dependent value.
48+
fmt.Fprintf(b, "math.Float32frombits(0x%x)\n", math.Float32bits(t))
3649
} else {
50+
// We encode all other values — including the NaN value that is
51+
// bitwise-identical to float32(math.Nan()) — using the default
52+
// formatting, which is equivalent to strconv.FormatFloat with format
53+
// 'g' and can be parsed by strconv.ParseFloat.
54+
//
55+
// For an ordinary floating-point number this format includes
56+
// sufficiently many digits to reconstruct the exact value. For positive
57+
// or negative infinity it is the string "+Inf" or "-Inf". For positive
58+
// or negative zero it is "0" or "-0". For NaN, it is the string "NaN".
3759
fmt.Fprintf(b, "%T(%v)\n", t, t)
3860
}
3961
case float64:
4062
if math.IsNaN(t) && math.Float64bits(t) != math.Float64bits(math.NaN()) {
41-
fmt.Fprintf(b, "math.Float64frombits(%v)\n", math.Float64bits(t))
63+
fmt.Fprintf(b, "math.Float64frombits(0x%x)\n", math.Float64bits(t))
4264
} else {
4365
fmt.Fprintf(b, "%T(%v)\n", t, t)
4466
}
4567
case string:
4668
fmt.Fprintf(b, "string(%q)\n", t)
4769
case rune: // int32
48-
fmt.Fprintf(b, "rune(%q)\n", t)
70+
// Although rune and int32 are represented by the same type, only a subset
71+
// of valid int32 values can be expressed as rune literals. Notably,
72+
// negative numbers, surrogate halves, and values above unicode.MaxRune
73+
// have no quoted representation.
74+
//
75+
// fmt with "%q" (and the corresponding functions in the strconv package)
76+
// would quote out-of-range values to the Unicode replacement character
77+
// instead of the original value (see https://go.dev/issue/51526), so
78+
// they must be treated as int32 instead.
79+
//
80+
// We arbitrarily draw the line at UTF-8 validity, which biases toward the
81+
// "rune" interpretation. (However, we accept either format as input.)
82+
if utf8.ValidRune(t) {
83+
fmt.Fprintf(b, "rune(%q)\n", t)
84+
} else {
85+
fmt.Fprintf(b, "int32(%v)\n", t)
86+
}
4987
case byte: // uint8
88+
// For bytes, we arbitrarily prefer the character interpretation.
89+
// (Every byte has a valid character encoding.)
5090
fmt.Fprintf(b, "byte(%q)\n", t)
5191
case []byte: // []uint8
5292
fmt.Fprintf(b, "[]byte(%q)\n", t)
@@ -199,6 +239,14 @@ func parseCorpusValue(line []byte) (any, error) {
199239
}
200240
return strconv.Unquote(val)
201241
case "byte", "rune":
242+
if kind == token.INT {
243+
switch typ {
244+
case "rune":
245+
return parseInt(val, typ)
246+
case "byte":
247+
return parseUint(val, typ)
248+
}
249+
}
202250
if kind != token.CHAR {
203251
return nil, fmt.Errorf("character literal required for byte/rune types")
204252
}
@@ -265,18 +313,24 @@ func parseCorpusValue(line []byte) (any, error) {
265313
func parseInt(val, typ string) (any, error) {
266314
switch typ {
267315
case "int":
268-
return strconv.Atoi(val)
316+
// The int type may be either 32 or 64 bits. If 32, the fuzz tests in the
317+
// corpus may include 64-bit values produced by fuzzing runs on 64-bit
318+
// architectures. When running those tests, we implicitly wrap the values to
319+
// fit in a regular int. (The test case is still “interesting”, even if the
320+
// specific values of its inputs are platform-dependent.)
321+
i, err := strconv.ParseInt(val, 0, 64)
322+
return int(i), err
269323
case "int8":
270-
i, err := strconv.ParseInt(val, 10, 8)
324+
i, err := strconv.ParseInt(val, 0, 8)
271325
return int8(i), err
272326
case "int16":
273-
i, err := strconv.ParseInt(val, 10, 16)
327+
i, err := strconv.ParseInt(val, 0, 16)
274328
return int16(i), err
275-
case "int32":
276-
i, err := strconv.ParseInt(val, 10, 32)
329+
case "int32", "rune":
330+
i, err := strconv.ParseInt(val, 0, 32)
277331
return int32(i), err
278332
case "int64":
279-
return strconv.ParseInt(val, 10, 64)
333+
return strconv.ParseInt(val, 0, 64)
280334
default:
281335
panic("unreachable")
282336
}
@@ -286,19 +340,19 @@ func parseInt(val, typ string) (any, error) {
286340
func parseUint(val, typ string) (any, error) {
287341
switch typ {
288342
case "uint":
289-
i, err := strconv.ParseUint(val, 10, 0)
343+
i, err := strconv.ParseUint(val, 0, 64)
290344
return uint(i), err
291-
case "uint8":
292-
i, err := strconv.ParseUint(val, 10, 8)
345+
case "uint8", "byte":
346+
i, err := strconv.ParseUint(val, 0, 8)
293347
return uint8(i), err
294348
case "uint16":
295-
i, err := strconv.ParseUint(val, 10, 16)
349+
i, err := strconv.ParseUint(val, 0, 16)
296350
return uint16(i), err
297351
case "uint32":
298-
i, err := strconv.ParseUint(val, 10, 32)
352+
i, err := strconv.ParseUint(val, 0, 32)
299353
return uint32(i), err
300354
case "uint64":
301-
return strconv.ParseUint(val, 10, 64)
355+
return strconv.ParseUint(val, 0, 64)
302356
default:
303357
panic("unreachable")
304358
}

0 commit comments

Comments
 (0)