Skip to content

Commit

Permalink
strconv: fix atoi() and its tests (#23737)
Browse files Browse the repository at this point in the history
  • Loading branch information
Bruno-Vdr authored Feb 17, 2025
1 parent 9649af3 commit 9bed50d
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 44 deletions.
71 changes: 47 additions & 24 deletions vlib/strconv/atoi.v
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ module strconv
// int_size = 32 << (~u32(0) >> 63)
// max_u64 = u64(u64(1 << 63) - 1)
const int_size = 32
const max_u64 = u64(18446744073709551615)

@[inline]
pub fn byte_to_lower(c u8) u8 {
Expand Down Expand Up @@ -218,34 +217,58 @@ pub fn parse_int(_s string, base int, _bit_size int) !i64 {
}

// atoi is equivalent to parse_int(s, 10, 0), converted to type int.
// It follows the behavior of the V scanner as closely as has been observed.
@[direct_array_access]
pub fn atoi(s string) !int {
if s == '' {
return error('strconv.atoi: parsing "": invalid syntax')
}
if (int_size == 32 && (0 < s.len && s.len < 10))
|| (int_size == 64 && (0 < s.len && s.len < 19)) {
// Fast path for small integers that fit int type.
mut start_idx := 0
if s[0] == `-` || s[0] == `+` {
start_idx++
if s.len - start_idx < 1 {
// return 0, &NumError{fnAtoi, s0, ErrSyntax}
return error('strconv.atoi: parsing "${s}": invalid syntax')
}
return error('strconv.atoi: parsing "": empty string')
}

mut start_idx := 0
mut sign := i64(1)

if s[0] == `-` || s[0] == `+` {
start_idx++
if s[0] == `-` {
sign = -1
}
mut n := 0
for i in start_idx .. s.len {
ch := s[i] - `0`
if ch > 9 {
// return 0, &NumError{fnAtoi, s0, ErrSyntax}
return error('strconv.atoi: parsing "${s}": invalid syntax')
}

if s.len - start_idx < 1 {
return error('strconv.atoi: parsing "${s}": no number after sign')
}

if s[start_idx] == `_` || s[s.len - 1] == `_` {
return error('strconv.atoi: parsing "${s}": values cannot start or end with underscores')
}

mut x := i64(0)
mut underscored := false
for i in start_idx .. s.len {
c := s[i] - `0`
if c == 47 { // 47 = Ascii(`_`) - ascii(`0`) = 95 - 48.
if underscored == true { // Two consecutives underscore
return error('strconv.atoi: parsing "${s}": consecutives underscores are not allowed')
}
underscored = true
continue // Skip underscore
} else {
if c > 9 {
return error('strconv.atoi: parsing "${s}": invalid radix 10 character')
}
underscored = false
x = (x * 10) + (c * sign)
if sign == 1 && x > i64_max_int32 {
return error('strconv.atoi: parsing "${s}": integer overflow')
} else {
if x < i64_min_int32 {
return error('strconv.atoi: parsing "${s}": integer underflow')
}
}
n = n * 10 + int(ch)
}
return if s[0] == `-` { -n } else { n }
}
// Slow path for invalid, big, or underscored integers.
int64 := parse_int(s, 10, 0)!
return int(int64)
return int(x)
}

// Bounds of a signed 32-bit integer, stored as i64 values.
// i64_min_int32 evaluates to -2147483648 and i64_max_int32 to 2147483647.
// The minimum is deliberately spelled as "(-2147483647) - 1" instead of a single literal.
const i64_min_int32 = i64(-2147483647) - 1 // msvc has a bug that treats just i64(min_int) as 2147483648 :-(; this is a workaround for it
const i64_max_int32 = i64(2147483646) + 1 // NOTE(review): presumably written as "... + 1" only to mirror the minimum above - confirm
78 changes: 58 additions & 20 deletions vlib/strconv/atoi_test.v
Original file line number Diff line number Diff line change
@@ -1,28 +1,66 @@
import strconv

fn test_atoi() {
assert strconv.atoi('16')! == 16
assert strconv.atoi('+16')! == 16
assert strconv.atoi('-16')! == -16

// invalid strings
if x := strconv.atoi('str') {
println(x)
assert false
} else {
assert true
struct StrVal { // Inner test struct
str_value string
int_value int
}
if x := strconv.atoi('string_longer_than_10_chars') {
println(x)
assert false
} else {
assert true

// Parsing of these values should succeed.
ok := [
StrVal{'1', 1},
StrVal{'-1', -1},
StrVal{'0', 0},
StrVal{'+0', 0},
StrVal{'-0', 0},
StrVal{'-0_00', 0},
StrVal{'+0_00', 0},
StrVal{'+1', 1},
StrVal{'+1024', 1024},
StrVal{'+3_14159', 314159},
StrVal{'-1_00_1', -1001},
StrVal{'-1_024', -1024},
StrVal{'123_456_789', 123456789},
StrVal{'00000006', 6},
StrVal{'0_0_0_0_0_0_0_6', 6},
StrVal{'2147483647', 2147483647}, // Signed 32bits max.
StrVal{'-2147483648', -2147483648}, // Signed 32bits min.
]

// Check that extracted int value matches its string.
for v in ok {
// println('Parsing ${v.str_value} should equals ${v.int_value}')
assert strconv.atoi(v.str_value)! == v.int_value
}
if x := strconv.atoi('') {
println(x)
assert false
} else {
assert true

// Parsing of these values should fail !
ko := [
'', // Empty string
'-', // Only sign
'+', // Only sign
'_', // Only Underscore
'_10', // Start with underscore
'+_10', // Start with underscore after sign.
'-_16', // Start with underscore after sign.
'123_', // End with underscore
'-3__14159', // Two consecutives underscore.
'-3_14159A', // Non radix 10 char.
'A42', // Non radix 10 char.
'-2147483649', // 32bits underflow by 1.
'+2147483648', // 32 bit overflow by 1.
'+3147483648', // 32 bit overflow by a lot.
'-2147244836470', // Large underflow.
'+86842255899621148766244',
]

for v in ko {
if r := strconv.atoi(v) {
// These conversions should fail so force assertion !
assert false, 'The string ${v} int extraction should not succeed or be considered as valid ${r}).'
} else {
// println('Parsing fails as it should for : "${v}')
assert true
}
}
}

Expand Down

0 comments on commit 9bed50d

Please sign in to comment.