Skip to content

Commit 4e1927d

Browse files
committed
Auto merge of #95399 - gilescope:plan_b, r=scottmcm
Faster parsing for lower numbers for radix up to 16 (cont.) ( Continuation of #83371 ) With LingMan's change I think this is potentially ready.
2 parents b8f4cb6 + 3ee7bb1 commit 4e1927d

File tree

3 files changed

+136
-38
lines changed

3 files changed

+136
-38
lines changed

library/core/src/num/mod.rs

+65-37
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
use crate::ascii;
66
use crate::intrinsics;
77
use crate::mem;
8+
use crate::ops::{Add, Mul, Sub};
89
use crate::str::FromStr;
910

1011
// Used because the `?` operator is not allowed in a const context.
@@ -954,9 +955,10 @@ pub enum FpCategory {
954955
}
955956

956957
#[doc(hidden)]
957-
trait FromStrRadixHelper: PartialOrd + Copy {
958-
fn min_value() -> Self;
959-
fn max_value() -> Self;
958+
trait FromStrRadixHelper:
959+
PartialOrd + Copy + Add<Output = Self> + Sub<Output = Self> + Mul<Output = Self>
960+
{
961+
const MIN: Self;
960962
fn from_u32(u: u32) -> Self;
961963
fn checked_mul(&self, other: u32) -> Option<Self>;
962964
fn checked_sub(&self, other: u32) -> Option<Self>;
@@ -976,12 +978,9 @@ macro_rules! from_str_radix_int_impl {
976978
}
977979
from_str_radix_int_impl! { isize i8 i16 i32 i64 i128 usize u8 u16 u32 u64 u128 }
978980

979-
macro_rules! doit {
981+
macro_rules! impl_helper_for {
980982
($($t:ty)*) => ($(impl FromStrRadixHelper for $t {
981-
#[inline]
982-
fn min_value() -> Self { Self::MIN }
983-
#[inline]
984-
fn max_value() -> Self { Self::MAX }
983+
const MIN: Self = Self::MIN;
985984
#[inline]
986985
fn from_u32(u: u32) -> Self { u as Self }
987986
#[inline]
@@ -998,7 +997,18 @@ macro_rules! doit {
998997
}
999998
})*)
1000999
}
1001-
doit! { i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize }
1000+
impl_helper_for! { i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize }
1001+
1002+
/// Determins if a string of text of that length of that radix could be guaranteed to be
1003+
/// stored in the given type T.
1004+
/// Note that if the radix is known to the compiler, it is just the check of digits.len that
1005+
/// is done at runtime.
1006+
#[doc(hidden)]
1007+
#[inline(always)]
1008+
#[unstable(issue = "none", feature = "std_internals")]
1009+
pub fn can_not_overflow<T>(radix: u32, is_signed_ty: bool, digits: &[u8]) -> bool {
1010+
radix <= 16 && digits.len() <= mem::size_of::<T>() * 2 - is_signed_ty as usize
1011+
}
10021012

10031013
fn from_str_radix<T: FromStrRadixHelper>(src: &str, radix: u32) -> Result<T, ParseIntError> {
10041014
use self::IntErrorKind::*;
@@ -1014,7 +1024,7 @@ fn from_str_radix<T: FromStrRadixHelper>(src: &str, radix: u32) -> Result<T, Par
10141024
return Err(PIE { kind: Empty });
10151025
}
10161026

1017-
let is_signed_ty = T::from_u32(0) > T::min_value();
1027+
let is_signed_ty = T::from_u32(0) > T::MIN;
10181028

10191029
// all valid digits are ascii, so we will just iterate over the utf8 bytes
10201030
// and cast them to chars. .to_digit() will safely return None for anything
@@ -1032,38 +1042,56 @@ fn from_str_radix<T: FromStrRadixHelper>(src: &str, radix: u32) -> Result<T, Par
10321042
};
10331043

10341044
let mut result = T::from_u32(0);
1035-
if is_positive {
1036-
// The number is positive
1037-
for &c in digits {
1038-
let x = match (c as char).to_digit(radix) {
1039-
Some(x) => x,
1040-
None => return Err(PIE { kind: InvalidDigit }),
1041-
};
1042-
result = match result.checked_mul(radix) {
1043-
Some(result) => result,
1044-
None => return Err(PIE { kind: PosOverflow }),
1045-
};
1046-
result = match result.checked_add(x) {
1047-
Some(result) => result,
1048-
None => return Err(PIE { kind: PosOverflow }),
1045+
1046+
if can_not_overflow::<T>(radix, is_signed_ty, digits) {
1047+
// If the len of the str is short compared to the range of the type
1048+
// we are parsing into, then we can be certain that an overflow will not occur.
1049+
// This bound is when `radix.pow(digits.len()) - 1 <= T::MAX` but the condition
1050+
// above is a faster (conservative) approximation of this.
1051+
//
1052+
// Consider radix 16 as it has the highest information density per digit and will thus overflow the earliest:
1053+
// `u8::MAX` is `ff` - any str of len 2 is guaranteed to not overflow.
1054+
// `i8::MAX` is `7f` - only a str of len 1 is guaranteed to not overflow.
1055+
macro_rules! run_unchecked_loop {
1056+
($unchecked_additive_op:expr) => {
1057+
for &c in digits {
1058+
result = result * T::from_u32(radix);
1059+
let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?;
1060+
result = $unchecked_additive_op(result, T::from_u32(x));
1061+
}
10491062
};
10501063
}
1064+
if is_positive {
1065+
run_unchecked_loop!(<T as core::ops::Add>::add)
1066+
} else {
1067+
run_unchecked_loop!(<T as core::ops::Sub>::sub)
1068+
};
10511069
} else {
1052-
// The number is negative
1053-
for &c in digits {
1054-
let x = match (c as char).to_digit(radix) {
1055-
Some(x) => x,
1056-
None => return Err(PIE { kind: InvalidDigit }),
1057-
};
1058-
result = match result.checked_mul(radix) {
1059-
Some(result) => result,
1060-
None => return Err(PIE { kind: NegOverflow }),
1061-
};
1062-
result = match result.checked_sub(x) {
1063-
Some(result) => result,
1064-
None => return Err(PIE { kind: NegOverflow }),
1070+
macro_rules! run_checked_loop {
1071+
($checked_additive_op:ident, $overflow_err:expr) => {
1072+
for &c in digits {
1073+
// When `radix` is passed in as a literal, rather than doing a slow `imul`
1074+
// the compiler can use shifts if `radix` can be expressed as a
1075+
// sum of powers of 2 (x*10 can be written as x*8 + x*2).
1076+
// When the compiler can't use these optimisations,
1077+
// the latency of the multiplication can be hidden by issuing it
1078+
// before the result is needed to improve performance on
1079+
// modern out-of-order CPU as multiplication here is slower
1080+
// than the other instructions, we can get the end result faster
1081+
// doing multiplication first and let the CPU spends other cycles
1082+
// doing other computation and get multiplication result later.
1083+
let mul = result.checked_mul(radix);
1084+
let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?;
1085+
result = mul.ok_or_else($overflow_err)?;
1086+
result = T::$checked_additive_op(&result, x).ok_or_else($overflow_err)?;
1087+
}
10651088
};
10661089
}
1090+
if is_positive {
1091+
run_checked_loop!(checked_add, || PIE { kind: PosOverflow })
1092+
} else {
1093+
run_checked_loop!(checked_sub, || PIE { kind: NegOverflow })
1094+
};
10671095
}
10681096
Ok(result)
10691097
}

library/core/tests/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
#![feature(numfmt)]
5454
#![feature(step_trait)]
5555
#![feature(str_internals)]
56+
#![feature(std_internals)]
5657
#![feature(test)]
5758
#![feature(trusted_len)]
5859
#![feature(try_blocks)]

library/core/tests/num/mod.rs

+70-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use core::cmp::PartialEq;
22
use core::convert::{TryFrom, TryInto};
33
use core::fmt::Debug;
44
use core::marker::Copy;
5-
use core::num::{IntErrorKind, ParseIntError, TryFromIntError};
5+
use core::num::{can_not_overflow, IntErrorKind, ParseIntError, TryFromIntError};
66
use core::ops::{Add, Div, Mul, Rem, Sub};
77
use core::option::Option;
88
use core::option::Option::None;
@@ -120,6 +120,75 @@ fn test_int_from_str_overflow() {
120120
test_parse::<i64>("-9223372036854775809", Err(IntErrorKind::NegOverflow));
121121
}
122122

123+
#[test]
124+
fn test_can_not_overflow() {
125+
fn can_overflow<T>(radix: u32, input: &str) -> bool
126+
where
127+
T: std::convert::TryFrom<i8>,
128+
{
129+
!can_not_overflow::<T>(radix, T::try_from(-1_i8).is_ok(), input.as_bytes())
130+
}
131+
132+
// Positive tests:
133+
assert!(!can_overflow::<i8>(16, "F"));
134+
assert!(!can_overflow::<u8>(16, "FF"));
135+
136+
assert!(!can_overflow::<i8>(10, "9"));
137+
assert!(!can_overflow::<u8>(10, "99"));
138+
139+
// Negative tests:
140+
141+
// Not currently in std lib (issue: #27728)
142+
fn format_radix<T>(mut x: T, radix: T) -> String
143+
where
144+
T: std::ops::Rem<Output = T>,
145+
T: std::ops::Div<Output = T>,
146+
T: std::cmp::PartialEq,
147+
T: std::default::Default,
148+
T: Copy,
149+
T: Default,
150+
u32: TryFrom<T>,
151+
{
152+
let mut result = vec![];
153+
154+
loop {
155+
let m = x % radix;
156+
x = x / radix;
157+
result.push(
158+
std::char::from_digit(m.try_into().ok().unwrap(), radix.try_into().ok().unwrap())
159+
.unwrap(),
160+
);
161+
if x == T::default() {
162+
break;
163+
}
164+
}
165+
result.into_iter().rev().collect()
166+
}
167+
168+
macro_rules! check {
169+
($($t:ty)*) => ($(
170+
for base in 2..=36 {
171+
let num = (<$t>::MAX as u128) + 1;
172+
173+
// Calcutate the string length for the smallest overflowing number:
174+
let max_len_string = format_radix(num, base as u128);
175+
// Ensure that that string length is deemed to potentially overflow:
176+
assert!(can_overflow::<$t>(base, &max_len_string));
177+
}
178+
)*)
179+
}
180+
181+
check! { i8 i16 i32 i64 i128 isize usize u8 u16 u32 u64 }
182+
183+
// Check u128 separately:
184+
for base in 2..=36 {
185+
let num = u128::MAX as u128;
186+
let max_len_string = format_radix(num, base as u128);
187+
// base 16 fits perfectly for u128 and won't overflow:
188+
assert_eq!(can_overflow::<u128>(base, &max_len_string), base != 16);
189+
}
190+
}
191+
123192
#[test]
124193
fn test_leading_plus() {
125194
test_parse::<u8>("+127", Ok(127));

0 commit comments

Comments
 (0)