Skip to content

core: add core::ascii::Char::from_digit #118963

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 44 additions & 16 deletions library/core/src/ascii/ascii_char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
//! suggestions from rustc if you get anything slightly wrong in here, and overall
//! helps with clarity as we're also referring to `char` intentionally in here.

use crate::fmt::{self, Write};
use crate::fmt;
use crate::mem::transmute;

/// One of the 128 Unicode characters from U+0000 through U+007F,
Expand Down Expand Up @@ -474,7 +474,8 @@ impl AsciiChar {
/// When passed the *number* `0`, `1`, …, `9`, returns the *character*
/// `'0'`, `'1'`, …, `'9'` respectively.
///
/// If `d >= 10`, returns `None`.
/// If `d >= 10`, returns `None`. To get a digit up to `d == 35`, use
/// [`from_digit`](Self::from_digit) instead.
#[unstable(feature = "ascii_char", issue = "110998")]
#[inline]
pub const fn digit(d: u8) -> Option<Self> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW, that only reason I added this method was to use it for things like '0' + d in

radix! { Binary, 2, "0b", x @ 0 ..= 1 => b'0' + x }
radix! { Octal, 8, "0o", x @ 0 ..= 7 => b'0' + x }

But then it looks like it's not actually used. So it's possible that maybe it shouldn't have existed at all; I don't know.

(The idea was to give something to point to when people say "but '0' + d was so nice". But for hex there was never that shorthand, so having people use "…".as_ascii().unwrap()[d] might be fine for those non-decimal uses.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem at the moment is unwrap not being const, so one has to
write some annoying code:

    const DIGITS: &[AsciiChar; 16] = match "...".as_ascii() {
        Some(digits) => digits,
        None => panic!()
    };

It’s not the end of the world of course, but it’s certainly less
convenient that a AsciiChar::from_digit call.

Though I can see reasons to wait for const unwrap or a"..." syntax
(proposed in the tracking issue) before considering adding this
function.


By the way, maybe for b'0' + d the solution is to implement Add?
In debug builds it would panic on overflow while on release it would
wrap (i.e. mask out most significant bit).

Expand Down Expand Up @@ -515,6 +516,37 @@ impl AsciiChar {
}
}

/// Returns a *character* digit corresponding to given numeric value of
/// a digit.
///
/// When passed the *number* `0`, `1`, …, `8`, `9`, `10`, …, `34`, `35`,
/// returns the *character* `'0'`, `'1'`, …, `'8'`, `'9'`, `'a'`, …, `'y'`,
/// `'z'` respectively.
///
/// If `d >= 36`, returns `None`. To get a digit only for `d < 10`, use
/// [`digit`](Self::digit) instead.
///
/// # Example
///
/// ```
/// #![feature(ascii_char, ascii_char_variants)]
/// use core::ascii::Char;
///
/// assert_eq!(Some(Char::Digit0), Char::from_digit(0));
/// assert_eq!(Some(Char::Digit9), Char::from_digit(9));
/// assert_eq!(Some(Char::SmallA), Char::from_digit(10));
/// assert_eq!(Some(Char::SmallF), Char::from_digit(15));
/// assert_eq!(Some(Char::SmallZ), Char::from_digit(35));
/// assert_eq!(None, Char::from_digit(36));
/// ```
#[unstable(feature = "ascii_char", issue = "110998")]
#[inline]
pub const fn from_digit(d: u32) -> Option<Self> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is u32 the best option here? char takes u32, but char is a 4-byte type. Should this take u8 since it's a 1-byte type? Should digit also take u32, if u32 is somehow better?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this was motivated by char::from_digit. I really don’t know what makes more sense here.

const DIGITS: [AsciiChar; 36] =
*b"0123456789abcdefghijklmnopqrstuvwxyz".as_ascii().unwrap();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One thing I don't know how to solve here is that people might also want upper-case. Or base-64 style where a is zero. Why is lower-case special?

Oh, I guess char::from_digit does it this way. Fair enough, then, I guess.

if d < 36 { Some(DIGITS[d as usize]) } else { None }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this more efficient than simply DIGITS.get(d as usize)? Because to me it seems like there is a nother implicit range check in DIGITS[] that could be avoided.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get isn’t const so it cannot be used here. The compiler should be smart enough to remove the second range check.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it.

}

/// Gets this ASCII character as a byte.
#[unstable(feature = "ascii_char", issue = "110998")]
#[inline]
Expand Down Expand Up @@ -567,12 +599,14 @@ impl fmt::Display for AsciiChar {
#[unstable(feature = "ascii_char", issue = "110998")]
impl fmt::Debug for AsciiChar {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use AsciiChar::*;

#[inline]
fn backslash(a: AsciiChar) -> ([AsciiChar; 4], u8) {
([AsciiChar::ReverseSolidus, a, AsciiChar::Null, AsciiChar::Null], 2)
fn backslash(ch: AsciiChar) -> ([AsciiChar; 6], usize) {
([Apostrophe, ReverseSolidus, ch, Apostrophe, Null, Null], 4)
}

let (buf, len) = match self {
let (buf, len) = match *self {
AsciiChar::Null => backslash(AsciiChar::Digit0),
AsciiChar::CharacterTabulation => backslash(AsciiChar::SmallT),
AsciiChar::CarriageReturn => backslash(AsciiChar::SmallR),
Expand All @@ -582,21 +616,15 @@ impl fmt::Debug for AsciiChar {
_ => {
let byte = self.to_u8();
if !byte.is_ascii_control() {
([*self, AsciiChar::Null, AsciiChar::Null, AsciiChar::Null], 1)
([Apostrophe, *self, Apostrophe, Null, Null, Null], 3)
} else {
const HEX_DIGITS: [AsciiChar; 16] = *b"0123456789abcdef".as_ascii().unwrap();

let hi = HEX_DIGITS[usize::from(byte >> 4)];
let lo = HEX_DIGITS[usize::from(byte & 0xf)];
([AsciiChar::ReverseSolidus, AsciiChar::SmallX, hi, lo], 4)
let hi = Self::from_digit(u32::from(byte >> 4)).unwrap();
let lo = Self::from_digit(u32::from(byte & 0xf)).unwrap();
([Apostrophe, ReverseSolidus, SmallX, hi, lo, Apostrophe], 6)
}
}
};

f.write_char('\'')?;
for byte in &buf[..len as usize] {
f.write_str(byte.as_str())?;
}
f.write_char('\'')
f.write_str(buf[..len].as_str())
}
}
12 changes: 7 additions & 5 deletions library/core/src/char/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,11 +282,13 @@ impl fmt::Display for CharTryFromError {
pub(super) const fn from_digit(num: u32, radix: u32) -> Option<char> {
if radix > 36 {
panic!("from_digit: radix is too high (maximum 36)");
}
if num < radix {
let num = num as u8;
if num < 10 { Some((b'0' + num) as char) } else { Some((b'a' + num - 10) as char) }
} else {
} else if num >= radix {
None
} else if let Some(chr) = crate::ascii::Char::from_digit(num) {
Some(chr.to_char())
} else {
// SAFETY: We’ve verified `num ≤ radix ≤ 36` and for those values
// ascii::Char::from_digit always returns Some.
unsafe { crate::hint::unreachable_unchecked() }
}
}
38 changes: 38 additions & 0 deletions library/core/tests/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,44 @@ fn ascii_ctype_const() {
}
}

#[test]
fn test_ascii_from_digit() {
use core::ascii::Char;

for d in 0..10 {
let want = Char::from_u8(b'0' + d).unwrap();
assert_eq!(Some(want), Char::digit(d));
assert_eq!(Some(want), Char::from_digit(d.into()));
}

for d in 10..36 {
let want = Char::from_u8(b'a' + d - 10).unwrap();
assert_eq!(None, Char::digit(d));
assert_eq!(Some(want), Char::from_digit(d.into()));
}

for d in 36..=255 {
assert_eq!(None, Char::digit(d));
assert_eq!(None, Char::from_digit(d.into()));
}
}

#[test]
fn test_ascii_char_fmt() {
use core::ascii::Char;

#[track_caller]
fn check(want_display: &str, want_debug: &str, ch: Char) {
assert_eq!(want_display, format!("{ch}"));
assert_eq!(want_debug, format!("{ch:?}"));
}

check("\0", "'\\0'", Char::Null);
check("\n", "'\\n'", Char::LineFeed);
check("\x07", "'\\x07'", Char::Bell);
check("a", "'a'", Char::SmallA);
}

#[test]
fn test_ascii_display() {
assert_eq!(b"foo'bar".escape_ascii().to_string(), r#"foo\'bar"#);
Expand Down