diff --git a/library/alloc/src/lib.rs b/library/alloc/src/lib.rs index 0bb7c432cc35f..2795fe5c769d9 100644 --- a/library/alloc/src/lib.rs +++ b/library/alloc/src/lib.rs @@ -105,6 +105,7 @@ #![feature(box_uninit_write)] #![feature(bstr)] #![feature(bstr_internals)] +#![feature(char_max_len)] #![feature(clone_to_uninit)] #![feature(coerce_unsized)] #![feature(const_eval_select)] diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs index b29f740ef0f2a..37e9e05a6a1cc 100644 --- a/library/alloc/src/string.rs +++ b/library/alloc/src/string.rs @@ -1419,7 +1419,9 @@ impl String { pub fn push(&mut self, ch: char) { match ch.len_utf8() { 1 => self.vec.push(ch as u8), - _ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()), + _ => { + self.vec.extend_from_slice(ch.encode_utf8(&mut [0; char::MAX_LEN_UTF8]).as_bytes()) + } } } @@ -1716,7 +1718,7 @@ impl String { #[rustc_confusables("set")] pub fn insert(&mut self, idx: usize, ch: char) { assert!(self.is_char_boundary(idx)); - let mut bits = [0; 4]; + let mut bits = [0; char::MAX_LEN_UTF8]; let bits = ch.encode_utf8(&mut bits).as_bytes(); unsafe { @@ -2771,7 +2773,7 @@ impl SpecToString for core::ascii::Char { impl SpecToString for char { #[inline] fn spec_to_string(&self) -> String { - String::from(self.encode_utf8(&mut [0; 4])) + String::from(self.encode_utf8(&mut [0; char::MAX_LEN_UTF8])) } } diff --git a/library/alloc/tests/lib.rs b/library/alloc/tests/lib.rs index 391ff04a4b8e4..708dfc582a7eb 100644 --- a/library/alloc/tests/lib.rs +++ b/library/alloc/tests/lib.rs @@ -3,6 +3,7 @@ #![feature(iter_array_chunks)] #![feature(assert_matches)] #![feature(btree_extract_if)] +#![feature(char_max_len)] #![feature(cow_is_borrowed)] #![feature(core_intrinsics)] #![feature(downcast_unchecked)] diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs index 6f930ab08535c..aeb0e681c226f 100644 --- a/library/alloc/tests/str.rs +++ b/library/alloc/tests/str.rs @@ -2,6 +2,7 @@ use std::assert_matches::assert_matches; use std::borrow::Cow; +use std::char::MAX_LEN_UTF8; use std::cmp::Ordering::{Equal, Greater, Less}; use std::str::{from_utf8, from_utf8_unchecked}; @@ -1231,7 +1232,7 @@ fn test_to_uppercase_rev_iterator() { #[test] #[cfg_attr(miri, ignore)] // Miri is too slow fn test_chars_decoding() { - let mut bytes = [0; 4]; + let mut bytes = [0; MAX_LEN_UTF8]; for c in (0..0x110000).filter_map(std::char::from_u32) { let s = c.encode_utf8(&mut bytes); if Some(c) != s.chars().next() { @@ -1243,7 +1244,7 @@ fn test_chars_decoding() { #[test] #[cfg_attr(miri, ignore)] // Miri is too slow fn test_chars_rev_decoding() { - let mut bytes = [0; 4]; + let mut bytes = [0; MAX_LEN_UTF8]; for c in (0..0x110000).filter_map(std::char::from_u32) { let s = c.encode_utf8(&mut bytes); if Some(c) != s.chars().rev().next() { diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index ccfdbf0eb704d..40137ea44ddce 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -71,6 +71,16 @@ impl char { #[stable(feature = "assoc_char_consts", since = "1.52.0")] pub const MAX: char = '\u{10FFFF}'; + /// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to + /// UTF-8 encoding. + #[unstable(feature = "char_max_len", issue = "121714")] + pub const MAX_LEN_UTF8: usize = 4; + + /// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char` + /// to UTF-16 encoding. + #[unstable(feature = "char_max_len", issue = "121714")] + pub const MAX_LEN_UTF16: usize = 2; + /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a /// decoding error. /// diff --git a/library/core/src/char/mod.rs b/library/core/src/char/mod.rs index 59fd7250e8f8e..088c709f1a2af 100644 --- a/library/core/src/char/mod.rs +++ b/library/core/src/char/mod.rs @@ -95,6 +95,16 @@ const MAX_THREE_B: u32 = 0x10000; #[stable(feature = "rust1", since = "1.0.0")] pub const MAX: char = char::MAX; +/// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to +/// UTF-8 encoding. +#[unstable(feature = "char_max_len", issue = "121714")] +pub const MAX_LEN_UTF8: usize = char::MAX_LEN_UTF8; + +/// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char` +/// to UTF-16 encoding. +#[unstable(feature = "char_max_len", issue = "121714")] +pub const MAX_LEN_UTF16: usize = char::MAX_LEN_UTF16; + /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a /// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead. #[stable(feature = "decode_utf16", since = "1.9.0")] diff --git a/library/core/src/fmt/mod.rs b/library/core/src/fmt/mod.rs index a1bf3a4d7a706..764e7fff33ec7 100644 --- a/library/core/src/fmt/mod.rs +++ b/library/core/src/fmt/mod.rs @@ -3,7 +3,7 @@ #![stable(feature = "rust1", since = "1.0.0")] use crate::cell::{Cell, Ref, RefCell, RefMut, SyncUnsafeCell, UnsafeCell}; -use crate::char::EscapeDebugExtArgs; +use crate::char::{EscapeDebugExtArgs, MAX_LEN_UTF8}; use crate::marker::PhantomData; use crate::num::fmt as numfmt; use crate::ops::Deref; @@ -187,7 +187,7 @@ pub trait Write { /// ``` #[stable(feature = "fmt_write_char", since = "1.1.0")] fn write_char(&mut self, c: char) -> Result { - self.write_str(c.encode_utf8(&mut [0; 4])) + self.write_str(c.encode_utf8(&mut [0; MAX_LEN_UTF8])) } /// Glue for usage of the [`write!`] macro with implementors of this trait. @@ -2768,7 +2768,7 @@ impl Display for char { if f.options.width.is_none() && f.options.precision.is_none() { f.write_char(*self) } else { - f.pad(self.encode_utf8(&mut [0; 4])) + f.pad(self.encode_utf8(&mut [0; MAX_LEN_UTF8])) } } } diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs index 52e2364893eb1..2d941adfd859c 100644 --- a/library/core/src/str/pattern.rs +++ b/library/core/src/str/pattern.rs @@ -38,6 +38,7 @@ issue = "27721" )] +use crate::char::MAX_LEN_UTF8; use crate::cmp::Ordering; use crate::convert::TryInto as _; use crate::slice::memchr; @@ -561,8 +562,8 @@ impl Pattern for char { type Searcher<'a> = CharSearcher<'a>; #[inline] - fn into_searcher(self, haystack: &str) -> Self::Searcher<'_> { - let mut utf8_encoded = [0; 4]; + fn into_searcher<'a>(self, haystack: &'a str) -> Self::Searcher<'a> { + let mut utf8_encoded = [0; MAX_LEN_UTF8]; let utf8_size = self .encode_utf8(&mut utf8_encoded) .len() diff --git a/library/coretests/tests/char.rs b/library/coretests/tests/char.rs index 6422387e9560b..153fb36925e66 100644 --- a/library/coretests/tests/char.rs +++ b/library/coretests/tests/char.rs @@ -1,3 +1,4 @@ +use std::char::MAX_LEN_UTF8; use std::str::FromStr; use std::{char, str}; @@ -259,7 +260,7 @@ fn test_escape_unicode() { #[test] fn test_encode_utf8() { fn check(input: char, expect: &[u8]) { - let mut buf = [0; 4]; + let mut buf = [0; MAX_LEN_UTF8]; let ptr = buf.as_ptr(); let s = input.encode_utf8(&mut buf); assert_eq!(s.as_ptr() as usize, ptr as usize); diff --git a/library/coretests/tests/lib.rs b/library/coretests/tests/lib.rs index 7fe7286260858..adceeb101ff35 100644 --- a/library/coretests/tests/lib.rs +++ b/library/coretests/tests/lib.rs @@ -13,6 +13,7 @@ #![feature(bigint_helper_methods)] #![feature(bstr)] #![feature(cell_update)] +#![feature(char_max_len)] #![feature(clone_to_uninit)] #![feature(const_eval_select)] #![feature(const_swap_nonoverlapping)] diff --git a/library/std/src/fs/tests.rs b/library/std/src/fs/tests.rs index 28f16da1ed8d2..3e26e7007144a 100644 --- a/library/std/src/fs/tests.rs +++ b/library/std/src/fs/tests.rs @@ -1,5 +1,6 @@ use rand::RngCore; +use crate::char::MAX_LEN_UTF8; use crate::fs::{self, File, FileTimes, OpenOptions}; use crate::io::prelude::*; use crate::io::{BorrowedBuf, ErrorKind, SeekFrom}; @@ -155,7 +156,7 @@ fn file_test_io_non_positional_read() { #[test] fn file_test_io_seek_and_tell_smoke_test() { let message = "ten-four"; - let mut read_mem = [0; 4]; + let mut read_mem = [0; MAX_LEN_UTF8]; let set_cursor = 4 as u64; let tell_pos_pre_read; let tell_pos_post_read; @@ -356,7 +357,7 @@ fn file_test_io_seek_shakedown() { let chunk_one: &str = "qwer"; let chunk_two: &str = "asdf"; let chunk_three: &str = "zxcv"; - let mut read_mem = [0; 4]; + let mut read_mem = [0; MAX_LEN_UTF8]; let tmpdir = tmpdir(); let filename = &tmpdir.join("file_rt_io_file_test_seek_shakedown.txt"); { @@ -621,7 +622,7 @@ fn file_test_directoryinfo_readdir() { check!(w.write(msg)); } let files = check!(fs::read_dir(dir)); - let mut mem = [0; 4]; + let mut mem = [0; MAX_LEN_UTF8]; for f in files { let f = f.unwrap().path(); { diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index acb3a0578e505..f75c56d4fa510 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -281,6 +281,7 @@ #![feature(cfg_sanitizer_cfi)] #![feature(cfg_target_thread_local)] #![feature(cfi_encoding)] +#![feature(char_max_len)] #![feature(concat_idents)] #![feature(decl_macro)] #![feature(deprecated_suggestion)] diff --git a/library/std/src/sys/pal/windows/stdio.rs b/library/std/src/sys/pal/windows/stdio.rs index fd3f559ba1901..1b245991aa797 100644 --- a/library/std/src/sys/pal/windows/stdio.rs +++ b/library/std/src/sys/pal/windows/stdio.rs @@ -1,5 +1,6 @@ #![unstable(issue = "none", feature = "windows_stdio")] +use core::char::MAX_LEN_UTF8; use core::str::utf8_char_width; use super::api::{self, WinError}; @@ -426,7 +427,7 @@ fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result { impl IncompleteUtf8 { pub const fn new() -> IncompleteUtf8 { - IncompleteUtf8 { bytes: [0; 4], len: 0 } + IncompleteUtf8 { bytes: [0; MAX_LEN_UTF8], len: 0 } } } diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs index 6c60d901ee904..a7704a65fc9f5 100644 --- a/library/std/src/sys_common/wtf8.rs +++ b/library/std/src/sys_common/wtf8.rs @@ -18,7 +18,7 @@ #[cfg(test)] mod tests; -use core::char::{encode_utf8_raw, encode_utf16_raw}; +use core::char::{MAX_LEN_UTF8, MAX_LEN_UTF16, encode_utf8_raw, encode_utf16_raw}; use core::clone::CloneToUninit; use core::str::next_code_point; @@ -240,7 +240,7 @@ impl Wtf8Buf { /// Copied from String::push /// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check. fn push_code_point_unchecked(&mut self, code_point: CodePoint) { - let mut bytes = [0; 4]; + let mut bytes = [0; MAX_LEN_UTF8]; let bytes = encode_utf8_raw(code_point.value, &mut bytes); self.bytes.extend_from_slice(bytes) } @@ -1001,7 +1001,7 @@ impl<'a> Iterator for EncodeWide<'a> { return Some(tmp); } - let mut buf = [0; 2]; + let mut buf = [0; MAX_LEN_UTF16]; self.code_points.next().map(|code_point| { let n = encode_utf16_raw(code_point.value, &mut buf).len(); if n == 2 {