@@ -291,7 +291,7 @@ mod prim_never {}
291
291
/// Surrogate code points, used by UTF-16, are in the range 0xD800 to 0xDFFF.
292
292
///
293
293
/// No `char` may be constructed, whether as a literal or at runtime, that is not a
294
- /// Unicode scalar value:
294
+ /// Unicode scalar value. Violating this rule, which is only possible in `unsafe` code, causes undefined behavior.
295
295
///
296
296
/// ```compile_fail
297
297
/// // Each of these is a compiler error
@@ -308,9 +308,10 @@ mod prim_never {}
308
308
/// let _ = unsafe { char::from_u32_unchecked(0x110000) };
309
309
/// ```
310
310
///
311
- /// USVs are also the exact set of values that may be encoded in UTF-8. Because
312
- /// `char` values are USVs and `str` values are valid UTF-8, it is safe to store
313
- /// any `char` in a `str` or read any character from a `str` as a `char`.
311
+ /// Unicode scalar values are also the exact set of values that may be encoded in UTF-8. Because
312
+ /// `char` values are Unicode scalar values and functions may assume [incoming `str` values are
313
+ /// valid UTF-8](primitive.str.html#invariant), it is safe to store any `char` in a `str` or read
314
+ /// any character from a `str` as a `char`.
314
315
///
315
316
/// The gap in valid `char` values is understood by the compiler, so in the
316
317
/// below example the two ranges are understood to cover the whole range of
@@ -324,11 +325,10 @@ mod prim_never {}
324
325
/// };
325
326
/// ```
326
327
///
327
- /// All USVs are valid `char` values, but not all of them represent a real
328
- /// character. Many USVs are not currently assigned to a character, but may be
329
- /// in the future ("reserved"); some will never be a character
330
- /// ("noncharacters"); and some may be given different meanings by different
331
- /// users ("private use").
328
+ /// All Unicode scalar values are valid `char` values, but not all of them represent a real
329
+ /// character. Many Unicode scalar values are not currently assigned to a character, but may be in
330
+ /// the future ("reserved"); some will never be a character ("noncharacters"); and some may be given
331
+ /// different meanings by different users ("private use").
332
332
///
333
333
/// [Unicode code point]: https://www.unicode.org/glossary/#code_point
334
334
/// [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
@@ -887,8 +887,6 @@ mod prim_slice {}
887
887
/// type. It is usually seen in its borrowed form, `&str`. It is also the type
888
888
/// of string literals, `&'static str`.
889
889
///
890
- /// String slices are always valid UTF-8.
891
- ///
892
890
/// # Basic Usage
893
891
///
894
892
/// String literals are string slices:
@@ -942,6 +940,14 @@ mod prim_slice {}
942
940
/// Note: This example shows the internals of `&str`. `unsafe` should not be
943
941
/// used to get a string slice under normal circumstances. Use `as_str`
944
942
/// instead.
943
+ ///
944
+ /// # Invariant
945
+ ///
946
+ /// Rust libraries may assume that string slices are always valid UTF-8.
947
+ ///
948
+ /// Constructing a non-UTF-8 string slice is not immediate undefined behavior, but any function
949
+ /// called on a string slice may assume that it is valid UTF-8, which means that a non-UTF-8 string
950
+ /// slice can lead to undefined behavior later, when it is passed to such a function.
945
951
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
946
952
mod prim_str { }
947
953
0 commit comments