33use crate :: ast:: { self , LitKind , MetaItemLit , StrStyle } ;
44use crate :: token:: { self , Token } ;
55use rustc_lexer:: unescape:: {
6- byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit ,
7- Mode ,
6+ byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode, MixedUnit , Mode ,
87} ;
98use rustc_span:: symbol:: { kw, sym, Symbol } ;
109use rustc_span:: Span ;
@@ -48,6 +47,9 @@ impl LitKind {
4847 return Err ( LitError :: InvalidSuffix ) ;
4948 }
5049
50+ // For byte/char/string literals, chars and escapes have already been
51+ // checked in the lexer (in `cook_lexer_literal`). So we can assume all
52+ // chars and escapes are valid here.
5153 Ok ( match kind {
5254 token:: Bool => {
5355 assert ! ( symbol. is_bool_lit( ) ) ;
@@ -56,12 +58,12 @@ impl LitKind {
5658 token:: Byte => {
5759 return unescape_byte ( symbol. as_str ( ) )
5860 . map ( LitKind :: Byte )
59- . map_err ( |_| LitError :: LexerError ) ;
61+ . map_err ( |_| panic ! ( "failed to unescape byte literal" ) ) ;
6062 }
6163 token:: Char => {
6264 return unescape_char ( symbol. as_str ( ) )
6365 . map ( LitKind :: Char )
64- . map_err ( |_| LitError :: LexerError ) ;
66+ . map_err ( |_| panic ! ( "failed to unescape char literal" ) ) ;
6567 }
6668
6769 // There are some valid suffixes for integer and float literals,
@@ -77,113 +79,69 @@ impl LitKind {
7779 let s = symbol. as_str ( ) ;
7880 // Vanilla strings are so common we optimize for the common case where no chars
7981 // requiring special behaviour are present.
80- let symbol = if s. contains ( [ '\\' , '\r' ] ) {
82+ let symbol = if s. contains ( '\\' ) {
8183 let mut buf = String :: with_capacity ( s. len ( ) ) ;
82- let mut error = Ok ( ( ) ) ;
8384 // Force-inlining here is aggressive but the closure is
84- // called on every char in the string, so it can be
85- // hot in programs with many long strings.
86- unescape_literal (
85+ // called on every char in the string, so it can be hot in
86+ // programs with many long strings containing escapes.
87+ unescape_unicode (
8788 s,
8889 Mode :: Str ,
8990 & mut #[ inline ( always) ]
90- |_, unescaped_char | match unescaped_char {
91+ |_, c | match c {
9192 Ok ( c) => buf. push ( c) ,
9293 Err ( err) => {
93- if err. is_fatal ( ) {
94- error = Err ( LitError :: LexerError ) ;
95- }
94+ assert ! ( !err. is_fatal( ) , "failed to unescape string literal" )
9695 }
9796 } ,
9897 ) ;
99- error?;
10098 Symbol :: intern ( & buf)
10199 } else {
102100 symbol
103101 } ;
104102 LitKind :: Str ( symbol, ast:: StrStyle :: Cooked )
105103 }
106104 token:: StrRaw ( n) => {
107- // Raw strings have no escapes, so we only need to check for invalid chars, and we
108- // can reuse the symbol on success.
109- let mut error = Ok ( ( ) ) ;
110- unescape_literal ( symbol. as_str ( ) , Mode :: RawStr , & mut |_, unescaped_char| {
111- match unescaped_char {
112- Ok ( _) => { }
113- Err ( err) => {
114- if err. is_fatal ( ) {
115- error = Err ( LitError :: LexerError ) ;
116- }
117- }
118- }
119- } ) ;
120- error?;
105+ // Raw strings have no escapes so no work is needed here.
121106 LitKind :: Str ( symbol, ast:: StrStyle :: Raw ( n) )
122107 }
123108 token:: ByteStr => {
124109 let s = symbol. as_str ( ) ;
125110 let mut buf = Vec :: with_capacity ( s. len ( ) ) ;
126- let mut error = Ok ( ( ) ) ;
127- unescape_literal ( s, Mode :: ByteStr , & mut |_, c| match c {
111+ unescape_unicode ( s, Mode :: ByteStr , & mut |_, c| match c {
128112 Ok ( c) => buf. push ( byte_from_char ( c) ) ,
129113 Err ( err) => {
130- if err. is_fatal ( ) {
131- error = Err ( LitError :: LexerError ) ;
132- }
114+ assert ! ( !err. is_fatal( ) , "failed to unescape string literal" )
133115 }
134116 } ) ;
135- error?;
136117 LitKind :: ByteStr ( buf. into ( ) , StrStyle :: Cooked )
137118 }
138119 token:: ByteStrRaw ( n) => {
139- // Raw strings have no escapes, so we only need to check for invalid chars, and we
140- // can convert the symbol directly to a `Lrc<u8>` on success.
141- let s = symbol. as_str ( ) ;
142- let mut error = Ok ( ( ) ) ;
143- unescape_literal ( s, Mode :: RawByteStr , & mut |_, c| match c {
144- Ok ( _) => { }
145- Err ( err) => {
146- if err. is_fatal ( ) {
147- error = Err ( LitError :: LexerError ) ;
148- }
149- }
150- } ) ;
151- LitKind :: ByteStr ( s. to_owned ( ) . into_bytes ( ) . into ( ) , StrStyle :: Raw ( n) )
120+ // Raw strings have no escapes so we can convert the symbol
121+ // directly to a `Lrc<[u8]>`.
122+ let buf = symbol. as_str ( ) . to_owned ( ) . into_bytes ( ) ;
123+ LitKind :: ByteStr ( buf. into ( ) , StrStyle :: Raw ( n) )
152124 }
153125 token:: CStr => {
154126 let s = symbol. as_str ( ) ;
155127 let mut buf = Vec :: with_capacity ( s. len ( ) ) ;
156- let mut error = Ok ( ( ) ) ;
157- unescape_c_string ( s, Mode :: CStr , & mut |_span, c| match c {
158- Ok ( CStrUnit :: Byte ( b) ) => buf. push ( b) ,
159- Ok ( CStrUnit :: Char ( c) ) => {
128+ unescape_mixed ( s, Mode :: CStr , & mut |_span, c| match c {
129+ Ok ( MixedUnit :: Char ( c) ) => {
160130 buf. extend_from_slice ( c. encode_utf8 ( & mut [ 0 ; 4 ] ) . as_bytes ( ) )
161131 }
132+ Ok ( MixedUnit :: HighByte ( b) ) => buf. push ( b) ,
162133 Err ( err) => {
163- if err. is_fatal ( ) {
164- error = Err ( LitError :: LexerError ) ;
165- }
134+ assert ! ( !err. is_fatal( ) , "failed to unescape C string literal" )
166135 }
167136 } ) ;
168- error?;
169137 buf. push ( 0 ) ;
170138 LitKind :: CStr ( buf. into ( ) , StrStyle :: Cooked )
171139 }
172140 token:: CStrRaw ( n) => {
173- // Raw strings have no escapes, so we only need to check for invalid chars, and we
174- // can convert the symbol directly to a `Lrc<u8>` on success.
175- let s = symbol. as_str ( ) ;
176- let mut error = Ok ( ( ) ) ;
177- unescape_c_string ( s, Mode :: RawCStr , & mut |_, c| match c {
178- Ok ( _) => { }
179- Err ( err) => {
180- if err. is_fatal ( ) {
181- error = Err ( LitError :: LexerError ) ;
182- }
183- }
184- } ) ;
185- error?;
186- let mut buf = s. to_owned ( ) . into_bytes ( ) ;
141+ // Raw strings have no escapes so we can convert the symbol
142+ // directly to a `Lrc<[u8]>` after appending the terminating NUL
143+ // char.
144+ let mut buf = symbol. as_str ( ) . to_owned ( ) . into_bytes ( ) ;
187145 buf. push ( 0 ) ;
188146 LitKind :: CStr ( buf. into ( ) , StrStyle :: Raw ( n) )
189147 }
0 commit comments