@@ -130,7 +130,7 @@ impl<'a> StringReader<'a> {
130
130
self . ch . is_none ( )
131
131
}
132
132
133
- fn fail_unterminated_raw_string ( & self , pos : BytePos , hash_count : u16 ) {
133
+ fn fail_unterminated_raw_string ( & self , pos : BytePos , hash_count : u16 ) -> ! {
134
134
let mut err = self . struct_span_fatal ( pos, pos, "unterminated raw string" ) ;
135
135
err. span_label ( self . mk_sp ( pos, pos) , "unterminated raw string" ) ;
136
136
@@ -292,15 +292,6 @@ impl<'a> StringReader<'a> {
292
292
self . sess . span_diagnostic . struct_span_fatal ( self . mk_sp ( from_pos, to_pos) , & m[ ..] )
293
293
}
294
294
295
- /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
296
- /// escaped character to the error message
297
- fn err_span_char ( & self , from_pos : BytePos , to_pos : BytePos , m : & str , c : char ) {
298
- let mut m = m. to_string ( ) ;
299
- m. push_str ( ": " ) ;
300
- push_escaped_char ( & mut m, c) ;
301
- self . err_span_ ( from_pos, to_pos, & m[ ..] ) ;
302
- }
303
-
304
295
/// Advance peek_token to refer to the next token, and
305
296
/// possibly update the interner.
306
297
fn advance_token ( & mut self ) -> Result < ( ) , ( ) > {
@@ -1070,7 +1061,13 @@ impl<'a> StringReader<'a> {
1070
1061
self . validate_byte_str_escape ( start_with_quote) ;
1071
1062
( token:: ByteStr , symbol)
1072
1063
} ,
1073
- Some ( 'r' ) => self . scan_raw_byte_string ( ) ,
1064
+ Some ( 'r' ) => {
1065
+ let ( start, end, hash_count) = self . scan_raw_string ( ) ;
1066
+ let symbol = self . name_from_to ( start, end) ;
1067
+ self . validate_raw_byte_str_escape ( start, end) ;
1068
+
1069
+ ( token:: ByteStrRaw ( hash_count) , symbol)
1070
+ }
1074
1071
_ => unreachable ! ( ) , // Should have been a token::Ident above.
1075
1072
} ;
1076
1073
let suffix = self . scan_optional_raw_name ( ) ;
@@ -1086,79 +1083,9 @@ impl<'a> StringReader<'a> {
1086
1083
Ok ( TokenKind :: lit ( token:: Str , symbol, suffix) )
1087
1084
}
1088
1085
'r' => {
1089
- let start_bpos = self . pos ;
1090
- self . bump ( ) ;
1091
- let mut hash_count: u16 = 0 ;
1092
- while self . ch_is ( '#' ) {
1093
- if hash_count == 65535 {
1094
- let bpos = self . next_pos ;
1095
- self . fatal_span_ ( start_bpos,
1096
- bpos,
1097
- "too many `#` symbols: raw strings may be \
1098
- delimited by up to 65535 `#` symbols") . raise ( ) ;
1099
- }
1100
- self . bump ( ) ;
1101
- hash_count += 1 ;
1102
- }
1103
-
1104
- if self . is_eof ( ) {
1105
- self . fail_unterminated_raw_string ( start_bpos, hash_count) ;
1106
- } else if !self . ch_is ( '"' ) {
1107
- let last_bpos = self . pos ;
1108
- let curr_char = self . ch . unwrap ( ) ;
1109
- self . fatal_span_char ( start_bpos,
1110
- last_bpos,
1111
- "found invalid character; only `#` is allowed \
1112
- in raw string delimitation",
1113
- curr_char) . raise ( ) ;
1114
- }
1115
- self . bump ( ) ;
1116
- let content_start_bpos = self . pos ;
1117
- let mut content_end_bpos;
1118
- let mut valid = true ;
1119
- ' outer: loop {
1120
- if self . is_eof ( ) {
1121
- self . fail_unterminated_raw_string ( start_bpos, hash_count) ;
1122
- }
1123
- // if self.ch_is('"') {
1124
- // content_end_bpos = self.pos;
1125
- // for _ in 0..hash_count {
1126
- // self.bump();
1127
- // if !self.ch_is('#') {
1128
- // continue 'outer;
1129
- let c = self . ch . unwrap ( ) ;
1130
- match c {
1131
- '"' => {
1132
- content_end_bpos = self . pos ;
1133
- for _ in 0 ..hash_count {
1134
- self . bump ( ) ;
1135
- if !self . ch_is ( '#' ) {
1136
- continue ' outer;
1137
- }
1138
- }
1139
- break ;
1140
- }
1141
- '\r' => {
1142
- if !self . nextch_is ( '\n' ) {
1143
- let last_bpos = self . pos ;
1144
- self . err_span_ ( start_bpos,
1145
- last_bpos,
1146
- "bare CR not allowed in raw string, use \\ r \
1147
- instead") ;
1148
- valid = false ;
1149
- }
1150
- }
1151
- _ => ( ) ,
1152
- }
1153
- self . bump ( ) ;
1154
- }
1155
-
1156
- self . bump ( ) ;
1157
- let symbol = if valid {
1158
- self . name_from_to ( content_start_bpos, content_end_bpos)
1159
- } else {
1160
- Symbol :: intern ( "??" )
1161
- } ;
1086
+ let ( start, end, hash_count) = self . scan_raw_string ( ) ;
1087
+ let symbol = self . name_from_to ( start, end) ;
1088
+ self . validate_raw_str_escape ( start, end) ;
1162
1089
let suffix = self . scan_optional_raw_name ( ) ;
1163
1090
1164
1091
Ok ( TokenKind :: lit ( token:: StrRaw ( hash_count) , symbol, suffix) )
@@ -1315,16 +1242,18 @@ impl<'a> StringReader<'a> {
1315
1242
id
1316
1243
}
1317
1244
1318
- fn scan_raw_byte_string ( & mut self ) -> ( token:: LitKind , Symbol ) {
1245
+ /// Scans a raw (byte) string, returning the byte position range of the `<literal>` contents
1246
+ /// (excluding the surrounding quotes) along with the `#` character count in `(b)r##..."<literal>"##...`.
1247
+ fn scan_raw_string ( & mut self ) -> ( BytePos , BytePos , u16 ) {
1319
1248
let start_bpos = self . pos ;
1320
1249
self . bump ( ) ;
1321
- let mut hash_count = 0 ;
1250
+ let mut hash_count: u16 = 0 ;
1322
1251
while self . ch_is ( '#' ) {
1323
1252
if hash_count == 65535 {
1324
1253
let bpos = self . next_pos ;
1325
1254
self . fatal_span_ ( start_bpos,
1326
1255
bpos,
1327
- "too many `#` symbols: raw byte strings may be \
1256
+ "too many `#` symbols: raw strings may be \
1328
1257
delimited by up to 65535 `#` symbols") . raise ( ) ;
1329
1258
}
1330
1259
self . bump ( ) ;
@@ -1334,13 +1263,13 @@ impl<'a> StringReader<'a> {
1334
1263
if self . is_eof ( ) {
1335
1264
self . fail_unterminated_raw_string ( start_bpos, hash_count) ;
1336
1265
} else if !self . ch_is ( '"' ) {
1337
- let pos = self . pos ;
1338
- let ch = self . ch . unwrap ( ) ;
1266
+ let last_bpos = self . pos ;
1267
+ let curr_char = self . ch . unwrap ( ) ;
1339
1268
self . fatal_span_char ( start_bpos,
1340
- pos ,
1341
- "found invalid character; only `#` is allowed in raw \
1342
- string delimitation",
1343
- ch ) . raise ( ) ;
1269
+ last_bpos ,
1270
+ "found invalid character; only `#` is allowed \
1271
+ in raw string delimitation",
1272
+ curr_char ) . raise ( ) ;
1344
1273
}
1345
1274
self . bump ( ) ;
1346
1275
let content_start_bpos = self . pos ;
@@ -1360,19 +1289,14 @@ impl<'a> StringReader<'a> {
1360
1289
}
1361
1290
break ;
1362
1291
}
1363
- Some ( c) => {
1364
- if c > '\x7F' {
1365
- let pos = self . pos ;
1366
- self . err_span_char ( pos, pos, "raw byte string must be ASCII" , c) ;
1367
- }
1368
- }
1292
+ _ => ( ) ,
1369
1293
}
1370
1294
self . bump ( ) ;
1371
1295
}
1372
1296
1373
1297
self . bump ( ) ;
1374
1298
1375
- ( token :: ByteStrRaw ( hash_count ) , self . name_from_to ( content_start_bpos, content_end_bpos) )
1299
+ ( content_start_bpos, content_end_bpos, hash_count )
1376
1300
}
1377
1301
1378
1302
fn validate_char_escape ( & self , start_with_quote : BytePos ) {
@@ -1422,6 +1346,40 @@ impl<'a> StringReader<'a> {
1422
1346
} ) ;
1423
1347
}
1424
1348
1349
+ fn validate_raw_str_escape ( & self , content_start : BytePos , content_end : BytePos ) { // Runs `unescape_raw_str` over the source text between `content_start` and `content_end` and forwards every error it yields to `emit_unescape_error`.
1350
+ self . with_str_from_to ( content_start, content_end, |lit : & str | {
1351
+ unescape:: unescape_raw_str ( lit, & mut |range, c| { // callback receives a range and a result; only the `Err` case is acted on
1352
+ if let Err ( err) = c {
1353
+ emit_unescape_error (
1354
+ & self . sess . span_diagnostic ,
1355
+ lit,
1356
+ self . mk_sp ( content_start - BytePos ( 1 ) , content_end + BytePos ( 1 ) ) , // span is one byte wider than the content on each side; presumably this covers the surrounding quotes (TODO confirm against scan_raw_string)
1357
+ unescape:: Mode :: Str , // non-byte variant: paired with `unescape_raw_str` above
1358
+ range,
1359
+ err,
1360
+ )
1361
+ }
1362
+ } )
1363
+ } ) ;
1364
+ }
1365
+
1366
+ fn validate_raw_byte_str_escape ( & self , content_start : BytePos , content_end : BytePos ) { // Runs `unescape_raw_byte_str` over the source text between `content_start` and `content_end` and forwards every error it yields to `emit_unescape_error`.
1367
+ self . with_str_from_to ( content_start, content_end, |lit : & str | {
1368
+ unescape:: unescape_raw_byte_str ( lit, & mut |range, c| { // callback receives a range and a result; only the `Err` case is acted on
1369
+ if let Err ( err) = c {
1370
+ emit_unescape_error (
1371
+ & self . sess . span_diagnostic ,
1372
+ lit,
1373
+ self . mk_sp ( content_start - BytePos ( 1 ) , content_end + BytePos ( 1 ) ) , // span is one byte wider than the content on each side; presumably this covers the surrounding quotes (TODO confirm against scan_raw_string)
1374
+ unescape:: Mode :: ByteStr , // byte variant: paired with `unescape_raw_byte_str` above
1375
+ range,
1376
+ err,
1377
+ )
1378
+ }
1379
+ } )
1380
+ } ) ;
1381
+ }
1382
+
1425
1383
fn validate_byte_str_escape ( & self , start_with_quote : BytePos ) {
1426
1384
self . with_str_from_to ( start_with_quote + BytePos ( 1 ) , self . pos - BytePos ( 1 ) , |lit| {
1427
1385
unescape:: unescape_byte_str ( lit, & mut |range, c| {
0 commit comments