Skip to content

Commit bedbf72

Browse files
committed
Auto merge of #50566 - nnethercote:bump, r=petrochenkov
Streamline `StringReader::bump`

These patches make `bump` smaller and nicer. They speed up most runs for coercions and tuple-stress by 1–3%.
2 parents 612ca14 + e913d69 commit bedbf72

File tree

4 files changed

+66
-65
lines changed

4 files changed

+66
-65
lines changed

src/libsyntax/parse/lexer/comments.rs

+13-1
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,19 @@ fn read_block_comment(rdr: &mut StringReader,
238238
debug!(">>> block comment");
239239
let p = rdr.pos;
240240
let mut lines: Vec<String> = Vec::new();
241-
let col = rdr.col;
241+
242+
// Count the number of chars since the start of the line by rescanning.
243+
let mut src_index = rdr.src_index(rdr.filemap.line_begin_pos());
244+
let end_src_index = rdr.src_index(rdr.pos);
245+
assert!(src_index <= end_src_index);
246+
let mut n = 0;
247+
while src_index < end_src_index {
248+
let c = char_at(&rdr.src, src_index);
249+
src_index += c.len_utf8();
250+
n += 1;
251+
}
252+
let col = CharPos(n);
253+
242254
rdr.bump();
243255
rdr.bump();
244256

src/libsyntax/parse/lexer/mod.rs

+42-64
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,11 @@ pub struct StringReader<'a> {
4444
pub next_pos: BytePos,
4545
/// The absolute offset within the codemap of the current character
4646
pub pos: BytePos,
47-
/// The column of the next character to read
48-
pub col: CharPos,
4947
/// The current character (which has been read from self.pos)
5048
pub ch: Option<char>,
5149
pub filemap: Lrc<syntax_pos::FileMap>,
52-
/// If Some, stop reading the source at this position (inclusive).
53-
pub terminator: Option<BytePos>,
50+
/// Stop reading src at this index.
51+
pub end_src_index: usize,
5452
/// Whether to record new-lines and multibyte chars in filemap.
5553
/// This is only necessary the first time a filemap is lexed.
5654
/// If part of a filemap is being re-lexed, this should be set to false.
@@ -61,7 +59,7 @@ pub struct StringReader<'a> {
6159
pub fatal_errs: Vec<DiagnosticBuilder<'a>>,
6260
// cache a direct reference to the source text, so that we don't have to
6361
// retrieve it via `self.filemap.src.as_ref().unwrap()` all the time.
64-
source_text: Lrc<String>,
62+
src: Lrc<String>,
6563
/// Stack of open delimiters and their spans. Used for error message.
6664
token: token::Token,
6765
span: Span,
@@ -113,14 +111,7 @@ impl<'a> StringReader<'a> {
113111
self.unwrap_or_abort(res)
114112
}
115113
fn is_eof(&self) -> bool {
116-
if self.ch.is_none() {
117-
return true;
118-
}
119-
120-
match self.terminator {
121-
Some(t) => self.next_pos > t,
122-
None => false,
123-
}
114+
self.ch.is_none()
124115
}
125116
/// Return the next token. EFFECT: advances the string_reader.
126117
pub fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> {
@@ -176,21 +167,20 @@ impl<'a> StringReader<'a> {
176167
filemap.name));
177168
}
178169

179-
let source_text = (*filemap.src.as_ref().unwrap()).clone();
170+
let src = (*filemap.src.as_ref().unwrap()).clone();
180171

181172
StringReader {
182173
sess,
183174
next_pos: filemap.start_pos,
184175
pos: filemap.start_pos,
185-
col: CharPos(0),
186176
ch: Some('\n'),
187177
filemap,
188-
terminator: None,
178+
end_src_index: src.len(),
189179
save_new_lines_and_multibyte: true,
190180
// dummy values; not read
191181
peek_tok: token::Eof,
192182
peek_span: syntax_pos::DUMMY_SP,
193-
source_text,
183+
src,
194184
fatal_errs: Vec::new(),
195185
token: token::Eof,
196186
span: syntax_pos::DUMMY_SP,
@@ -222,7 +212,7 @@ impl<'a> StringReader<'a> {
222212
// Seek the lexer to the right byte range.
223213
sr.save_new_lines_and_multibyte = false;
224214
sr.next_pos = span.lo();
225-
sr.terminator = Some(span.hi());
215+
sr.end_src_index = sr.src_index(span.hi());
226216

227217
sr.bump();
228218

@@ -326,9 +316,7 @@ impl<'a> StringReader<'a> {
326316
/// offending string to the error message
327317
fn fatal_span_verbose(&self, from_pos: BytePos, to_pos: BytePos, mut m: String) -> FatalError {
328318
m.push_str(": ");
329-
let from = self.byte_offset(from_pos).to_usize();
330-
let to = self.byte_offset(to_pos).to_usize();
331-
m.push_str(&self.source_text[from..to]);
319+
m.push_str(&self.src[self.src_index(from_pos)..self.src_index(to_pos)]);
332320
self.fatal_span_(from_pos, to_pos, &m[..])
333321
}
334322

@@ -354,8 +342,9 @@ impl<'a> StringReader<'a> {
354342
Ok(())
355343
}
356344

357-
fn byte_offset(&self, pos: BytePos) -> BytePos {
358-
(pos - self.filemap.start_pos)
345+
#[inline]
346+
fn src_index(&self, pos: BytePos) -> usize {
347+
(pos - self.filemap.start_pos).to_usize()
359348
}
360349

361350
/// Calls `f` with a string slice of the source text spanning from `start`
@@ -386,7 +375,7 @@ impl<'a> StringReader<'a> {
386375
fn with_str_from_to<T, F>(&self, start: BytePos, end: BytePos, f: F) -> T
387376
where F: FnOnce(&str) -> T
388377
{
389-
f(&self.source_text[self.byte_offset(start).to_usize()..self.byte_offset(end).to_usize()])
378+
f(&self.src[self.src_index(start)..self.src_index(end)])
390379
}
391380

392381
/// Converts CRLF to LF in the given string, raising an error on bare CR.
@@ -438,47 +427,39 @@ impl<'a> StringReader<'a> {
438427
}
439428
}
440429

441-
442430
/// Advance the StringReader by one character. If a newline is
443431
/// discovered, add it to the FileMap's list of line start offsets.
444432
pub fn bump(&mut self) {
445-
let new_pos = self.next_pos;
446-
let new_byte_offset = self.byte_offset(new_pos).to_usize();
447-
let end = self.terminator.map_or(self.source_text.len(), |t| {
448-
self.byte_offset(t).to_usize()
449-
});
450-
if new_byte_offset < end {
451-
let old_ch_is_newline = self.ch.unwrap() == '\n';
452-
let new_ch = char_at(&self.source_text, new_byte_offset);
453-
let new_ch_len = new_ch.len_utf8();
454-
455-
self.ch = Some(new_ch);
456-
self.pos = new_pos;
457-
self.next_pos = new_pos + Pos::from_usize(new_ch_len);
458-
if old_ch_is_newline {
433+
let next_src_index = self.src_index(self.next_pos);
434+
if next_src_index < self.end_src_index {
435+
let next_ch = char_at(&self.src, next_src_index);
436+
let next_ch_len = next_ch.len_utf8();
437+
438+
if self.ch.unwrap() == '\n' {
459439
if self.save_new_lines_and_multibyte {
460-
self.filemap.next_line(self.pos);
440+
self.filemap.next_line(self.next_pos);
461441
}
462-
self.col = CharPos(0);
463-
} else {
464-
self.col = self.col + CharPos(1);
465442
}
466-
if new_ch_len > 1 {
443+
if next_ch_len > 1 {
467444
if self.save_new_lines_and_multibyte {
468-
self.filemap.record_multibyte_char(self.pos, new_ch_len);
445+
self.filemap.record_multibyte_char(self.next_pos, next_ch_len);
469446
}
470447
}
471-
self.filemap.record_width(self.pos, new_ch);
448+
self.filemap.record_width(self.next_pos, next_ch);
449+
450+
self.ch = Some(next_ch);
451+
self.pos = self.next_pos;
452+
self.next_pos = self.next_pos + Pos::from_usize(next_ch_len);
472453
} else {
473454
self.ch = None;
474-
self.pos = new_pos;
455+
self.pos = self.next_pos;
475456
}
476457
}
477458

478459
pub fn nextch(&self) -> Option<char> {
479-
let offset = self.byte_offset(self.next_pos).to_usize();
480-
if offset < self.source_text.len() {
481-
Some(char_at(&self.source_text, offset))
460+
let next_src_index = self.src_index(self.next_pos);
461+
if next_src_index < self.end_src_index {
462+
Some(char_at(&self.src, next_src_index))
482463
} else {
483464
None
484465
}
@@ -489,17 +470,15 @@ impl<'a> StringReader<'a> {
489470
}
490471

491472
pub fn nextnextch(&self) -> Option<char> {
492-
let offset = self.byte_offset(self.next_pos).to_usize();
493-
let s = &self.source_text[..];
494-
if offset >= s.len() {
495-
return None;
496-
}
497-
let next = offset + char_at(s, offset).len_utf8();
498-
if next < s.len() {
499-
Some(char_at(s, next))
500-
} else {
501-
None
473+
let next_src_index = self.src_index(self.next_pos);
474+
if next_src_index < self.end_src_index {
475+
let next_next_src_index =
476+
next_src_index + char_at(&self.src, next_src_index).len_utf8();
477+
if next_next_src_index < self.end_src_index {
478+
return Some(char_at(&self.src, next_next_src_index));
479+
}
502480
}
481+
None
503482
}
504483

505484
pub fn nextnextch_is(&self, c: char) -> bool {
@@ -1359,8 +1338,8 @@ impl<'a> StringReader<'a> {
13591338
loop {
13601339
self.bump();
13611340
if self.ch_is('\'') {
1362-
let start = self.byte_offset(start).to_usize();
1363-
let end = self.byte_offset(self.pos).to_usize();
1341+
let start = self.src_index(start);
1342+
let end = self.src_index(self.pos);
13641343
self.bump();
13651344
let span = self.mk_sp(start_with_quote, self.pos);
13661345
self.sess.span_diagnostic
@@ -1369,8 +1348,7 @@ impl<'a> StringReader<'a> {
13691348
.span_suggestion(span,
13701349
"if you meant to write a `str` literal, \
13711350
use double quotes",
1372-
format!("\"{}\"",
1373-
&self.source_text[start..end]))
1351+
format!("\"{}\"", &self.src[start..end]))
13741352
.emit();
13751353
return Ok(token::Literal(token::Str_(Symbol::intern("??")), None))
13761354
}

src/libsyntax/str.rs

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11+
#[inline]
1112
pub fn char_at(s: &str, byte: usize) -> char {
1213
s[byte..].chars().next().unwrap()
1314
}

src/libsyntax_pos/lib.rs

+10
Original file line numberDiff line numberDiff line change
@@ -971,6 +971,15 @@ impl FileMap {
971971
lines.push(pos);
972972
}
973973

974+
/// Return the BytePos of the beginning of the current line.
975+
pub fn line_begin_pos(&self) -> BytePos {
976+
let lines = self.lines.borrow();
977+
match lines.last() {
978+
Some(&line_pos) => line_pos,
979+
None => self.start_pos,
980+
}
981+
}
982+
974983
/// Add externally loaded source.
975984
/// If the hash of the input doesn't match or no input is supplied via None,
976985
/// it is interpreted as an error and the corresponding enum variant is set.
@@ -1047,6 +1056,7 @@ impl FileMap {
10471056
self.multibyte_chars.borrow_mut().push(mbc);
10481057
}
10491058

1059+
#[inline]
10501060
pub fn record_width(&self, pos: BytePos, ch: char) {
10511061
let width = match ch {
10521062
'\t' =>

0 commit comments

Comments
 (0)