Skip to content

Commit e1c927b

Browse files
committed
Replaced the multi-byte character handling in end_point with a potentially more performant variant.
1 parent ee606c0 commit e1c927b

File tree

1 file changed

+59
-23
lines changed

1 file changed

+59
-23
lines changed

src/libsyntax/codemap.rs

+59-23
Original file line numberDiff line numberDiff line change
@@ -610,38 +610,74 @@ impl CodeMap {
610610

611611
/// Returns a new span representing just the end-point of this span
612612
pub fn end_point(&self, sp: Span) -> Span {
613-
let hi = sp.hi().0.checked_sub(1).unwrap_or(sp.hi().0);
614-
let hi = self.get_start_of_char_bytepos(BytePos(hi));
615-
let lo = cmp::max(hi.0, sp.lo().0);
616-
sp.with_lo(BytePos(lo))
613+
let pos = sp.hi().0;
614+
615+
let width = self.find_width_of_character_at_span(sp, false);
616+
let corrected_end_position = pos.checked_sub(width).unwrap_or(pos);
617+
618+
let end_point = BytePos(cmp::max(corrected_end_position, sp.lo().0));
619+
sp.with_lo(end_point)
617620
}
618621

619622
/// Returns a new span representing the next character after the end-point of this span
620623
pub fn next_point(&self, sp: Span) -> Span {
621-
let hi = sp.lo().0.checked_add(1).unwrap_or(sp.lo().0);
622-
let hi = self.get_start_of_char_bytepos(BytePos(hi));
623-
let lo = cmp::max(sp.hi().0, hi.0);
624-
Span::new(BytePos(lo), BytePos(lo), sp.ctxt())
624+
let pos = sp.lo().0;
625+
626+
let width = self.find_width_of_character_at_span(sp, true);
627+
let corrected_next_position = pos.checked_add(width).unwrap_or(pos);
628+
629+
let next_point = BytePos(cmp::max(sp.hi().0, corrected_next_position));
630+
Span::new(next_point, next_point, sp.ctxt())
625631
}
626632

627-
fn get_start_of_char_bytepos(&self, bpos: BytePos) -> BytePos {
628-
let idx = self.lookup_filemap_idx(bpos);
629-
let files = self.files.borrow();
630-
let map = &(*files)[idx];
633+
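/// Finds the width in bytes of the character just before (`forwards == false`) or just after (`forwards == true`) the end of the provided span, falling back to 1 when it cannot be determined.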
634+
fn find_width_of_character_at_span(&self, sp: Span, forwards: bool) -> u32 {
635+
// Disregard malformed spans and assume a one-byte wide character.
636+
if sp.lo() > sp.hi() {
637+
return 1;
638+
}
631639

632-
for mbc in map.multibyte_chars.borrow().iter() {
633-
if mbc.pos < bpos {
634-
if bpos.to_usize() >= mbc.pos.to_usize() + mbc.bytes {
635-
// If we do, then return the start of the character.
636-
return mbc.pos;
637-
}
638-
} else {
639-
break;
640-
}
640+
let local_begin = self.lookup_byte_offset(sp.lo());
641+
let local_end = self.lookup_byte_offset(sp.hi());
642+
643+
let start_index = local_begin.pos.to_usize();
644+
let end_index = local_end.pos.to_usize();
645+
646+
// Disregard indexes that sit at the lowest (when searching backwards) or highest (when
647+
// searching forwards) possible `usize` value: there is no room to step past them.
648+
if (!forwards && end_index == usize::min_value()) ||
649+
(forwards && start_index == usize::max_value()) {
650+
return 1;
651+
}
652+
653+
let source_len = (local_begin.fm.end_pos - local_begin.fm.start_pos).to_usize();
654+
// Ensure indexes are also not malformed.
655+
if start_index > end_index || end_index > source_len {
656+
return 1;
641657
}
642658

643-
// If this isn't a multibyte character, return the original position.
644-
return bpos;
659+
// Extend the snippet to the end of the source rather than stopping at end_index, so that
660+
// a forward search for a character boundary has bytes beyond the span to examine.
661+
let snippet = if let Some(ref src) = local_begin.fm.src {
662+
let len = src.len();
663+
(&src[start_index..len]).to_string()
664+
} else if let Some(src) = local_begin.fm.external_src.borrow().get_source() {
665+
let len = src.len();
666+
(&src[start_index..len]).to_string()
667+
} else {
668+
return 1;
669+
};
670+
671+
let mut target = if forwards { end_index + 1 } else { end_index - 1 };
672+
while !snippet.is_char_boundary(target - start_index) {
673+
target = if forwards { target + 1 } else { target - 1 };
674+
}
675+
676+
if forwards {
677+
(target - end_index) as u32
678+
} else {
679+
(end_index - target) as u32
680+
}
645681
}
646682

647683
pub fn get_filemap(&self, filename: &FileName) -> Option<Rc<FileMap>> {

0 commit comments

Comments
 (0)