
Commit 01d6455 (parent 4bf70b8)

ilex: Add token silencing to Stream

3 files changed: +70 −27 lines

ilex/Cargo.toml (1 addition, 0 deletions)

@@ -26,3 +26,4 @@ regex-syntax = "0.8.2"
 regex-automata = "0.4.3" # Bless Andrew for his patience.
 rustc_apfloat = "0.2.0" # By eddyb's recommendation.
 unicode-xid = "0.2.4"
+bitvec = "1.0.1"

ilex/src/rt/lexer.rs (2 additions, 0 deletions)

@@ -3,6 +3,7 @@ use std::num::NonZeroU32;
 use std::ops::Index;
 use std::ops::RangeBounds;

+use bitvec::vec::BitVec;
 use byteyarn::Yarn;
 use regex_automata::hybrid::dfa::Cache;

@@ -57,6 +58,7 @@ impl<'a, 'ctx> Lexer<'a, 'ctx> {
         toks: Vec::new(),
         meta_idx: Vec::new(),
         meta: Vec::new(),
+        silent: BitVec::new(),
       },

       cursor: 0,

ilex/src/token/stream.rs (67 additions, 27 deletions)

@@ -4,6 +4,8 @@ use std::mem;
 use std::num::NonZeroU32;
 use std::slice;

+use bitvec::vec::BitVec;
+
 use crate::file::Context;
 use crate::file::File;
 use crate::file::Span;

@@ -15,6 +17,8 @@ use crate::spec::Lexeme;
 use crate::spec::Spec;
 use crate::token;

+use super::Token;
+
 /// A tree-like stream of tokens.
 ///
 /// This is the type returned by [`File::lex()`] when lexing succeeds.

@@ -26,6 +30,8 @@ pub struct Stream<'ctx> {
   pub(crate) toks: Vec<rt::Token>,
   pub(crate) meta_idx: Vec<token::Id>,
   pub(crate) meta: Vec<rt::Metadata>,
+
+  pub(crate) silent: BitVec, // Set of lexemes that have been silenced.
 }

 impl<'ctx> Stream<'ctx> {
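
The new `silent` field is a dense bit set keyed by `Lexeme::index()`: bit `i` is set once lexeme `i` has been silenced. A minimal standalone sketch of that grow-on-demand pattern with the `bitvec` crate (not ilex code; `mark` and `is_marked` are illustrative names):

use bitvec::vec::BitVec;

// Grow the set just far enough to flip bit `idx` on; `idx` stands in for
// Lexeme::index().
fn mark(set: &mut BitVec, idx: usize) {
  if set.len() <= idx {
    set.resize(idx + 1, false);
  }
  set.set(idx, true);
}

// Indices the set was never grown to cover read back as false.
fn is_marked(set: &BitVec, idx: usize) -> bool {
  set.get(idx).is_some_and(|bit| *bit)
}

fn main() {
  let mut set = BitVec::new();
  mark(&mut set, 5);
  assert!(is_marked(&set, 5));
  assert!(!is_marked(&set, 6)); // Never marked, and out of range besides.
}
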
@@ -65,6 +71,28 @@ impl<'ctx> Stream<'ctx> {
     self.token_at_hint(id, meta_hint).unwrap()
   }

+  /// Returns whether the given lexeme has been silenced.
+  pub fn is_silenced<R>(&self, lexeme: Lexeme<R>) -> bool {
+    self.silent.get(lexeme.index()).is_some_and(|p| *p)
+  }
+
+  /// Silences the given lexeme in this stream.
+  ///
+  /// This means that all tokens with this lexeme will be skipped when yielded
+  /// from [`Cursor::next()`]. Use [`Cursor::noisy()`] to yield all tokens,
+  /// including silenced ones.
+  ///
+  /// This is useful for tokens that can appear anywhere in the stream, but
+  /// which should be ignored unless they are being explicitly searched for,
+  /// such as [`rule::LineEnd`] tokens.
+  pub fn silence<R>(&mut self, lexeme: Lexeme<R>) {
+    let idx = lexeme.index();
+    if self.silent.len() <= idx {
+      self.silent.resize(idx + 1, false);
+    }
+    self.silent.set(idx, true);
+  }
+
   /// Returns the last token pushed to this stream.
   pub(crate) fn last_token(&self) -> token::Any {
     let mut cursor = self.cursor();
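
A hedged usage sketch of the two new methods; `stream` (a `Stream` obtained from `File::lex()`) and `line_end` (a `Lexeme<rule::LineEnd>` declared in the spec) are assumed bindings, not something this commit defines:

// Illustrative only: `stream` and `line_end` come from elsewhere.
assert!(!stream.is_silenced(line_end)); // Nothing is silenced by default.
stream.silence(line_end);               // Grows the bit set on demand.
assert!(stream.is_silenced(line_end));
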
@@ -296,6 +324,22 @@ impl<'lex> Cursor<'lex> {
     self.cursor >= self.end
   }

+  /// Returns an iterator that yields all of the values in this cursor,
+  /// including silenced ones.
+  pub fn noisy(&mut self) -> impl Iterator<Item = token::Any<'lex>> + '_ {
+    iter::from_fn(move || loop {
+      if self.is_empty() {
+        return None;
+      }
+
+      let next = self.stream.token_at_hint(self.id(), self.meta_cursor);
+      self.step_forward();
+      if next.is_some() {
+        return next;
+      }
+    })
+  }
+
   /// Returns the next token under the cursor without consuming it.
   pub fn peek_any(&self) -> Option<token::Any<'lex>> {
     let mut copy = *self;
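
To contrast the two iteration paths, a hedged sketch reusing the assumed `stream` and `line_end` bindings from the previous sketch; it also assumes `Stream::cursor()` (used in `last_token()` above) is the way to obtain a `Cursor`, and the loop bodies are elided:

stream.silence(line_end);

// Cursor implements Iterator; after the change in the next hunk, next()
// skips any token whose lexeme has been silenced.
for tok in stream.cursor() {
  let _ = tok; // Never a line-end token here.
}

// noisy() opts back in to every token, silenced ones included.
let mut cursor = stream.cursor();
for tok in cursor.noisy() {
  let _ = tok; // Line-end tokens do show up here.
}
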
@@ -514,18 +558,8 @@ impl fmt::Debug for Cursor<'_> {
 impl<'lex> Iterator for Cursor<'lex> {
   type Item = token::Any<'lex>;
   fn next(&mut self) -> Option<Self::Item> {
-    loop {
-      if self.is_empty() {
-        return None;
-      }
-
-      let next = self.stream.token_at_hint(self.id(), self.meta_cursor);
-      self.step_forward();
-
-      if next.is_some() {
-        return next;
-      }
-    }
+    let stream = self.stream;
+    self.noisy().find(|next| !stream.is_silenced(next.lexeme()))
   }
 }
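
With `noisy()` in place, `Cursor`'s `Iterator` impl reduces to a filter over it: `next()` drains `noisy()` and returns the first token whose lexeme is not silenced, so silenced tokens are consumed but never yielded from an ordinary `for` loop.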

@@ -623,24 +657,30 @@ pub mod switch {
   where
     X: Impl<'lex, T>,
   {
-    let Some(next) = cursor.next() else {
-      report.builtins(cursor.spec()).expected(
-        self.0.lexemes(0),
-        Lexeme::eof(),
-        cursor.end(),
-      );
+    loop {
+      let Some(next) = cursor.noisy().next() else {
+        report.builtins(cursor.spec()).expected(
+          self.0.lexemes(0),
+          Lexeme::eof(),
+          cursor.end(),
+        );

-      return None;
-    };
+        return None;
+      };

-    if let Some(found) = self.0.apply(next, cursor) {
-      return Some(found);
-    }
+      if let Some(found) = self.0.apply(next, cursor) {
+        return Some(found);
+      }

-    report
-      .builtins(cursor.spec())
-      .expected(self.0.lexemes(0), next, next);
-    None
+      if cursor.stream.is_silenced(next.lexeme()) {
+        continue;
+      }
+
+      report
+        .builtins(cursor.spec())
+        .expected(self.0.lexemes(0), next, next);
+      return None;
+    }
   }

   /// Takes the next token from `cursor` and matches it against this switch.
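
The switch helper gets the matching treatment: it now pulls tokens from `cursor.noisy()` in a loop, returns as soon as one of its arms matches, skips (via `continue`) any non-matching token whose lexeme has been silenced, and only emits the "expected" diagnostic when the non-matching token is not silenced.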
