Skip to content

Commit

Permalink
ilex: Add token silencing to Stream
Browse files Browse the repository at this point in the history
  • Loading branch information
mcy committed Jan 26, 2025
1 parent 4bf70b8 commit 01d6455
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 27 deletions.
1 change: 1 addition & 0 deletions ilex/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ regex-syntax = "0.8.2"
regex-automata = "0.4.3" # Bless Andrew for his patience.
rustc_apfloat = "0.2.0" # By eddyb's recommendation.
unicode-xid = "0.2.4"
bitvec = "1.0.1"
2 changes: 2 additions & 0 deletions ilex/src/rt/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::num::NonZeroU32;
use std::ops::Index;
use std::ops::RangeBounds;

use bitvec::vec::BitVec;
use byteyarn::Yarn;
use regex_automata::hybrid::dfa::Cache;

Expand Down Expand Up @@ -57,6 +58,7 @@ impl<'a, 'ctx> Lexer<'a, 'ctx> {
toks: Vec::new(),
meta_idx: Vec::new(),
meta: Vec::new(),
silent: BitVec::new(),
},

cursor: 0,
Expand Down
94 changes: 67 additions & 27 deletions ilex/src/token/stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ use std::mem;
use std::num::NonZeroU32;
use std::slice;

use bitvec::vec::BitVec;

use crate::file::Context;
use crate::file::File;
use crate::file::Span;
Expand All @@ -15,6 +17,8 @@ use crate::spec::Lexeme;
use crate::spec::Spec;
use crate::token;

use super::Token;

/// A tree-like stream of tokens.
///
/// This is the type returned by [`File::lex()`] when lexing succeeds.
Expand All @@ -26,6 +30,8 @@ pub struct Stream<'ctx> {
pub(crate) toks: Vec<rt::Token>,
pub(crate) meta_idx: Vec<token::Id>,
pub(crate) meta: Vec<rt::Metadata>,

pub(crate) silent: BitVec, // Set of lexemes that have been silenced.
}

impl<'ctx> Stream<'ctx> {
Expand Down Expand Up @@ -65,6 +71,28 @@ impl<'ctx> Stream<'ctx> {
self.token_at_hint(id, meta_hint).unwrap()
}

/// Returns whether the given lexeme has been silenced.
pub fn is_silenced<R>(&self, lexeme: Lexeme<R>) -> bool {
  // Out-of-range indices mean the lexeme was never silenced.
  matches!(self.silent.get(lexeme.index()), Some(bit) if *bit)
}

/// Silences the given lexeme in this stream.
///
/// All tokens with this lexeme will be skipped when yielded from
/// [`Cursor::next()`]. Use [`Cursor::noisy()`] to yield every token,
/// including silenced ones.
///
/// This is useful for tokens that can appear anywhere in the stream but
/// should be ignored unless they are explicitly searched for, such as
/// [`rule::LineEnd`] tokens.
pub fn silence<R>(&mut self, lexeme: Lexeme<R>) {
  let bit = lexeme.index();
  // Grow the bit set on demand so unsilenced lexemes cost nothing.
  if bit >= self.silent.len() {
    self.silent.resize(bit + 1, false);
  }
  self.silent.set(bit, true);
}

/// Returns the last token pushed to this stream.
pub(crate) fn last_token(&self) -> token::Any {
let mut cursor = self.cursor();
Expand Down Expand Up @@ -296,6 +324,22 @@ impl<'lex> Cursor<'lex> {
self.cursor >= self.end
}

/// Returns an iterator that yields all of the values in this cursor,
/// including silenced ones.
pub fn noisy(&mut self) -> impl Iterator<Item = token::Any<'lex>> + '_ {
  iter::from_fn(move || {
    while !self.is_empty() {
      // Always advance, even when the slot does not resolve to a token,
      // so the cursor cannot get stuck.
      let tok = self.stream.token_at_hint(self.id(), self.meta_cursor);
      self.step_forward();
      if tok.is_some() {
        return tok;
      }
    }
    None
  })
}

/// Returns the next token under the cursor without consuming it.
pub fn peek_any(&self) -> Option<token::Any<'lex>> {
let mut copy = *self;
Expand Down Expand Up @@ -514,18 +558,8 @@ impl fmt::Debug for Cursor<'_> {
impl<'lex> Iterator for Cursor<'lex> {
type Item = token::Any<'lex>;
fn next(&mut self) -> Option<Self::Item> {
loop {
if self.is_empty() {
return None;
}

let next = self.stream.token_at_hint(self.id(), self.meta_cursor);
self.step_forward();

if next.is_some() {
return next;
}
}
let stream = self.stream;
self.noisy().find(|next| !stream.is_silenced(next.lexeme()))
}
}

Expand Down Expand Up @@ -623,24 +657,30 @@ pub mod switch {
where
X: Impl<'lex, T>,
{
let Some(next) = cursor.next() else {
report.builtins(cursor.spec()).expected(
self.0.lexemes(0),
Lexeme::eof(),
cursor.end(),
);
loop {
let Some(next) = cursor.noisy().next() else {
report.builtins(cursor.spec()).expected(
self.0.lexemes(0),
Lexeme::eof(),
cursor.end(),
);

return None;
};
return None;
};

if let Some(found) = self.0.apply(next, cursor) {
return Some(found);
}
if let Some(found) = self.0.apply(next, cursor) {
return Some(found);
}

report
.builtins(cursor.spec())
.expected(self.0.lexemes(0), next, next);
None
if cursor.stream.is_silenced(next.lexeme()) {
continue;
}

report
.builtins(cursor.spec())
.expected(self.0.lexemes(0), next, next);
return None;
}
}

/// Takes the next token from `cursor` and matches it against this switch.
Expand Down

0 comments on commit 01d6455

Please sign in to comment.