Commit
Update
 - Commit more code for out-of-scope checking of scanner states (see the first sketch after this list).

 - Fix an error that caused an infinite loop in AST construction when
   production type resolution was skipped for rules that had a terminal
   as their last symbol (see the second sketch after this list).

 - Miscellaneous refactors
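
Two hedged sketches of the changes above. First, the out-of-scope (OOS) pruning rule that handle_scanner_items now applies to scanner-state groups; the Pair type and in_scope flag below are illustrative stand-ins for the diff's transition pairs and its !kernel.origin.is_scanner_oos() check, not the radlr API:

  // Illustrative stand-in type: not the radlr API.
  #[derive(Clone, Copy)]
  struct Pair {
    token: u32,
    in_scope: bool, // plays the role of !kernel.origin.is_scanner_oos()
  }

  fn prune_oos(group: &mut Vec<Pair>) {
    // Prune only when at least one in-scope item remains, so a group made
    // entirely of out-of-scope follow items is left intact.
    if group.iter().any(|p| p.in_scope) {
      group.retain(|p| p.in_scope);
    }
  }

Second, the iteration cap that stops the AST type-resolution queue from looping forever; the rule IDs and try_resolve callback are illustrative, and only the rules().len().pow(2) cap followed by a panic mirrors the guard added in build_db.rs:

  use std::collections::VecDeque;

  fn resolve_all(rule_count: usize, mut try_resolve: impl FnMut(usize) -> bool) {
    let mut queue: VecDeque<usize> = (0..rule_count).collect();

    // Cap total work at rule_count squared so a rule whose type never
    // resolves cannot keep the loop spinning.
    let max_iterations = rule_count.pow(2);
    let mut total_iterations = 0;

    while let Some(rule_id) = queue.pop_front() {
      total_iterations += 1;
      if total_iterations > max_iterations {
        panic!("Could not resolve rule {rule_id}");
      }

      if !try_resolve(rule_id) {
        // Not resolvable yet: requeue and retry once its dependencies settle.
        queue.push_back(rule_id);
      }
    }
  }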
acweathersby committed May 2, 2024
1 parent 0680a2d commit 40b1ad2
Showing 9 changed files with 120 additions and 64 deletions.
1 change: 0 additions & 1 deletion Cargo.toml
@@ -6,7 +6,6 @@ members = [
"./crates/radlr-build",
"./crates/radlr-core",
"./crates/radlr-bytecode",
#"./crates/radlr-x86",
"./crates/radlr-rust-runtime",
"./crates/radlr-test",
"./crates/radlr-formatter",
26 changes: 19 additions & 7 deletions crates/radlr-ascript/build_db.rs
@@ -142,9 +142,7 @@ fn collect_types(adb: &mut AscriptDatabase) {
Some(node) => {
extract_type_data(node.get_type().to_cardinal(), types);
}
None => {
unreachable!()
}
None => {}
},

_ => {}
@@ -174,7 +172,7 @@ pub fn extract_structs(db: &ParserDatabase, adb: &mut AscriptDatabase) {
let rule = &db_rule.rule;
let g_id = db_rule.rule.g_id;

let rule = match &rule.ast {
let ast_rule = match &rule.ast {
None => AscriptRule::LastSymbol(id, Initializer {
ty: AscriptType::Undefined,
name: Default::default(),
@@ -230,7 +228,7 @@ pub fn extract_structs(db: &ParserDatabase, adb: &mut AscriptDatabase) {
}
}

adb.rules.push(rule);
adb.rules.push(ast_rule);
}

#[allow(irrefutable_let_patterns)]
@@ -391,7 +389,9 @@ pub fn resolve_nonterm_types(db: &ParserDatabase, adb: &mut AscriptDatabase) ->
let set = rule_nonterms.entry(item.rule_id()).or_insert_with(|| OrderedSet::new());
set.insert(nonterm_key);
}
None => {}
None => {
rule_nonterms.entry(item.rule_id()).or_insert_with(|| OrderedSet::new());
}
}
}
Some(_) => {
@@ -405,7 +405,19 @@
// Start with rules that do not rely on non-term-values;
let mut queue = VecDeque::from_iter(rule_nonterms.iter().filter_map(|(r, s)| s.is_empty().then_some(*r)));

let max_iterations = db.rules().len().pow(2);
let mut total_iterations = 0;

while let Some(rule_id) = queue.pop_front() {
total_iterations += 1;

if total_iterations > max_iterations {
panic!(
"Could not resolve rule {}",
db.db_rule(rule_id).rule.tok.blame(1, 1, "could not resolve the AST of this rule", None)
)
}

let index: usize = rule_id.into();
let item = Item::from((rule_id, db));

@@ -457,8 +469,8 @@ pub fn resolve_nonterm_types(db: &ParserDatabase, adb: &mut AscriptDatabase) ->
let nonterm_id = db.db_rule(rule_id).nonterm;

if ty.is_unknown() {
queue.push_back(rule_id);
queue.extend(rule_nonterms.iter().filter_map(|(r, s)| s.contains(&nonterm_id).then_some(*r)));
queue.push_back(rule_id);
continue;
}

4 changes: 4 additions & 0 deletions crates/radlr-build/targets/rust/rust_bytecode_script.atat
@@ -101,6 +101,10 @@ impl RuntimeDatabase for @name {
fn token_id_to_str(&self, tok_id: u32) -> Option<&str> {
self.token_id_to_str.get(&tok_id).map(|s| *s)
}

fn entrypoints(&self) -> Vec<(std::string::String,\ u32)> {
vec![]
}
}

impl<T: ParserInput> ParserProducer<T> for @name {
24 changes: 17 additions & 7 deletions crates/radlr-core/compile/states/build_graph/build.rs
@@ -57,8 +57,8 @@ pub(crate) fn handle_kernel_items(
if pred.state_type().is_peek() && pred.state_type().peek_level() > 0 && states_queued == 0 {
// Todo(anthony) : if peeking, determine if the peek has terminated in a
// non-deterministic way. If so, produce a NonDeterministicPeek error.
panic!("Undeterministic PARSE");
let root_data = pred.root_data.db_key;
//panic!("Undeterministic PARSE");
//let root_data = pred.root_data.db_key;

Err(RadlrError::StateConstructionError(crate::compile::states::build_states::StateConstructionError::NonDeterministicPeek(
pred.get_root_shared(),
@@ -92,6 +92,8 @@ fn get_firsts(gb: &mut ConcurrentGraphBuilder, pred: &GraphNode, config: &Parser
let mut oos_scan_completed_tokens = OrderedSet::<PrecedentDBTerm>::new();
let mut oos_scan_incompletes = OrderedSet::<PrecedentDBTerm>::new();

let mut ooos = false;

let mut too_process_items = Vec::new();

for item in pred.kernel_items() {
@@ -110,6 +112,7 @@
too_process_items.extend(follow);
}
} else {
ooos = true;
if !item.is_initial() {
oos_scan_incompletes.insert(token);
}
@@ -171,9 +174,9 @@ fn handle_scanner_items(
) -> RadlrResult<GroupedFirsts> {
if node.is_scanner() {
for (_, (_, pair)) in &mut groups {
if pair.iter().any(|p| !p.kernel.is_oos()) {
if pair.iter().any(|p| !p.kernel.origin.is_scanner_oos()) {
// Remove all oos items from group
let filtered_items = pair.iter().filter_map(|i| (!i.kernel.is_oos()).then_some(*i)).collect::<Vec<_>>();
let filtered_items = pair.iter().filter_map(|i| (!i.kernel.origin.is_scanner_oos()).then_some(*i)).collect::<Vec<_>>();
*pair = filtered_items;
}
}
@@ -266,16 +269,23 @@ fn handle_completed_items(

// TODO(anthony) - create the correct filter to identify the number of rules
// that are being reduced (compare item indices.)
let default: Lookaheads = if completed.1.iter().to_kernel().items_are_the_same_rule() {
completed.1
let default: Lookaheads = if completed.1.iter().to_kernel().items_are_the_same_rule()
|| completed.1.iter().all(|i| i.kernel.origin.is_scanner_oos())
{
completed.1.clone()
} else {
lookahead_pairs.iter().filter(|i| i.is_eoi_complete()).cloned().collect()
};

if default.len() > 0 {
handle_completed_groups(gb, pred, config, groups, SymbolId::Default, default)?;
} else {
debug_assert!(!lookahead_pairs.is_empty())
#[cfg(debug_assertions)]
debug_assert!(
!lookahead_pairs.is_empty(),
"No default reduce! {pred:?} {}",
completed.1.iter().map(|c| c._debug_string_(gb.db())).collect::<Vec<_>>().join("\n")
)
}
}

Original file line number Diff line number Diff line change
@@ -71,7 +71,7 @@ fn complete_scan(
first: TransitionPair,
) {
if first.kernel.origin.is_scanner_oos() {
let state = StagedNode::new(gb).parent(pred.clone()).make_leaf().sym(sym).ty(StateType::ScannerCompleteOOS).commit(gb);
StagedNode::new(gb).parent(pred.clone()).make_leaf().sym(sym).ty(StateType::ScannerCompleteOOS).commit(gb);
} else {
let (follow, completed_items): (Vec<Items>, Vec<Items>) =
completed.iter().into_iter().map(|i| get_follow_internal(gb, pred, i.kernel, FollowType::ScannerCompleted)).unzip();
19 changes: 11 additions & 8 deletions crates/radlr-core/compile/states/build_graph/flow/conflict.rs
@@ -76,6 +76,7 @@ pub(super) fn resolve_reduce_reduce_conflict(
Ok(ReduceReduceConflictResolution::Peek(k as u16, follow_pairs))
}
_ => {
return Ok(ReduceReduceConflictResolution::Peek(100 as u16, follow_pairs));
#[cfg(debug_assertions)]
{
for (i, follow_pair) in follow_pairs.iter().enumerate() {
@@ -86,16 +87,18 @@
println!("{}", item._debug_string_w_db_(&gb.db()));
let origin_state_id = item.origin_state;

let origin_state = gb.get_state(origin_state_id.0 as u64).unwrap();
if let Some(origin_state) = gb.get_state(origin_state_id.0 as u64) {
let kernel_items = origin_state.kernel_items();

let kernel_items = origin_state.kernel_items();
if item.is_initial() && origin_state_id.is_root() {
break;
}

if item.is_initial() && origin_state_id.is_root() {
break;
}

if let Some(i) = kernel_items.iter().find(|i| item.is_successor_of(i)) {
item = *i;
if let Some(i) = kernel_items.iter().find(|i| item.is_successor_of(i)) {
item = *i;
} else {
break;
}
} else {
break;
}
33 changes: 17 additions & 16 deletions crates/radlr-core/compile/states/build_graph/graph/mod.rs
@@ -414,9 +414,8 @@ fn get_state_symbols<'a>(builder: &mut ConcurrentGraphBuilder, node: &GraphNode)
if scanner_data.symbols.is_empty() {
None
} else {
let hash_symbols = scanner_data.symbols.clone();
//hash_symbols.extend(scanner_data.follow.iter());
let hash = hash_id_value_u64((&scanner_data.skipped, hash_symbols));
let hash = hash_id_value_u64((&scanner_data.skipped, &scanner_data.symbols));

scanner_data.hash = hash;

Some(scanner_data)
@@ -996,18 +995,20 @@ impl ConcurrentGraphBuilder {
if state.is_scanner() && parser_config.ALLOW_LOOKAHEAD_SCANNERS {
if let Some(pred) = pred {
let kernel_items = &mut state.kernel;
let mut completed_symbols = OrderedSet::new();
for item in kernel_items.iter() {
if item.is_complete() {
if let Origin::TerminalGoal(t, p) = item.origin {
let term: PrecedentDBTerm = (t, p, false).into();
completed_symbols.insert(term);
if kernel_items.iter().any(|i| i.is_incomplete()) {
let mut completed_symbols = OrderedSet::new();
for item in kernel_items.iter() {
if item.is_complete() {
if let Origin::TerminalGoal(t, p) = item.origin {
let term: PrecedentDBTerm = (t, p, false).into();
completed_symbols.insert(term);
}
}
}
}

if !completed_symbols.is_empty() {
kernel_items.extend(self.get_oos_scanner_follow(pred, &completed_symbols));
if !completed_symbols.is_empty() {
kernel_items.extend(self.get_oos_scanner_follow(pred, &completed_symbols));
}
}
}
}
@@ -1018,7 +1019,7 @@

let is_root = update_root_info(&mut state, pred);

state = self.append_state_hashes(is_root, state);
state = self.commit_state(is_root, state);

if !state.is_scanner() {
if let Some(scanner_data) = get_state_symbols(self, &state) {
@@ -1119,14 +1120,14 @@ impl ConcurrentGraphBuilder {
/// Create hash id's for the given state.
///
/// WARNING: Ensure the state's root_data is set before calling this method.
fn append_state_hashes(&mut self, is_root: bool, mut state: GraphNode) -> GraphNode {
fn commit_state(&mut self, is_root: bool, mut state: GraphNode) -> GraphNode {
let lookahead =
if is_root { 0 } else { create_lookahead_hash(self, &state, std::collections::hash_map::DefaultHasher::new()) };

let state_hash = create_state_hash(&state, lookahead, std::collections::hash_map::DefaultHasher::new());
state.hash_id = create_state_hash(&state, lookahead, std::collections::hash_map::DefaultHasher::new());

state.hash_id = state_hash;
state.id = StateId::new(state.hash_id as usize, is_root.then_some(GraphIdSubType::Root).unwrap_or(GraphIdSubType::Regular));

state.kernel = state
.kernel
.into_iter()
9 changes: 7 additions & 2 deletions crates/radlr-core/types/parser_config.rs
@@ -48,8 +48,11 @@ pub struct ParserConfig {
/// Enables using wide data types ( u16 | u32 | u64 | u128 ) to recognize a
/// sequence of characters.
pub ALLOW_BYTE_SEQUENCES: bool,
/// Enables context sensitive scanners, which will track lookahead symbols in
/// their call site. May increase the number scanner states significantly.
/// Enables FOLLOW context sensitive scanners, which will consider the tokens
/// that _follow_ the states for which the scanner is constructing tokens.
///
/// May significantly increase the number of scanner states.
pub ALLOW_LOOKAHEAD_SCANNERS: bool,
/// The maximum number of lookahead symbols allowed before parser construction
/// is aborted or a different disambiguating strategy is employed.
Expand Down Expand Up @@ -190,6 +193,8 @@ impl ParserConfig {
self
}

/// Adds FOLLOW aware scanning behavior. May significantly increase the
/// number of scanner states in more complex grammars.
pub fn use_lookahead_scanners(mut self, enable: bool) -> Self {
self.ALLOW_LOOKAHEAD_SCANNERS = enable;
self
Expand Down