Skip to content

transmutability: remove NFA intermediate representation #139990

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
289 changes: 193 additions & 96 deletions compiler/rustc_transmute/src/layout/dfa.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
use std::fmt;
use std::sync::atomic::{AtomicU32, Ordering};

use tracing::instrument;

use super::{Byte, Nfa, Ref, nfa};
use super::{Byte, Ref, Tree, Uninhabited};
use crate::Map;

#[derive(PartialEq, Clone, Debug)]
#[derive(PartialEq)]
#[cfg_attr(test, derive(Clone))]
pub(crate) struct Dfa<R>
where
R: Ref,
{
pub(crate) transitions: Map<State, Transitions<R>>,
pub(crate) start: State,
pub(crate) accepting: State,
pub(crate) accept: State,
}

#[derive(PartialEq, Clone, Debug)]
Expand All @@ -34,35 +33,15 @@ where
}
}

impl<R> Transitions<R>
where
R: Ref,
{
#[cfg(test)]
fn insert(&mut self, transition: Transition<R>, state: State) {
match transition {
Transition::Byte(b) => {
self.byte_transitions.insert(b, state);
}
Transition::Ref(r) => {
self.ref_transitions.insert(r, state);
}
}
}
}

/// The states in a `Nfa` represent byte offsets.
/// The states in a [`Dfa`] represent byte offsets.
#[derive(Hash, Eq, PartialEq, PartialOrd, Ord, Copy, Clone)]
pub(crate) struct State(u32);
pub(crate) struct State(pub(crate) u32);

#[cfg(test)]
#[derive(Hash, Eq, PartialEq, Clone, Copy)]
pub(crate) enum Transition<R>
where
R: Ref,
{
Byte(Byte),
Ref(R),
impl State {
pub(crate) fn new() -> Self {
static COUNTER: AtomicU32 = AtomicU32::new(0);
Self(COUNTER.fetch_add(1, Ordering::SeqCst))
}
}

impl fmt::Debug for State {
Expand All @@ -71,19 +50,6 @@ impl fmt::Debug for State {
}
}

#[cfg(test)]
impl<R> fmt::Debug for Transition<R>
where
R: Ref,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match &self {
Self::Byte(b) => b.fmt(f),
Self::Ref(r) => r.fmt(f),
}
}
}

impl<R> Dfa<R>
where
R: Ref,
Expand All @@ -92,60 +58,167 @@ where
pub(crate) fn bool() -> Self {
let mut transitions: Map<State, Transitions<R>> = Map::default();
let start = State::new();
let accepting = State::new();
let accept = State::new();

transitions.entry(start).or_default().insert(Transition::Byte(Byte::Init(0x00)), accepting);
transitions.entry(start).or_default().byte_transitions.insert(Byte::Init(0x00), accept);

transitions.entry(start).or_default().insert(Transition::Byte(Byte::Init(0x01)), accepting);
transitions.entry(start).or_default().byte_transitions.insert(Byte::Init(0x01), accept);

Self { transitions, start, accepting }
Self { transitions, start, accept }
}

#[instrument(level = "debug")]
pub(crate) fn from_nfa(nfa: Nfa<R>) -> Self {
let Nfa { transitions: nfa_transitions, start: nfa_start, accepting: nfa_accepting } = nfa;
pub(crate) fn unit() -> Self {
let transitions: Map<State, Transitions<R>> = Map::default();
let start = State::new();
let accept = start;

Self { transitions, start, accept }
}

let mut dfa_transitions: Map<State, Transitions<R>> = Map::default();
let mut nfa_to_dfa: Map<nfa::State, State> = Map::default();
let dfa_start = State::new();
nfa_to_dfa.insert(nfa_start, dfa_start);
pub(crate) fn from_byte(byte: Byte) -> Self {
let mut transitions: Map<State, Transitions<R>> = Map::default();
let start = State::new();
let accept = State::new();

let mut queue = vec![(nfa_start, dfa_start)];
transitions.entry(start).or_default().byte_transitions.insert(byte, accept);

while let Some((nfa_state, dfa_state)) = queue.pop() {
if nfa_state == nfa_accepting {
continue;
}
Self { transitions, start, accept }
}

for (nfa_transition, next_nfa_states) in nfa_transitions[&nfa_state].iter() {
let dfa_transitions =
dfa_transitions.entry(dfa_state).or_insert_with(Default::default);

let mapped_state = next_nfa_states.iter().find_map(|x| nfa_to_dfa.get(x).copied());

let next_dfa_state = match nfa_transition {
&nfa::Transition::Byte(b) => *dfa_transitions
.byte_transitions
.entry(b)
.or_insert_with(|| mapped_state.unwrap_or_else(State::new)),
&nfa::Transition::Ref(r) => *dfa_transitions
.ref_transitions
.entry(r)
.or_insert_with(|| mapped_state.unwrap_or_else(State::new)),
};

for &next_nfa_state in next_nfa_states {
nfa_to_dfa.entry(next_nfa_state).or_insert_with(|| {
queue.push((next_nfa_state, next_dfa_state));
next_dfa_state
});
pub(crate) fn from_ref(r: R) -> Self {
let mut transitions: Map<State, Transitions<R>> = Map::default();
let start = State::new();
let accept = State::new();

transitions.entry(start).or_default().ref_transitions.insert(r, accept);

Self { transitions, start, accept }
}

pub(crate) fn from_tree(tree: Tree<!, R>) -> Result<Self, Uninhabited> {
Ok(match tree {
Tree::Byte(b) => Self::from_byte(b),
Tree::Ref(r) => Self::from_ref(r),
Tree::Alt(alts) => {
// Convert and filter the inhabited alternatives.
let mut alts = alts.into_iter().map(Self::from_tree).filter_map(Result::ok);
// If there are no alternatives, return `Uninhabited`.
let dfa = alts.next().ok_or(Uninhabited)?;
// Combine the remaining alternatives with `dfa`.
alts.fold(dfa, |dfa, alt| dfa.union(alt, State::new))
}
Tree::Seq(elts) => {
let mut dfa = Self::unit();
for elt in elts.into_iter().map(Self::from_tree) {
dfa = dfa.concat(elt?);
}
dfa
}
})
}

/// Concatenate two `Dfa`s.
pub(crate) fn concat(self, other: Self) -> Self {
if self.start == self.accept {
return other;
} else if other.start == other.accept {
return self;
}

let dfa_accepting = nfa_to_dfa[&nfa_accepting];
let start = self.start;
let accept = other.accept;

let mut transitions: Map<State, Transitions<R>> = self.transitions;

Self { transitions: dfa_transitions, start: dfa_start, accepting: dfa_accepting }
for (source, transition) in other.transitions {
let fix_state = |state| if state == other.start { self.accept } else { state };
let entry = transitions.entry(fix_state(source)).or_default();
for (edge, destination) in transition.byte_transitions {
entry.byte_transitions.insert(edge, fix_state(destination));
}
for (edge, destination) in transition.ref_transitions {
entry.ref_transitions.insert(edge, fix_state(destination));
}
}

Self { transitions, start, accept }
}

/// Compute the union of two `Dfa`s.
pub(crate) fn union(self, other: Self, mut new_state: impl FnMut() -> State) -> Self {
// We implement `union` by lazily initializing a set of states
// corresponding to the product of states in `self` and `other`, and
// then add transitions between these states that correspond to where
// they exist between `self` and `other`.

let a = self;
let b = other;

let accept = new_state();

let mut mapping: Map<(Option<State>, Option<State>), State> = Map::default();

let mut mapped = |(a_state, b_state)| {
if Some(a.accept) == a_state || Some(b.accept) == b_state {
// If either `a_state` or `b_state` are accepting, map to a
// common `accept` state.
accept
} else {
*mapping.entry((a_state, b_state)).or_insert_with(&mut new_state)
}
};

let start = mapped((Some(a.start), Some(b.start)));
let mut transitions: Map<State, Transitions<R>> = Map::default();
let mut queue = vec![(Some(a.start), Some(b.start))];
let empty_transitions = Transitions::default();

while let Some((a_src, b_src)) = queue.pop() {
let a_transitions =
a_src.and_then(|a_src| a.transitions.get(&a_src)).unwrap_or(&empty_transitions);
let b_transitions =
b_src.and_then(|b_src| b.transitions.get(&b_src)).unwrap_or(&empty_transitions);

let byte_transitions =
a_transitions.byte_transitions.keys().chain(b_transitions.byte_transitions.keys());

for byte_transition in byte_transitions {
let a_dst = a_transitions.byte_transitions.get(byte_transition).copied();
let b_dst = b_transitions.byte_transitions.get(byte_transition).copied();

assert!(a_dst.is_some() || b_dst.is_some());

let src = mapped((a_src, b_src));
let dst = mapped((a_dst, b_dst));

transitions.entry(src).or_default().byte_transitions.insert(*byte_transition, dst);

if !transitions.contains_key(&dst) {
queue.push((a_dst, b_dst))
}
}

let ref_transitions =
a_transitions.ref_transitions.keys().chain(b_transitions.ref_transitions.keys());

for ref_transition in ref_transitions {
let a_dst = a_transitions.ref_transitions.get(ref_transition).copied();
let b_dst = b_transitions.ref_transitions.get(ref_transition).copied();

assert!(a_dst.is_some() || b_dst.is_some());

let src = mapped((a_src, b_src));
let dst = mapped((a_dst, b_dst));

transitions.entry(src).or_default().ref_transitions.insert(*ref_transition, dst);

if !transitions.contains_key(&dst) {
queue.push((a_dst, b_dst))
}
}
}

Self { transitions, start, accept }
}

pub(crate) fn bytes_from(&self, start: State) -> Option<&Map<Byte, State>> {
Expand All @@ -159,24 +232,48 @@ where
pub(crate) fn refs_from(&self, start: State) -> Option<&Map<R, State>> {
Some(&self.transitions.get(&start)?.ref_transitions)
}
}

impl State {
pub(crate) fn new() -> Self {
static COUNTER: AtomicU32 = AtomicU32::new(0);
Self(COUNTER.fetch_add(1, Ordering::SeqCst))
#[cfg(test)]
pub(crate) fn from_edges<B: Copy + Into<Byte>>(
start: u32,
accept: u32,
edges: &[(u32, B, u32)],
) -> Self {
let start = State(start);
let accept = State(accept);
let mut transitions: Map<State, Transitions<R>> = Map::default();

for &(src, edge, dst) in edges {
let src = State(src);
let dst = State(dst);
let old = transitions.entry(src).or_default().byte_transitions.insert(edge.into(), dst);
assert!(old.is_none());
}

Self { start, accept, transitions }
}
}

#[cfg(test)]
impl<R> From<nfa::Transition<R>> for Transition<R>
/// Serialize the DFA using the Graphviz DOT format.
impl<R> fmt::Debug for Dfa<R>
where
R: Ref,
{
fn from(nfa_transition: nfa::Transition<R>) -> Self {
match nfa_transition {
nfa::Transition::Byte(byte) => Transition::Byte(byte),
nfa::Transition::Ref(r) => Transition::Ref(r),
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(f, "digraph {{")?;
writeln!(f, " {:?} [shape = doublecircle]", self.start)?;
writeln!(f, " {:?} [shape = doublecircle]", self.accept)?;

for (src, transitions) in self.transitions.iter() {
for (t, dst) in transitions.byte_transitions.iter() {
writeln!(f, " {src:?} -> {dst:?} [label=\"{t:?}\"]")?;
}

for (t, dst) in transitions.ref_transitions.iter() {
writeln!(f, " {src:?} -> {dst:?} [label=\"{t:?}\"]")?;
}
}

writeln!(f, "}}")
}
}
10 changes: 7 additions & 3 deletions compiler/rustc_transmute/src/layout/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@ use std::hash::Hash;
pub(crate) mod tree;
pub(crate) use tree::Tree;

pub(crate) mod nfa;
pub(crate) use nfa::Nfa;

pub(crate) mod dfa;
pub(crate) use dfa::Dfa;

Expand All @@ -29,6 +26,13 @@ impl fmt::Debug for Byte {
}
}

#[cfg(test)]
impl From<u8> for Byte {
fn from(src: u8) -> Self {
Self::Init(src)
}
}

pub(crate) trait Def: Debug + Hash + Eq + PartialEq + Copy + Clone {
fn has_safety_invariants(&self) -> bool;
}
Expand Down
Loading
Loading