1
1
use std:: fmt;
2
2
use std:: sync:: atomic:: { AtomicU32 , Ordering } ;
3
3
4
- use tracing:: instrument;
5
-
6
- use super :: { Byte , Nfa , Ref , nfa} ;
4
+ use super :: { Byte , Ref , Tree , Uninhabited } ;
7
5
use crate :: Map ;
8
6
9
- #[ derive( PartialEq , Clone , Debug ) ]
7
+ #[ derive( PartialEq , Clone ) ]
10
8
pub ( crate ) struct Dfa < R >
11
9
where
12
10
R : Ref ,
@@ -34,35 +32,15 @@ where
34
32
}
35
33
}
36
34
37
- impl < R > Transitions < R >
38
- where
39
- R : Ref ,
40
- {
41
- #[ cfg( test) ]
42
- fn insert ( & mut self , transition : Transition < R > , state : State ) {
43
- match transition {
44
- Transition :: Byte ( b) => {
45
- self . byte_transitions . insert ( b, state) ;
46
- }
47
- Transition :: Ref ( r) => {
48
- self . ref_transitions . insert ( r, state) ;
49
- }
50
- }
51
- }
52
- }
53
-
54
35
/// The states in a `Dfa` represent byte offsets.
///
/// Each state is identified by a `u32`; two states are the same state exactly
/// when their identifiers are equal.
#[derive(Hash, Eq, PartialEq, PartialOrd, Ord, Copy, Clone)]
pub(crate) struct State(pub(crate) u32);

impl State {
    /// Returns a state whose identifier has not been handed out by any
    /// earlier call to `State::new`.
    ///
    /// Identifiers are drawn from a single process-wide counter that starts
    /// at zero and is incremented atomically on every call.
    pub(crate) fn new() -> Self {
        static COUNTER: AtomicU32 = AtomicU32::new(0);
        Self(COUNTER.fetch_add(1, Ordering::SeqCst))
    }
}
67
45
68
46
impl fmt:: Debug for State {
@@ -71,19 +49,6 @@ impl fmt::Debug for State {
71
49
}
72
50
}
73
51
74
- #[ cfg( test) ]
75
- impl < R > fmt:: Debug for Transition < R >
76
- where
77
- R : Ref ,
78
- {
79
- fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
80
- match & self {
81
- Self :: Byte ( b) => b. fmt ( f) ,
82
- Self :: Ref ( r) => r. fmt ( f) ,
83
- }
84
- }
85
- }
86
-
87
52
impl < R > Dfa < R >
88
53
where
89
54
R : Ref ,
@@ -94,58 +59,161 @@ where
94
59
let start = State :: new ( ) ;
95
60
let accepting = State :: new ( ) ;
96
61
97
- transitions. entry ( start) . or_default ( ) . insert ( Transition :: Byte ( Byte :: Init ( 0x00 ) ) , accepting) ;
62
+ transitions. entry ( start) . or_default ( ) . byte_transitions . insert ( Byte :: Init ( 0x00 ) , accepting) ;
98
63
99
- transitions. entry ( start) . or_default ( ) . insert ( Transition :: Byte ( Byte :: Init ( 0x01 ) ) , accepting) ;
64
+ transitions. entry ( start) . or_default ( ) . byte_transitions . insert ( Byte :: Init ( 0x01 ) , accepting) ;
100
65
101
66
Self { transitions, start, accepting }
102
67
}
103
68
104
- #[ instrument( level = "debug" ) ]
105
- pub ( crate ) fn from_nfa ( nfa : Nfa < R > ) -> Self {
106
- let Nfa { transitions : nfa_transitions, start : nfa_start, accepting : nfa_accepting } = nfa;
69
+ pub ( crate ) fn unit ( ) -> Self {
70
+ let transitions: Map < State , Transitions < R > > = Map :: default ( ) ;
71
+ let start = State :: new ( ) ;
72
+ let accepting = start;
73
+
74
+ Self { transitions, start, accepting }
75
+ }
107
76
108
- let mut dfa_transitions : Map < State , Transitions < R > > = Map :: default ( ) ;
109
- let mut nfa_to_dfa : Map < nfa :: State , State > = Map :: default ( ) ;
110
- let dfa_start = State :: new ( ) ;
111
- nfa_to_dfa . insert ( nfa_start , dfa_start ) ;
77
+ pub ( crate ) fn from_byte ( byte : Byte ) -> Self {
78
+ let mut transitions : Map < State , Transitions < R > > = Map :: default ( ) ;
79
+ let start = State :: new ( ) ;
80
+ let accepting = State :: new ( ) ;
112
81
113
- let mut queue = vec ! [ ( nfa_start , dfa_start ) ] ;
82
+ transitions . entry ( start ) . or_default ( ) . byte_transitions . insert ( byte , accepting ) ;
114
83
115
- while let Some ( ( nfa_state, dfa_state) ) = queue. pop ( ) {
116
- if nfa_state == nfa_accepting {
117
- continue ;
118
- }
84
+ Self { transitions, start, accepting }
85
+ }
86
+
87
+ pub ( crate ) fn from_ref ( r : R ) -> Self {
88
+ let mut transitions: Map < State , Transitions < R > > = Map :: default ( ) ;
89
+ let start = State :: new ( ) ;
90
+ let accepting = State :: new ( ) ;
91
+
92
+ transitions. entry ( start) . or_default ( ) . ref_transitions . insert ( r, accepting) ;
119
93
120
- for ( nfa_transition, next_nfa_states) in nfa_transitions[ & nfa_state] . iter ( ) {
121
- let dfa_transitions =
122
- dfa_transitions. entry ( dfa_state) . or_insert_with ( Default :: default) ;
123
-
124
- let mapped_state = next_nfa_states. iter ( ) . find_map ( |x| nfa_to_dfa. get ( x) . copied ( ) ) ;
125
-
126
- let next_dfa_state = match nfa_transition {
127
- & nfa:: Transition :: Byte ( b) => * dfa_transitions
128
- . byte_transitions
129
- . entry ( b)
130
- . or_insert_with ( || mapped_state. unwrap_or_else ( State :: new) ) ,
131
- & nfa:: Transition :: Ref ( r) => * dfa_transitions
132
- . ref_transitions
133
- . entry ( r)
134
- . or_insert_with ( || mapped_state. unwrap_or_else ( State :: new) ) ,
135
- } ;
136
-
137
- for & next_nfa_state in next_nfa_states {
138
- nfa_to_dfa. entry ( next_nfa_state) . or_insert_with ( || {
139
- queue. push ( ( next_nfa_state, next_dfa_state) ) ;
140
- next_dfa_state
141
- } ) ;
94
+ Self { transitions, start, accepting }
95
+ }
96
+
97
+ pub ( crate ) fn from_tree ( tree : Tree < !, R > ) -> Result < Self , Uninhabited > {
98
+ Ok ( match tree {
99
+ Tree :: Byte ( b) => Self :: from_byte ( b) ,
100
+ Tree :: Ref ( r) => Self :: from_ref ( r) ,
101
+ Tree :: Alt ( alts) => {
102
+ // Convert and filter the inhabited alternatives.
103
+ let mut alts = alts. into_iter ( ) . map ( Self :: from_tree) . filter_map ( Result :: ok) ;
104
+ // If there is not at least one alternative, return `Uninhabited`.
105
+ let dfa = alts. next ( ) . ok_or ( Uninhabited ) ?;
106
+ // Combine the remaining alternatives with `dfa`.
107
+ alts. fold ( dfa, |dfa, alt| dfa. union ( alt, State :: new) )
108
+ }
109
+ Tree :: Seq ( elts) => {
110
+ let mut dfa = Self :: unit ( ) ;
111
+ for elt in elts. into_iter ( ) . map ( Self :: from_tree) {
112
+ dfa = dfa. concat ( elt?) ;
142
113
}
114
+ dfa
115
+ }
116
+ } )
117
+ }
118
+
119
+ /// Concatenate two `Dfa`s.
120
+ pub ( crate ) fn concat ( self , other : Self ) -> Self {
121
+ if self . start == self . accepting {
122
+ return other;
123
+ } else if other. start == other. accepting {
124
+ return self ;
125
+ }
126
+
127
+ let start = self . start ;
128
+ let accepting = other. accepting ;
129
+
130
+ let mut transitions: Map < State , Transitions < R > > = self . transitions ;
131
+
132
+ for ( source, transition) in other. transitions {
133
+ let fix_state = |state| if state == other. start { self . accepting } else { state } ;
134
+ let entry = transitions. entry ( fix_state ( source) ) . or_default ( ) ;
135
+ for ( edge, destination) in transition. byte_transitions {
136
+ entry. byte_transitions . insert ( edge, fix_state ( destination) ) ;
137
+ }
138
+ for ( edge, destination) in transition. ref_transitions {
139
+ entry. ref_transitions . insert ( edge, fix_state ( destination) ) ;
143
140
}
144
141
}
145
142
146
- let dfa_accepting = nfa_to_dfa[ & nfa_accepting] ;
143
+ Self { transitions, start, accepting }
144
+ }
145
+
146
+ /// Compute the union of two `Nfa`s.
147
+ pub ( crate ) fn union ( self , other : Self , mut new_state : impl FnMut ( ) -> State ) -> Self {
148
+ // We implement `union` by lazily initializing a set of states
149
+ // corresponding to the product of states in `self` and `other`, and
150
+ // then add transitions between these states that correspond to where
151
+ // they exist between `self` and `other`.
152
+
153
+ let a = self ;
154
+ let b = other;
155
+
156
+ let accepting = new_state ( ) ;
157
+
158
+ let mut mapping: Map < ( Option < State > , Option < State > ) , State > = Map :: default ( ) ;
159
+
160
+ let mut mapped = |( a_state, b_state) | {
161
+ if Some ( a. accepting ) == a_state || Some ( b. accepting ) == b_state {
162
+ // If either `a_state` or `b_state` are accepting, map to a
163
+ // common `accepting` state.
164
+ accepting
165
+ } else {
166
+ * mapping. entry ( ( a_state, b_state) ) . or_insert_with ( & mut new_state)
167
+ }
168
+ } ;
169
+
170
+ let start = mapped ( ( Some ( a. start ) , Some ( b. start ) ) ) ;
171
+ let mut transitions: Map < State , Transitions < R > > = Map :: default ( ) ;
172
+ let mut queue = vec ! [ ( Some ( a. start) , Some ( b. start) ) ] ;
173
+ let empty_transitions = Transitions :: default ( ) ;
174
+
175
+ while let Some ( ( a_src, b_src) ) = queue. pop ( ) {
176
+ let a_transitions =
177
+ a_src. and_then ( |a_src| a. transitions . get ( & a_src) ) . unwrap_or ( & empty_transitions) ;
178
+ let b_transitions =
179
+ b_src. and_then ( |b_src| b. transitions . get ( & b_src) ) . unwrap_or ( & empty_transitions) ;
180
+
181
+ let byte_transitions =
182
+ a_transitions. byte_transitions . keys ( ) . chain ( b_transitions. byte_transitions . keys ( ) ) ;
183
+
184
+ for byte_transition in byte_transitions {
185
+ let a_dst = a_transitions. byte_transitions . get ( byte_transition) . copied ( ) ;
186
+ let b_dst = b_transitions. byte_transitions . get ( byte_transition) . copied ( ) ;
187
+
188
+ assert ! ( a_dst. is_some( ) || b_dst. is_some( ) ) ;
189
+
190
+ let src = mapped ( ( a_src, b_src) ) ;
191
+ let dst = mapped ( ( a_dst, b_dst) ) ;
192
+
193
+ transitions. entry ( src) . or_default ( ) . byte_transitions . insert ( * byte_transition, dst) ;
194
+
195
+ queue. push ( ( a_dst, b_dst) )
196
+ }
197
+
198
+ let ref_transitions =
199
+ a_transitions. ref_transitions . keys ( ) . chain ( b_transitions. ref_transitions . keys ( ) ) ;
147
200
148
- Self { transitions : dfa_transitions, start : dfa_start, accepting : dfa_accepting }
201
+ for ref_transition in ref_transitions {
202
+ let a_dst = a_transitions. ref_transitions . get ( ref_transition) . copied ( ) ;
203
+ let b_dst = b_transitions. ref_transitions . get ( ref_transition) . copied ( ) ;
204
+
205
+ assert ! ( a_dst. is_some( ) || b_dst. is_some( ) ) ;
206
+
207
+ let src = mapped ( ( a_src, b_src) ) ;
208
+ let dst = mapped ( ( a_dst, b_dst) ) ;
209
+
210
+ transitions. entry ( src) . or_default ( ) . ref_transitions . insert ( * ref_transition, dst) ;
211
+
212
+ queue. push ( ( a_dst, b_dst) )
213
+ }
214
+ }
215
+
216
+ Self { transitions, start, accepting }
149
217
}
150
218
151
219
pub ( crate ) fn bytes_from ( & self , start : State ) -> Option < & Map < Byte , State > > {
@@ -159,24 +227,48 @@ where
159
227
/// Returns the reference-labelled transitions leaving `start`, or `None`
/// when no transitions are recorded for `start`.
pub(crate) fn refs_from(&self, start: State) -> Option<&Map<R, State>> {
    self.transitions.get(&start).map(|outgoing| &outgoing.ref_transitions)
}
162
- }
163
230
164
- impl State {
165
- pub ( crate ) fn new ( ) -> Self {
166
- static COUNTER : AtomicU32 = AtomicU32 :: new ( 0 ) ;
167
- Self ( COUNTER . fetch_add ( 1 , Ordering :: SeqCst ) )
231
/// Test-only constructor that builds a `Dfa` from raw state numbers and a
/// list of `(source, label, destination)` byte edges.
///
/// # Panics
///
/// Panics if two edges share the same source state and the same label.
#[cfg(test)]
pub(crate) fn from_edges<B: Copy + Into<Byte>>(
    start: u32,
    accept: u32,
    edges: &[(u32, B, u32)],
) -> Self {
    let mut transitions: Map<State, Transitions<R>> = Map::default();

    for (src, label, dst) in edges.iter().copied() {
        let outgoing = &mut transitions.entry(State(src)).or_default().byte_transitions;
        let previous = outgoing.insert(label.into(), State(dst));
        assert!(previous.is_none());
    }

    Self { start: State(start), accepting: State(accept), transitions }
}
169
250
}
170
251
171
- # [ cfg ( test ) ]
172
- impl < R > From < nfa :: Transition < R > > for Transition < R >
252
+ /// Serialize the DFA using the Graphviz DOT format.
253
+ impl < R > fmt :: Debug for Dfa < R >
173
254
where
174
255
R : Ref ,
175
256
{
176
- fn from ( nfa_transition : nfa:: Transition < R > ) -> Self {
177
- match nfa_transition {
178
- nfa:: Transition :: Byte ( byte) => Transition :: Byte ( byte) ,
179
- nfa:: Transition :: Ref ( r) => Transition :: Ref ( r) ,
257
+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
258
+ writeln ! ( f, "digraph {{" ) ?;
259
+ writeln ! ( f, " {:?} [shape = doublecircle]" , self . start) ?;
260
+ writeln ! ( f, " {:?} [shape = doublecircle]" , self . accepting) ?;
261
+
262
+ for ( src, transitions) in self . transitions . iter ( ) {
263
+ for ( t, dst) in transitions. byte_transitions . iter ( ) {
264
+ writeln ! ( f, " {src:?} -> {dst:?} [label=\" {t:?}\" ]" ) ?;
265
+ }
266
+
267
+ for ( t, dst) in transitions. ref_transitions . iter ( ) {
268
+ writeln ! ( f, " {src:?} -> {dst:?} [label=\" {t:?}\" ]" ) ?;
269
+ }
180
270
}
271
+
272
+ writeln ! ( f, "}}" )
181
273
}
182
274
}
0 commit comments