Skip to content

Commit bdcaa6e

Browse files
committed
introduce a generic SCC computation
1 parent ebfb45e commit bdcaa6e

File tree

5 files changed

+514
-3
lines changed

5 files changed

+514
-3
lines changed

src/librustc_data_structures/graph/implementation/tests.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
use graph::*;
11+
use graph::implementation::*;
1212
use std::fmt::Debug;
1313

1414
type TestGraph = Graph<&'static str, &'static str>;

src/librustc_data_structures/graph/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ pub mod dominators;
1414
pub mod implementation;
1515
pub mod iterate;
1616
mod reference;
17+
pub mod scc;
1718

1819
#[cfg(test)]
1920
mod test;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,324 @@
1+
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
//! Routine to compute the strongly connected components (SCCs) of a
12+
//! graph, as well as the resulting DAG if each SCC is replaced with a
13+
//! node in the graph. This uses Tarjan's algorithm that completes in
14+
//! O(n) time.
15+
16+
use graph::{DirectedGraph, WithNumNodes, WithSuccessors};
17+
use indexed_vec::{Idx, IndexVec};
18+
use std::ops::Range;
19+
20+
mod test;
21+
22+
/// Strongly connected components (SCC) of a graph. The type `N` is
23+
/// the index type for the graph nodes and `S` is the index type for
24+
/// the SCCs. We can map from each node to the SCC that it
25+
/// participates in, and we also have the successors of each SCC.
26+
pub struct Sccs<N: Idx, S: Idx> {
27+
/// For each node, what is the SCC index of the SCC to which it
28+
/// belongs.
29+
scc_indices: IndexVec<N, S>,
30+
31+
/// Data about each SCC.
32+
scc_data: SccData<S>,
33+
}
34+
35+
struct SccData<S: Idx> {
36+
/// For each SCC, the range of `all_successors` where its
37+
/// successors can be found.
38+
ranges: IndexVec<S, Range<usize>>,
39+
40+
/// Contains the succcessors for all the Sccs, concatenated. The
41+
/// range of indices corresponding to a given SCC is found in its
42+
/// SccData.
43+
all_successors: Vec<S>,
44+
}
45+
46+
impl<N: Idx, S: Idx> Sccs<N, S> {
47+
pub fn new(graph: &(impl DirectedGraph<Node = N> + WithNumNodes + WithSuccessors)) -> Self {
48+
SccsConstruction::construct(graph)
49+
}
50+
51+
/// Returns the number of SCCs in the graph.
52+
pub fn num_sccs(&self) -> usize {
53+
self.scc_data.len()
54+
}
55+
56+
/// Returns the SCC to which a node `r` belongs.
57+
pub fn scc(&self, r: N) -> S {
58+
self.scc_indices[r]
59+
}
60+
61+
/// Returns the successor of the given SCC.
62+
pub fn successors(&self, scc: S) -> &[S] {
63+
self.scc_data.successors(scc)
64+
}
65+
}
66+
67+
impl<S: Idx> SccData<S> {
68+
/// Number of SCCs,
69+
fn len(&self) -> usize {
70+
self.ranges.len()
71+
}
72+
73+
/// Returns the successor of the given SCC.
74+
fn successors(&self, scc: S) -> &[S] {
75+
// Annoyingly, `range` does not implement `Copy`, so we have
76+
// to do `range.start..range.end`:
77+
let range = &self.ranges[scc];
78+
&self.all_successors[range.start..range.end]
79+
}
80+
81+
/// Creates a new SCC with `successors` as its successors and
82+
/// returns the resulting index.
83+
fn create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S {
84+
// Store the successors on `scc_successors_vec`, remembering
85+
// the range of indices.
86+
let all_successors_start = self.all_successors.len();
87+
self.all_successors.extend(successors);
88+
let all_successors_end = self.all_successors.len();
89+
90+
debug!(
91+
"create_scc({:?}) successors={:?}",
92+
self.ranges.len(),
93+
&self.all_successors[all_successors_start..all_successors_end],
94+
);
95+
96+
self.ranges.push(all_successors_start..all_successors_end)
97+
}
98+
}
99+
100+
struct SccsConstruction<'c, G: DirectedGraph + WithNumNodes + WithSuccessors + 'c, S: Idx> {
101+
graph: &'c G,
102+
103+
/// The state of each node; used during walk to record the stack
104+
/// and after walk to record what cycle each node ended up being
105+
/// in.
106+
node_states: IndexVec<G::Node, NodeState<G::Node, S>>,
107+
108+
/// The stack of nodes that we are visiting as part of the DFS.
109+
node_stack: Vec<G::Node>,
110+
111+
/// The stack of successors: as we visit a node, we mark our
112+
/// position in this stack, and when we encounter a successor SCC,
113+
/// we push it on the stack. When we complete an SCC, we can pop
114+
/// everything off the stack that was found along the way.
115+
successors_stack: Vec<S>,
116+
scc_data: SccData<S>,
117+
}
118+
119+
/// The state of a single graph node during (and after) SCC
/// construction.
#[derive(Copy, Clone, Debug)]
enum NodeState<N, S> {
    /// This node has not yet been visited as part of the DFS.
    ///
    /// After SCC construction is complete, this state ought to be
    /// impossible.
    NotVisited,

    /// This node is currently being walked as part of our DFS. It is
    /// on the stack at the depth `depth`.
    ///
    /// After SCC construction is complete, this state ought to be
    /// impossible.
    BeingVisited { depth: usize },

    /// Indicates that this node is a member of the given cycle.
    InCycle { scc_index: S },

    /// Indicates that this node is a member of whatever cycle
    /// `parent` is a member of. This state is transient: whenever we
    /// see it, we try to overwrite it with the current state of
    /// `parent` (this is the "path compression" step of a union-find
    /// algorithm).
    InCycleWith { parent: N },
}
144+
145+
/// The outcome of walking a node during SCC construction.
#[derive(Copy, Clone, Debug)]
enum WalkReturn<S> {
    /// The node is part of a cycle that is still being visited;
    /// `min_depth` is the lowest stack depth that was reached from it.
    Cycle { min_depth: usize },

    /// The node belongs to the completed SCC `scc_index`.
    Complete { scc_index: S },
}
150+
151+
impl<'c, G, S> SccsConstruction<'c, G, S>
152+
where
153+
G: DirectedGraph + WithNumNodes + WithSuccessors,
154+
S: Idx,
155+
{
156+
/// Identifies SCCs in the graph `G` and computes the resulting
157+
/// DAG. This uses a variant of [Tarjan's
158+
/// algorithm][wikipedia]. The high-level summary of the algorithm
159+
/// is that we do a depth-first search. Along the way, we keep a
160+
/// stack of each node whose successors are being visited. We
161+
/// track the depth of each node on this stack (there is no depth
162+
/// if the node is not on the stack). When we find that some node
163+
/// N with depth D can reach some other node N' with lower depth
164+
/// D' (i.e., D' < D), we know that N, N', and all nodes in
165+
/// between them on the stack are part of an SCC.
166+
///
167+
/// For each node, we track the lowest depth of any successor we
168+
/// have found, along with that
169+
///
170+
/// [wikipedia]: https://bit.ly/2EZIx84
171+
fn construct(graph: &'c G) -> Sccs<G::Node, S> {
172+
let num_nodes = graph.num_nodes();
173+
174+
let mut this = Self {
175+
graph,
176+
node_states: IndexVec::from_elem_n(NodeState::NotVisited, num_nodes),
177+
node_stack: Vec::with_capacity(num_nodes),
178+
successors_stack: Vec::new(),
179+
scc_data: SccData {
180+
ranges: IndexVec::new(),
181+
all_successors: Vec::new(),
182+
},
183+
};
184+
185+
let scc_indices = (0..num_nodes)
186+
.map(G::Node::new)
187+
.map(|node| match this.walk_node(0, node) {
188+
WalkReturn::Complete { scc_index } => scc_index,
189+
WalkReturn::Cycle { min_depth } => panic!(
190+
"`walk_node(0, {:?})` returned cycle with depth {:?}",
191+
node, min_depth
192+
),
193+
})
194+
.collect();
195+
196+
Sccs {
197+
scc_indices,
198+
scc_data: this.scc_data,
199+
}
200+
}
201+
202+
fn walk_node(&mut self, depth: usize, node: G::Node) -> WalkReturn<S> {
203+
debug!("walk_node(depth = {:?}, node = {:?})", depth, node);
204+
match self.find_state(node) {
205+
NodeState::InCycle { scc_index } => WalkReturn::Complete { scc_index },
206+
207+
NodeState::BeingVisited { depth: min_depth } => WalkReturn::Cycle { min_depth },
208+
209+
NodeState::NotVisited => self.walk_unvisited_node(depth, node),
210+
211+
NodeState::InCycleWith { parent } => panic!(
212+
"`find_state` returned `InCycleWith({:?})`, which ought to be impossible",
213+
parent
214+
),
215+
}
216+
}
217+
218+
/// Fetches the state of the node `r`. If `r` is recorded as being
219+
/// in a cycle with some other node `r2`, then fetches the state
220+
/// of `r2` (and updates `r` to reflect current result). This is
221+
/// basically the "find" part of a standard union-find algorithm
222+
/// (with path compression).
223+
fn find_state(&mut self, r: G::Node) -> NodeState<G::Node, S> {
224+
debug!("find_state(r = {:?} in state {:?})", r, self.node_states[r]);
225+
match self.node_states[r] {
226+
NodeState::InCycle { scc_index } => NodeState::InCycle { scc_index },
227+
NodeState::BeingVisited { depth } => NodeState::BeingVisited { depth },
228+
NodeState::NotVisited => NodeState::NotVisited,
229+
NodeState::InCycleWith { parent } => {
230+
let parent_state = self.find_state(parent);
231+
debug!("find_state: parent_state = {:?}", parent_state);
232+
match parent_state {
233+
NodeState::InCycle { .. } => {
234+
self.node_states[r] = parent_state;
235+
parent_state
236+
}
237+
238+
NodeState::BeingVisited { depth } => {
239+
self.node_states[r] = NodeState::InCycleWith {
240+
parent: self.node_stack[depth],
241+
};
242+
parent_state
243+
}
244+
245+
NodeState::NotVisited | NodeState::InCycleWith { .. } => {
246+
panic!("invalid parent state: {:?}", parent_state)
247+
}
248+
}
249+
}
250+
}
251+
}
252+
253+
/// Walks a node that has never been visited before.
254+
fn walk_unvisited_node(&mut self, depth: usize, node: G::Node) -> WalkReturn<S> {
255+
debug!(
256+
"walk_unvisited_node(depth = {:?}, node = {:?})",
257+
depth, node
258+
);
259+
260+
debug_assert!(match self.node_states[node] {
261+
NodeState::NotVisited => true,
262+
_ => false,
263+
});
264+
265+
self.node_states[node] = NodeState::BeingVisited { depth };
266+
self.node_stack.push(node);
267+
268+
// Walk each successor of the node, looking to see if any of
269+
// them can reach a node that is presently on the stack. If
270+
// so, that means they can also reach us.
271+
let mut min_depth = depth;
272+
let mut min_cycle_root = node;
273+
let successors_len = self.successors_stack.len();
274+
for successor_node in self.graph.successors(node) {
275+
debug!(
276+
"walk_unvisited_node: node = {:?} successor_ode = {:?}",
277+
node, successor_node
278+
);
279+
match self.walk_node(depth + 1, successor_node) {
280+
WalkReturn::Cycle {
281+
min_depth: successor_min_depth,
282+
} => {
283+
assert!(successor_min_depth <= depth);
284+
if successor_min_depth < min_depth {
285+
debug!(
286+
"walk_unvisited_node: node = {:?} successor_min_depth = {:?}",
287+
node, successor_min_depth
288+
);
289+
min_depth = successor_min_depth;
290+
min_cycle_root = successor_node;
291+
}
292+
}
293+
294+
WalkReturn::Complete {
295+
scc_index: successor_scc_index,
296+
} => {
297+
debug!(
298+
"walk_unvisited_node: node = {:?} successor_scc_index = {:?}",
299+
node, successor_scc_index
300+
);
301+
self.successors_stack.push(successor_scc_index);
302+
}
303+
}
304+
}
305+
306+
let r = self.node_stack.pop();
307+
debug_assert_eq!(r, Some(node));
308+
309+
if min_depth == depth {
310+
let scc_index = self.scc_data
311+
.create_scc(self.successors_stack.drain(successors_len..));
312+
self.node_states[node] = NodeState::InCycle { scc_index };
313+
WalkReturn::Complete { scc_index }
314+
} else {
315+
// We are not the head of the cycle. Return back to our
316+
// caller. They will take ownership of the
317+
// `self.successors` data that we pushed.
318+
self.node_states[node] = NodeState::InCycleWith {
319+
parent: min_cycle_root,
320+
};
321+
WalkReturn::Cycle { min_depth }
322+
}
323+
}
324+
}

0 commit comments

Comments
 (0)