Commit eb5cb95

Auto merge of #39424 - nikomatsakis:incr-comp-skip-typeck-3, r=mw
rewrite the predecessors code to create a reduced graph

The old code created a flat listing of "HIR -> WorkProduct" edges. While perfectly general, this could lead to a lot of repetition if the same HIR nodes affect many work-products. This is set to be a problem when we start to skip typeck, since we will be adding a lot more "work-product"-like nodes.

The newer code uses an alternative strategy: it "reduces" the graph instead. Basically we walk the dep-graph and convert it to a DAG, where we only keep intermediate nodes if they are used by multiple work-products. This DAG does not contain the same set of nodes as the original graph, but it is guaranteed that (a) every output node is included in the graph and (b) the set of input nodes that can reach each output node is unchanged. (Input nodes are basically HIR nodes and foreign metadata; output nodes are nodes that have associated state which we will persist to disk in some way. These are assumed to be disjoint sets.)

r? @michaelwoerister

Fixes #39494
2 parents: 8967085 + b3096e2
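To illustrate the invariants the reduction must preserve, here is a minimal, self-contained Rust sketch. It is a toy under stated assumptions, not the compiler's implementation: the string node names, the adjacency-map representation, and the `reduce` helper are all hypothetical. It computes the degenerate form of the reduction, direct input -> output edges, which satisfies both guarantees; the real pass additionally keeps intermediate nodes shared by multiple work-products so common prefixes are stored only once.

use std::collections::{HashMap, HashSet};

// Hypothetical stand-ins for dep-nodes: inputs play the role of HIR
// nodes / foreign metadata, outputs the role of work-products.
type Node = &'static str;

// Reduce a graph to direct input -> output edges, preserving
// (a) every output node appears, and (b) the set of inputs that can
// reach each output is unchanged.
fn reduce(successors: &HashMap<Node, Vec<Node>>,
          inputs: &HashSet<Node>,
          outputs: &HashSet<Node>)
          -> HashMap<Node, HashSet<Node>> {
    // Invert the graph so we can walk from each output back to the inputs.
    let mut preds: HashMap<Node, Vec<Node>> = HashMap::new();
    for (&src, tgts) in successors {
        for &tgt in tgts {
            preds.entry(tgt).or_insert_with(Vec::new).push(src);
        }
    }
    outputs.iter().map(|&out| {
        // Depth-first walk over predecessors, collecting reachable inputs.
        let mut reached = HashSet::new();
        let mut seen = HashSet::new();
        let mut stack = vec![out];
        while let Some(node) = stack.pop() {
            if !seen.insert(node) { continue; }
            if inputs.contains(node) { reached.insert(node); }
            if let Some(ps) = preds.get(node) {
                stack.extend(ps.iter().cloned());
            }
        }
        (out, reached)
    }).collect()
}

fn main() {
    // Two HIR nodes feed `typeck`, which feeds two work-products.
    let successors: HashMap<Node, Vec<Node>> = vec![
        ("hir1", vec!["typeck"]),
        ("hir2", vec!["typeck"]),
        ("typeck", vec!["wp1", "wp2"]),
    ].into_iter().collect();
    let inputs: HashSet<Node> = ["hir1", "hir2"].iter().cloned().collect();
    let outputs: HashSet<Node> = ["wp1", "wp2"].iter().cloned().collect();
    for (out, ins) in reduce(&successors, &inputs, &outputs) {
        println!("{} <- {:?}", out, ins); // e.g. wp1 <- {"hir1", "hir2"}
    }
}

In this example the real reduction would retain `typeck` as a shared intermediate node, since it feeds both work-products, rather than duplicating the `hir1`/`hir2` edges into each output as this degenerate form does.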

15 files changed: +1209 -479 lines

src/librustc_incremental/lib.rs

+3

@@ -23,6 +23,9 @@
 #![feature(staged_api)]
 #![feature(rand)]
 #![feature(core_intrinsics)]
+#![feature(conservative_impl_trait)]
+#![feature(field_init_shorthand)]
+#![feature(pub_restricted)]

 extern crate graphviz;
 #[macro_use] extern crate rustc;

src/librustc_incremental/persist/data.rs

+8 -9

@@ -21,7 +21,7 @@ use super::directory::DefPathIndex;
 /// Data for use when recompiling the **current crate**.
 #[derive(Debug, RustcEncodable, RustcDecodable)]
 pub struct SerializedDepGraph {
-    pub edges: Vec<SerializedEdge>,
+    pub edges: Vec<SerializedEdgeSet>,

     /// These are hashes of two things:
     /// - the HIR nodes in this crate
@@ -45,14 +45,13 @@ pub struct SerializedDepGraph {
     pub hashes: Vec<SerializedHash>,
 }

-/// Represents a "reduced" dependency edge. Unlike the full dep-graph,
-/// the dep-graph we serialize contains only edges `S -> T` where the
-/// source `S` is something hashable (a HIR node or foreign metadata)
-/// and the target `T` is something significant, like a work-product.
-/// Normally, significant nodes are only those that have saved data on
-/// disk, but in unit-testing the set of significant nodes can be
-/// increased.
-pub type SerializedEdge = (DepNode<DefPathIndex>, DepNode<DefPathIndex>);
+/// Represents a set of "reduced" dependency edges. We group the
+/// outgoing edges from a single source together.
+#[derive(Debug, RustcEncodable, RustcDecodable)]
+pub struct SerializedEdgeSet {
+    pub source: DepNode<DefPathIndex>,
+    pub targets: Vec<DepNode<DefPathIndex>>
+}

 #[derive(Debug, RustcEncodable, RustcDecodable)]
 pub struct SerializedHash {
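To make the encoding change concrete, here is a hedged sketch of what the grouped representation buys. Plain strings stand in for `DepNode<DefPathIndex>`, and `group_edges` is a hypothetical helper for illustration, not part of the compiler:

use std::collections::HashMap;

// Simplified stand-in for DepNode<DefPathIndex>.
type Node = String;

struct SerializedEdgeSet {
    source: Node,
    targets: Vec<Node>,
}

// Group a flat (source, target) edge list by source, so a source node
// that feeds many work-products is serialized once instead of once
// per edge.
fn group_edges(edges: Vec<(Node, Node)>) -> Vec<SerializedEdgeSet> {
    let mut map: HashMap<Node, Vec<Node>> = HashMap::new();
    for (source, target) in edges {
        map.entry(source).or_insert_with(Vec::new).push(target);
    }
    map.into_iter()
       .map(|(source, targets)| SerializedEdgeSet { source, targets })
       .collect()
}

With the old `SerializedEdge` representation, a source with N outgoing edges was written N times; here it is written once with N targets.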

src/librustc_incremental/persist/dirty_clean.rs

+3 -3

@@ -67,9 +67,9 @@ pub fn check_dirty_clean_annotations<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,

     let _ignore = tcx.dep_graph.in_ignore();
     let dirty_inputs: FxHashSet<DepNode<DefId>> =
-        dirty_inputs.iter()
-                    .filter_map(|d| retraced.map(d))
-                    .collect();
+        dirty_inputs.keys()
+                    .filter_map(|d| retraced.map(d))
+                    .collect();
     let query = tcx.dep_graph.query();
     debug!("query-nodes: {:?}", query.nodes());
     let krate = tcx.hir.krate();

src/librustc_incremental/persist/load.rs

+94 -79

@@ -10,7 +10,7 @@

 //! Code to save/load the dep-graph from files.

-use rustc::dep_graph::DepNode;
+use rustc::dep_graph::{DepNode, WorkProductId};
 use rustc::hir::def_id::DefId;
 use rustc::hir::svh::Svh;
 use rustc::session::Session;
@@ -19,6 +19,7 @@ use rustc_data_structures::fx::{FxHashSet, FxHashMap};
 use rustc_serialize::Decodable as RustcDecodable;
 use rustc_serialize::opaque::Decoder;
 use std::path::{Path};
+use std::sync::Arc;

 use IncrementalHashesMap;
 use ich::Fingerprint;
@@ -30,7 +31,9 @@ use super::fs::*;
 use super::file_format;
 use super::work_product;

-pub type DirtyNodes = FxHashSet<DepNode<DefPathIndex>>;
+// The key is a dirty node. The value is **some** base-input that we
+// can blame it on.
+pub type DirtyNodes = FxHashMap<DepNode<DefPathIndex>, DepNode<DefPathIndex>>;

 /// If we are in incremental mode, and a previous dep-graph exists,
 /// then load up those nodes/edges that are still valid into the
@@ -149,86 +152,75 @@ pub fn decode_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
     let directory = DefIdDirectory::decode(&mut dep_graph_decoder)?;
     let serialized_dep_graph = SerializedDepGraph::decode(&mut dep_graph_decoder)?;

+    let edge_map: FxHashMap<_, _> = serialized_dep_graph.edges
+                                                        .into_iter()
+                                                        .map(|s| (s.source, s.targets))
+                                                        .collect();
+
     // Retrace the paths in the directory to find their current location (if any).
     let retraced = directory.retrace(tcx);

-    // Compute the set of Hir nodes whose data has changed or which
-    // have been removed. These are "raw" source nodes, which means
-    // that they still use the original `DefPathIndex` values from the
-    // encoding, rather than having been retraced to a `DefId`. The
-    // reason for this is that this way we can include nodes that have
-    // been removed (which no longer have a `DefId` in the current
-    // compilation).
-    let dirty_raw_source_nodes = dirty_nodes(tcx,
-                                             incremental_hashes_map,
-                                             &serialized_dep_graph.hashes,
-                                             &retraced);
-
-    // Create a list of (raw-source-node ->
-    // retracted-target-node) edges. In the process of retracing the
-    // target nodes, we may discover some of them def-paths no longer exist,
-    // in which case there is no need to mark the corresopnding nodes as dirty
-    // (they are just not present). So this list may be smaller than the original.
-    //
-    // Note though that in the common case the target nodes are
-    // `DepNode::WorkProduct` instances, and those don't have a
-    // def-id, so they will never be considered to not exist. Instead,
-    // we do a secondary hashing step (later, in trans) when we know
-    // the set of symbols that go into a work-product: if any symbols
-    // have been removed (or added) the hash will be different and
-    // we'll ignore the work-product then.
-    let retraced_edges: Vec<_> =
-        serialized_dep_graph.edges.iter()
-                                  .filter_map(|&(ref raw_source_node, ref raw_target_node)| {
-                                      retraced.map(raw_target_node)
-                                              .map(|target_node| (raw_source_node, target_node))
-                                  })
-                                  .collect();
-
-    // Compute which work-products have an input that has changed or
-    // been removed. Put the dirty ones into a set.
-    let mut dirty_target_nodes = FxHashSet();
-    for &(raw_source_node, ref target_node) in &retraced_edges {
-        if dirty_raw_source_nodes.contains(raw_source_node) {
-            if !dirty_target_nodes.contains(target_node) {
-                dirty_target_nodes.insert(target_node.clone());
-
-                if tcx.sess.opts.debugging_opts.incremental_info {
-                    // It'd be nice to pretty-print these paths better than just
-                    // using the `Debug` impls, but wev.
-                    println!("incremental: module {:?} is dirty because {:?} \
-                              changed or was removed",
-                             target_node,
-                             raw_source_node.map_def(|&index| {
-                                 Some(directory.def_path_string(tcx, index))
-                             }).unwrap());
+    // Compute the set of nodes from the old graph where some input
+    // has changed or been removed. These are "raw" source nodes,
+    // which means that they still use the original `DefPathIndex`
+    // values from the encoding, rather than having been retraced to a
+    // `DefId`. The reason for this is that this way we can include
+    // nodes that have been removed (which no longer have a `DefId` in
+    // the current compilation).
+    let dirty_raw_nodes = initial_dirty_nodes(tcx,
+                                              incremental_hashes_map,
+                                              &serialized_dep_graph.hashes,
+                                              &retraced);
+    let dirty_raw_nodes = transitive_dirty_nodes(&edge_map, dirty_raw_nodes);
+
+    // Recreate the edges in the graph that are still clean.
+    let mut clean_work_products = FxHashSet();
+    let mut dirty_work_products = FxHashSet(); // incomplete; just used to suppress debug output
+    for (source, targets) in &edge_map {
+        for target in targets {
+            // If the target is dirty, skip the edge. If this is an edge
+            // that targets a work-product, we can print the blame
+            // information now.
+            if let Some(blame) = dirty_raw_nodes.get(target) {
+                if let DepNode::WorkProduct(ref wp) = *target {
+                    if tcx.sess.opts.debugging_opts.incremental_info {
+                        if dirty_work_products.insert(wp.clone()) {
+                            // It'd be nice to pretty-print these paths better than just
+                            // using the `Debug` impls, but wev.
+                            println!("incremental: module {:?} is dirty because {:?} \
+                                      changed or was removed",
+                                     wp,
+                                     blame.map_def(|&index| {
+                                         Some(directory.def_path_string(tcx, index))
+                                     }).unwrap());
+                        }
+                    }
                 }
+                continue;
             }
-        }
-    }

-    // For work-products that are still clean, add their deps into the
-    // graph. This is needed because later we will have to save this
-    // back out again!
-    let dep_graph = tcx.dep_graph.clone();
-    for (raw_source_node, target_node) in retraced_edges {
-        if dirty_target_nodes.contains(&target_node) {
-            continue;
+            // If the source is dirty, the target will be dirty.
+            assert!(!dirty_raw_nodes.contains_key(source));
+
+            // Retrace the source -> target edges to def-ids and then
+            // create an edge in the graph. Retracing may yield none if
+            // some of the data happens to have been removed; this ought
+            // to be impossible unless it is dirty, so we can unwrap.
+            let source_node = retraced.map(source).unwrap();
+            let target_node = retraced.map(target).unwrap();
+            let _task = tcx.dep_graph.in_task(target_node);
+            tcx.dep_graph.read(source_node);
+            if let DepNode::WorkProduct(ref wp) = *target {
+                clean_work_products.insert(wp.clone());
+            }
         }
-
-        let source_node = retraced.map(raw_source_node).unwrap();
-
-        debug!("decode_dep_graph: clean edge: {:?} -> {:?}", source_node, target_node);
-
-        let _task = dep_graph.in_task(target_node);
-        dep_graph.read(source_node);
     }

     // Add in work-products that are still clean, and delete those that are
     // dirty.
-    reconcile_work_products(tcx, work_products, &dirty_target_nodes);
+    reconcile_work_products(tcx, work_products, &clean_work_products);

-    dirty_clean::check_dirty_clean_annotations(tcx, &dirty_raw_source_nodes, &retraced);
+    dirty_clean::check_dirty_clean_annotations(tcx, &dirty_raw_nodes, &retraced);

     load_prev_metadata_hashes(tcx,
                               &retraced,
@@ -238,13 +230,13 @@ pub fn decode_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,

 /// Computes which of the original set of def-ids are dirty. Stored in
 /// a bit vector where the index is the DefPathIndex.
-fn dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
-                         incremental_hashes_map: &IncrementalHashesMap,
-                         serialized_hashes: &[SerializedHash],
-                         retraced: &RetracedDefIdDirectory)
-                         -> DirtyNodes {
+fn initial_dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
+                                 incremental_hashes_map: &IncrementalHashesMap,
+                                 serialized_hashes: &[SerializedHash],
+                                 retraced: &RetracedDefIdDirectory)
+                                 -> DirtyNodes {
     let mut hcx = HashContext::new(tcx, incremental_hashes_map);
-    let mut dirty_nodes = FxHashSet();
+    let mut dirty_nodes = FxHashMap();

     for hash in serialized_hashes {
         if let Some(dep_node) = retraced.map(&hash.dep_node) {
@@ -277,21 +269,44 @@ fn dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
                    hash.dep_node);
         }

-        dirty_nodes.insert(hash.dep_node.clone());
+        dirty_nodes.insert(hash.dep_node.clone(), hash.dep_node.clone());
     }

     dirty_nodes
 }

+fn transitive_dirty_nodes(edge_map: &FxHashMap<DepNode<DefPathIndex>, Vec<DepNode<DefPathIndex>>>,
+                          mut dirty_nodes: DirtyNodes)
+                          -> DirtyNodes
+{
+    let mut stack: Vec<(DepNode<DefPathIndex>, DepNode<DefPathIndex>)> = vec![];
+    stack.extend(dirty_nodes.iter().map(|(s, b)| (s.clone(), b.clone())));
+    while let Some((source, blame)) = stack.pop() {
+        // we know the source is dirty (because of the node `blame`)...
+        assert!(dirty_nodes.contains_key(&source));
+
+        // ...so we dirty all the targets (with the same blame)
+        if let Some(targets) = edge_map.get(&source) {
+            for target in targets {
+                if !dirty_nodes.contains_key(target) {
+                    dirty_nodes.insert(target.clone(), blame.clone());
+                    stack.push((target.clone(), blame.clone()));
+                }
+            }
+        }
+    }
+    dirty_nodes
+}
+
 /// Go through the list of work-products produced in the previous run.
 /// Delete any whose nodes have been found to be dirty or which are
 /// otherwise no longer applicable.
 fn reconcile_work_products<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
                                      work_products: Vec<SerializedWorkProduct>,
-                                     dirty_target_nodes: &FxHashSet<DepNode<DefId>>) {
+                                     clean_work_products: &FxHashSet<Arc<WorkProductId>>) {
     debug!("reconcile_work_products({:?})", work_products);
     for swp in work_products {
-        if dirty_target_nodes.contains(&DepNode::WorkProduct(swp.id.clone())) {
+        if !clean_work_products.contains(&swp.id) {
             debug!("reconcile_work_products: dep-node for {:?} is dirty", swp);
             delete_dirty_work_product(tcx, swp);
         } else {
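The core of the new dirty-tracking is `transitive_dirty_nodes` above: dirtiness flows forward along the reduced edges, and each transitively dirtied node inherits the blame of whatever dirtied it, so the `incremental: module ... is dirty` message can always name a concrete base input. Below is a standalone sketch of that worklist algorithm, with plain strings standing in for `DepNode<DefPathIndex>`; it mirrors the function's logic but is not the compiler's code:

use std::collections::HashMap;

// Strings stand in for DepNode<DefPathIndex>. The map value is **some**
// base input that the key's dirtiness can be blamed on.
type Node = &'static str;
type DirtyNodes = HashMap<Node, Node>;

fn transitive_dirty_nodes(edge_map: &HashMap<Node, Vec<Node>>,
                          mut dirty_nodes: DirtyNodes)
                          -> DirtyNodes {
    // Seed the worklist with the initially dirty nodes and their blame.
    let mut stack: Vec<(Node, Node)> =
        dirty_nodes.iter().map(|(&s, &b)| (s, b)).collect();
    while let Some((source, blame)) = stack.pop() {
        // Every target of a dirty source becomes dirty with the same blame.
        if let Some(targets) = edge_map.get(source) {
            for &target in targets {
                if !dirty_nodes.contains_key(target) {
                    dirty_nodes.insert(target, blame);
                    stack.push((target, blame));
                }
            }
        }
    }
    dirty_nodes
}

fn main() {
    // Hypothetical node names: a HIR node feeds a typeck node, which
    // feeds a work-product.
    let edge_map: HashMap<Node, Vec<Node>> = vec![
        ("Hir(foo)", vec!["Typeck(foo)"]),
        ("Typeck(foo)", vec!["WorkProduct(foo.o)"]),
    ].into_iter().collect();
    // Hir(foo) changed, so initially it blames itself.
    let initial: DirtyNodes =
        vec![("Hir(foo)", "Hir(foo)")].into_iter().collect();
    let dirty = transitive_dirty_nodes(&edge_map, initial);
    // WorkProduct(foo.o) ends up dirty, blamed on Hir(foo).
    println!("{:?}", dirty);
}

Because each node is inserted into `dirty_nodes` at most once before being pushed, the walk visits every edge at most once, so propagation is linear in the size of the reduced graph.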
