Skip to content

Commit e4b01c7

Browse files
committed
Auto merge of rust-lang#69778 - Marwes:dep_graph, r=davidtwco
perf(dep_graph): Avoid allocating a set when the number of reads is small. `reserve_and_rehash` takes up 1.4% of the runtime on the `packed-simd` benchmark, which I believe is because the number of reads is very low in many cases (see rust-lang#50565 for instance). This avoids allocating the set until we start allocating the `reads` `SmallVec`, but it is possible that a lower limit might be better (not tested, since the improvement will be hard to spot either way).
2 parents d1e81ef + 4168c25 commit e4b01c7

File tree

1 file changed

+24
-11
lines changed

1 file changed

+24
-11
lines changed

src/librustc/dep_graph/graph.rs

+24-11
Original file line numberDiff line numberDiff line change
@@ -328,12 +328,7 @@ impl DepGraph {
328328
{
329329
if let Some(ref data) = self.data {
330330
let (result, task_deps) = ty::tls::with_context(|icx| {
331-
let task_deps = Lock::new(TaskDeps {
332-
#[cfg(debug_assertions)]
333-
node: None,
334-
reads: SmallVec::new(),
335-
read_set: Default::default(),
336-
});
331+
let task_deps = Lock::new(TaskDeps::default());
337332

338333
let r = {
339334
let icx = ty::tls::ImplicitCtxt { task_deps: Some(&task_deps), ..icx.clone() };
@@ -953,7 +948,7 @@ pub enum WorkProductFileKind {
953948
#[derive(Clone)]
954949
struct DepNodeData {
955950
node: DepNode,
956-
edges: SmallVec<[DepNodeIndex; 8]>,
951+
edges: EdgesVec,
957952
fingerprint: Fingerprint,
958953
}
959954

@@ -1078,7 +1073,7 @@ impl CurrentDepGraph {
10781073
fn alloc_node(
10791074
&self,
10801075
dep_node: DepNode,
1081-
edges: SmallVec<[DepNodeIndex; 8]>,
1076+
edges: EdgesVec,
10821077
fingerprint: Fingerprint,
10831078
) -> DepNodeIndex {
10841079
debug_assert!(
@@ -1090,7 +1085,7 @@ impl CurrentDepGraph {
10901085
fn intern_node(
10911086
&self,
10921087
dep_node: DepNode,
1093-
edges: SmallVec<[DepNodeIndex; 8]>,
1088+
edges: EdgesVec,
10941089
fingerprint: Fingerprint,
10951090
) -> DepNodeIndex {
10961091
match self.node_to_node_index.get_shard_by_value(&dep_node).lock().entry(dep_node) {
@@ -1113,11 +1108,25 @@ impl DepGraphData {
11131108
let icx = if let Some(icx) = icx { icx } else { return };
11141109
if let Some(task_deps) = icx.task_deps {
11151110
let mut task_deps = task_deps.lock();
1111+
let task_deps = &mut *task_deps;
11161112
if cfg!(debug_assertions) {
11171113
self.current.total_read_count.fetch_add(1, Relaxed);
11181114
}
1119-
if task_deps.read_set.insert(source) {
1115+
1116+
// As long as we only have a low number of reads we can avoid doing a hash
1117+
// insert and potentially allocating/reallocating the hashmap
1118+
let new_read = if task_deps.reads.len() < TASK_DEPS_READS_CAP {
1119+
task_deps.reads.iter().all(|other| *other != source)
1120+
} else {
1121+
task_deps.read_set.insert(source)
1122+
};
1123+
if new_read {
11201124
task_deps.reads.push(source);
1125+
if task_deps.reads.len() == TASK_DEPS_READS_CAP {
1126+
// Fill `read_set` with what we have so far so we can use the hashset next
1127+
// time
1128+
task_deps.read_set.extend(task_deps.reads.iter().copied());
1129+
}
11211130

11221131
#[cfg(debug_assertions)]
11231132
{
@@ -1139,10 +1148,14 @@ impl DepGraphData {
11391148
}
11401149
}
11411150

1151+
/// The capacity of the `reads` field `SmallVec`
1152+
const TASK_DEPS_READS_CAP: usize = 8;
1153+
type EdgesVec = SmallVec<[DepNodeIndex; TASK_DEPS_READS_CAP]>;
1154+
#[derive(Default)]
11421155
pub struct TaskDeps {
11431156
#[cfg(debug_assertions)]
11441157
node: Option<DepNode>,
1145-
reads: SmallVec<[DepNodeIndex; 8]>,
1158+
reads: EdgesVec,
11461159
read_set: FxHashSet<DepNodeIndex>,
11471160
}
11481161

0 commit comments

Comments
 (0)