Skip to content

Commit 469dc8f

Browse files
committed
Add comments with the same level of detail as the PR description
1 parent 94fe18f commit 469dc8f

File tree

3 files changed

+54
-12
lines changed

3 files changed

+54
-12
lines changed

compiler/rustc_query_system/src/dep_graph/edges.rs

+13
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use crate::dep_graph::DepNodeIndex;
22
use smallvec::SmallVec;
33
use std::hash::{Hash, Hasher};
4+
use std::iter::Extend;
45
use std::ops::Deref;
56

67
#[derive(Default, Debug)]
@@ -58,3 +59,15 @@ impl FromIterator<DepNodeIndex> for EdgesVec {
5859
vec
5960
}
6061
}
62+
63+
impl Extend<DepNodeIndex> for EdgesVec {
64+
#[inline]
65+
fn extend<T>(&mut self, iter: T)
66+
where
67+
T: IntoIterator<Item = DepNodeIndex>,
68+
{
69+
for elem in iter {
70+
self.push(elem);
71+
}
72+
}
73+
}

compiler/rustc_query_system/src/dep_graph/graph.rs

+1-5
Original file line numberDiff line numberDiff line change
@@ -574,11 +574,7 @@ impl<K: DepKind> DepGraph<K> {
574574

575575
let mut edges = EdgesVec::new();
576576
K::read_deps(|task_deps| match task_deps {
577-
TaskDepsRef::Allow(deps) => {
578-
for index in deps.lock().reads.iter().copied() {
579-
edges.push(index);
580-
}
581-
}
577+
TaskDepsRef::Allow(deps) => edges.extend(deps.lock().reads.iter().copied()),
582578
TaskDepsRef::EvalAlways => {
583579
edges.push(DepNodeIndex::FOREVER_RED_NODE);
584580
}

compiler/rustc_query_system/src/dep_graph/serialized.rs

+40-7
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,29 @@
1111
//! sequence of NodeInfos to the different arrays in SerializedDepGraph. Since the
1212
//! node and edge count are stored at the end of the file, all the arrays can be
1313
//! pre-allocated with the right length.
14+
//!
15+
//! The encoding of the dep-graph is designed around the fact that fixed-size
16+
//! reads of encoded data are generally faster than variable-sized reads. Ergo we adopt
17+
//! essentially the same varint encoding scheme used in the rmeta format; the edge lists
18+
//! for each node on the graph store a 2-bit integer which is the number of bytes per edge
19+
//! index in that node's edge list. We effectively ignore that an edge index of 0 could be
20+
//! encoded with 0 bytes in order to not require 3 bits to store the byte width of the edges.
21+
//! The overhead of calculating the correct byte width for each edge is mitigated by
22+
//! building edge lists with [`EdgesVec`] which keeps a running max of the edges in a node.
23+
//!
24+
//! When we decode this data, we do not immediately create [`SerializedDepNodeIndex`] and
25+
//! instead keep the data in its denser serialized form which lets us turn our on-disk size
26+
//! efficiency directly into a peak memory reduction. When we convert these encoded-in-memory
27+
//! values into their fully-deserialized type, we use a fixed-size read of the encoded array
28+
//! then mask off any errant bytes we read. The array of edge index bytes is padded to permit this.
29+
//!
30+
//! We also encode and decode the entire rest of each node using [`SerializedNodeHeader`]
31+
//! to let this encoding and decoding be done in one fixed-size operation. These headers contain
32+
//! two [`Fingerprint`]s along with the serialized [`DepKind`], and the number of edge indices
33+
//! in the node and the number of bytes used to encode the edge indices for this node. The
34+
//! [`DepKind`], number of edges, and bytes per edge are all bit-packed together, if they fit.
35+
//! If the number of edges in this node does not fit in the bits available in the header, we
36+
//! store it directly after the header with leb128.
1437
1538
use super::query::DepGraphQuery;
1639
use super::{DepKind, DepNode, DepNodeIndex};
@@ -37,7 +60,7 @@ const DEP_NODE_SIZE: usize = std::mem::size_of::<SerializedDepNodeIndex>();
3760
/// Amount of padding we need to add to the edge list data so that we can retrieve every
3861
/// SerializedDepNodeIndex with a fixed-size read then mask.
3962
const DEP_NODE_PAD: usize = DEP_NODE_SIZE - 1;
40-
/// Amount of bits we need to store the number of used bytes in a SerializedDepNodeIndex.
63+
/// Number of bits we need to store the number of used bytes in a SerializedDepNodeIndex.
4164
/// Note that wherever we encode byte widths like this we actually store the number of bytes used
4265
/// minus 1; for a 4-byte value we technically would have 5 widths to store, but using one byte to
4366
/// store zeroes (which are relatively rare) is a decent tradeoff to save a bit in our bitfields.
@@ -181,8 +204,15 @@ impl<'a, K: DepKind + Decodable<MemDecoder<'a>>> Decodable<MemDecoder<'a>>
181204
let mut nodes = IndexVec::with_capacity(node_count);
182205
let mut fingerprints = IndexVec::with_capacity(node_count);
183206
let mut edge_list_indices = IndexVec::with_capacity(node_count);
184-
// This slightly over-estimates the amount of bytes used for all the edge data but never by
185-
// more than ~6%, because over-estimation only occurs for large nodes.
207+
// This estimation assumes that all of the encoded bytes are for the edge lists or for the
208+
// fixed-size node headers. But that's not necessarily true; if any edge list has a length
209+
// that spills out of the size we can bit-pack into SerializedNodeHeader then some of the
210+
// total serialized size is also used by leb128-encoded edge list lengths. Neglecting that
211+
// contribution to graph_bytes means our estimation of the bytes needed for edge_list_data
212+
// slightly overshoots. But it cannot overshoot by much; consider that the worst case is
213+
// for a node with length 64, which means the spilled 1-byte leb128 length is 1 byte of at
214+
// least (34 byte header + 1 byte len + 64 bytes edge data), which is ~1%. A 2-byte leb128
215+
// length is about the same fractional overhead and it amortizes for yet greater lengths.
186216
let mut edge_list_data = Vec::with_capacity(
187217
graph_bytes - node_count * std::mem::size_of::<SerializedNodeHeader<K>>(),
188218
);
@@ -254,10 +284,13 @@ struct Unpacked<K> {
254284
fingerprint: Fingerprint,
255285
}
256286

257-
// Bit fields are
258-
// 0..? length of the edge
259-
// ?..?+2 bytes per index
260-
// ?+2..16 kind
287+
// Bit fields, where
288+
// M: bits used to store the length of a node's edge list
289+
// N: bits used to store the byte width of elements of the edge list
290+
// are
291+
// 0..M length of the edge list
292+
// M..M+N bytes per index
293+
// M+N..16 kind
261294
impl<K: DepKind> SerializedNodeHeader<K> {
262295
const TOTAL_BITS: usize = std::mem::size_of::<K>() * 8;
263296
const LEN_BITS: usize = Self::TOTAL_BITS - Self::KIND_BITS - Self::WIDTH_BITS;

0 commit comments

Comments
 (0)