Skip to content

Commit b4cf2cd

Browse files
committed
Simplify FixedSizeEncoding using const generics.
1 parent b9287a8 commit b4cf2cd

File tree

2 files changed

+86
-107
lines changed

2 files changed

+86
-107
lines changed

compiler/rustc_metadata/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#![feature(proc_macro_internals)]
88
#![feature(macro_metavar_expr)]
99
#![feature(min_specialization)]
10+
#![feature(slice_as_chunks)]
1011
#![feature(try_blocks)]
1112
#![feature(never_type)]
1213
#![recursion_limit = "256"]

compiler/rustc_metadata/src/rmeta/table.rs

+85-107
Original file line numberDiff line numberDiff line change
@@ -16,76 +16,34 @@ use tracing::debug;
1616
/// Unchecked invariant: `Self::default()` should encode as `[0; BYTE_LEN]`,
1717
/// but this has no impact on safety.
1818
pub(super) trait FixedSizeEncoding: Default {
19-
const BYTE_LEN: usize;
20-
21-
// FIXME(eddyb) convert to and from `[u8; Self::BYTE_LEN]` instead,
22-
// once that starts being allowed by the compiler (i.e. lazy normalization).
23-
fn from_bytes(b: &[u8]) -> Self;
24-
fn write_to_bytes(self, b: &mut [u8]);
25-
26-
// FIXME(eddyb) make these generic functions, or at least defaults here.
27-
// (same problem as above, needs `[u8; Self::BYTE_LEN]`)
28-
// For now, a macro (`fixed_size_encoding_byte_len_and_defaults`) is used.
29-
30-
/// Read a `Self` value (encoded as `Self::BYTE_LEN` bytes),
31-
/// from `&b[i * Self::BYTE_LEN..]`, returning `None` if `i`
32-
/// is not in bounds, or `Some(Self::from_bytes(...))` otherwise.
33-
fn maybe_read_from_bytes_at(b: &[u8], i: usize) -> Option<Self>;
34-
/// Write a `Self` value (encoded as `Self::BYTE_LEN` bytes),
35-
/// at `&mut b[i * Self::BYTE_LEN..]`, using `Self::write_to_bytes`.
36-
fn write_to_bytes_at(self, b: &mut [u8], i: usize);
37-
}
19+
/// This should be `[u8; BYTE_LEN]`.
20+
type ByteArray;
3821

39-
// HACK(eddyb) this shouldn't be needed (see comments on the methods above).
40-
macro_rules! fixed_size_encoding_byte_len_and_defaults {
41-
($byte_len:expr) => {
42-
const BYTE_LEN: usize = $byte_len;
43-
fn maybe_read_from_bytes_at(b: &[u8], i: usize) -> Option<Self> {
44-
const BYTE_LEN: usize = $byte_len;
45-
// HACK(eddyb) ideally this would be done with fully safe code,
46-
// but slicing `[u8]` with `i * N..` is optimized worse, due to the
47-
// possibility of `i * N` overflowing, than indexing `[[u8; N]]`.
48-
let b = unsafe {
49-
std::slice::from_raw_parts(b.as_ptr() as *const [u8; BYTE_LEN], b.len() / BYTE_LEN)
50-
};
51-
b.get(i).map(|b| FixedSizeEncoding::from_bytes(b))
52-
}
53-
fn write_to_bytes_at(self, b: &mut [u8], i: usize) {
54-
const BYTE_LEN: usize = $byte_len;
55-
// HACK(eddyb) ideally this would be done with fully safe code,
56-
// see similar comment in `read_from_bytes_at` for why it can't yet.
57-
let b = unsafe {
58-
std::slice::from_raw_parts_mut(
59-
b.as_mut_ptr() as *mut [u8; BYTE_LEN],
60-
b.len() / BYTE_LEN,
61-
)
62-
};
63-
self.write_to_bytes(&mut b[i]);
64-
}
65-
};
22+
fn from_bytes(b: &Self::ByteArray) -> Self;
23+
fn write_to_bytes(self, b: &mut Self::ByteArray);
6624
}
6725

6826
impl FixedSizeEncoding for u32 {
69-
fixed_size_encoding_byte_len_and_defaults!(4);
27+
type ByteArray = [u8; 4];
7028

71-
fn from_bytes(b: &[u8]) -> Self {
72-
let mut bytes = [0; Self::BYTE_LEN];
73-
bytes.copy_from_slice(&b[..Self::BYTE_LEN]);
74-
Self::from_le_bytes(bytes)
29+
#[inline]
30+
fn from_bytes(b: &[u8; 4]) -> Self {
31+
Self::from_le_bytes(*b)
7532
}
7633

77-
fn write_to_bytes(self, b: &mut [u8]) {
78-
b[..Self::BYTE_LEN].copy_from_slice(&self.to_le_bytes());
34+
#[inline]
35+
fn write_to_bytes(self, b: &mut [u8; 4]) {
36+
*b = self.to_le_bytes();
7937
}
8038
}
8139

8240
macro_rules! fixed_size_enum {
8341
($ty:ty { $(($($pat:tt)*))* }) => {
8442
impl FixedSizeEncoding for Option<$ty> {
85-
fixed_size_encoding_byte_len_and_defaults!(1);
43+
type ByteArray = [u8;1];
8644

8745
#[inline]
88-
fn from_bytes(b: &[u8]) -> Self {
46+
fn from_bytes(b: &[u8;1]) -> Self {
8947
use $ty::*;
9048
if b[0] == 0 {
9149
return None;
@@ -97,7 +55,7 @@ macro_rules! fixed_size_enum {
9755
}
9856

9957
#[inline]
100-
fn write_to_bytes(self, b: &mut [u8]) {
58+
fn write_to_bytes(self, b: &mut [u8;1]) {
10159
use $ty::*;
10260
b[0] = match self {
10361
None => 0,
@@ -184,45 +142,45 @@ fixed_size_enum! {
184142

185143
// We directly encode `DefPathHash` because a `Lazy` would incur a 25% cost.
186144
impl FixedSizeEncoding for Option<DefPathHash> {
187-
fixed_size_encoding_byte_len_and_defaults!(16);
145+
type ByteArray = [u8; 16];
188146

189147
#[inline]
190-
fn from_bytes(b: &[u8]) -> Self {
191-
Some(DefPathHash(Fingerprint::from_le_bytes(b.try_into().unwrap())))
148+
fn from_bytes(b: &[u8; 16]) -> Self {
149+
Some(DefPathHash(Fingerprint::from_le_bytes(*b)))
192150
}
193151

194152
#[inline]
195-
fn write_to_bytes(self, b: &mut [u8]) {
153+
fn write_to_bytes(self, b: &mut [u8; 16]) {
196154
let Some(DefPathHash(fingerprint)) = self else {
197155
panic!("Trying to encode absent DefPathHash.")
198156
};
199-
b[..Self::BYTE_LEN].copy_from_slice(&fingerprint.to_le_bytes());
157+
*b = fingerprint.to_le_bytes();
200158
}
201159
}
202160

203161
// We directly encode RawDefId because using a `Lazy` would incur a 50% overhead in the worst case.
204162
impl FixedSizeEncoding for Option<RawDefId> {
205-
fixed_size_encoding_byte_len_and_defaults!(2 * u32::BYTE_LEN);
163+
type ByteArray = [u8; 8];
206164

207165
#[inline]
208-
fn from_bytes(b: &[u8]) -> Self {
209-
let krate = u32::from_bytes(&b[0..4]);
210-
let index = u32::from_bytes(&b[4..8]);
166+
fn from_bytes(b: &[u8; 8]) -> Self {
167+
let krate = u32::from_le_bytes(b[0..4].try_into().unwrap());
168+
let index = u32::from_le_bytes(b[4..8].try_into().unwrap());
211169
if krate == 0 {
212170
return None;
213171
}
214172
Some(RawDefId { krate: krate - 1, index })
215173
}
216174

217175
#[inline]
218-
fn write_to_bytes(self, b: &mut [u8]) {
176+
fn write_to_bytes(self, b: &mut [u8; 8]) {
219177
match self {
220-
None => 0u32.write_to_bytes(b),
178+
None => *b = [0; 8],
221179
Some(RawDefId { krate, index }) => {
222180
// CrateNum is less than `CrateNum::MAX_AS_U32`.
223181
debug_assert!(krate < u32::MAX);
224-
(1 + krate).write_to_bytes(&mut b[0..4]);
225-
index.write_to_bytes(&mut b[4..8]);
182+
b[0..4].copy_from_slice(&(1 + krate).to_le_bytes());
183+
b[4..8].copy_from_slice(&index.to_le_bytes());
226184
}
227185
}
228186
}
@@ -232,44 +190,51 @@ impl FixedSizeEncoding for Option<RawDefId> {
232190
// generic `Lazy<T>` impl, but in the general case we might not need / want to
233191
// fit every `usize` in `u32`.
234192
impl<T> FixedSizeEncoding for Option<Lazy<T>> {
235-
fixed_size_encoding_byte_len_and_defaults!(u32::BYTE_LEN);
193+
type ByteArray = [u8; 4];
236194

237-
fn from_bytes(b: &[u8]) -> Self {
238-
Some(Lazy::from_position(NonZeroUsize::new(u32::from_bytes(b) as usize)?))
195+
#[inline]
196+
fn from_bytes(b: &[u8; 4]) -> Self {
197+
let position = NonZeroUsize::new(u32::from_bytes(b) as usize)?;
198+
Some(Lazy::from_position(position))
239199
}
240200

241-
fn write_to_bytes(self, b: &mut [u8]) {
201+
#[inline]
202+
fn write_to_bytes(self, b: &mut [u8; 4]) {
242203
let position = self.map_or(0, |lazy| lazy.position.get());
243204
let position: u32 = position.try_into().unwrap();
244-
245205
position.write_to_bytes(b)
246206
}
247207
}
248208

249209
impl<T> FixedSizeEncoding for Option<Lazy<[T]>> {
250-
fixed_size_encoding_byte_len_and_defaults!(u32::BYTE_LEN * 2);
210+
type ByteArray = [u8; 8];
251211

252-
fn from_bytes(b: &[u8]) -> Self {
253-
Some(Lazy::from_position_and_meta(
254-
<Option<Lazy<T>>>::from_bytes(b)?.position,
255-
u32::from_bytes(&b[u32::BYTE_LEN..]) as usize,
256-
))
212+
#[inline]
213+
fn from_bytes(b: &[u8; 8]) -> Self {
214+
let ([ref position_bytes, ref meta_bytes],[])= b.as_chunks::<4>() else { panic!() };
215+
let position = NonZeroUsize::new(u32::from_bytes(position_bytes) as usize)?;
216+
let len = u32::from_bytes(meta_bytes) as usize;
217+
Some(Lazy::from_position_and_meta(position, len))
257218
}
258219

259-
fn write_to_bytes(self, b: &mut [u8]) {
260-
self.map(|lazy| Lazy::<T>::from_position(lazy.position)).write_to_bytes(b);
220+
#[inline]
221+
fn write_to_bytes(self, b: &mut [u8; 8]) {
222+
let ([ref mut position_bytes, ref mut meta_bytes],[])= b.as_chunks_mut::<4>() else { panic!() };
223+
224+
let position = self.map_or(0, |lazy| lazy.position.get());
225+
let position: u32 = position.try_into().unwrap();
226+
position.write_to_bytes(position_bytes);
261227

262228
let len = self.map_or(0, |lazy| lazy.meta);
263229
let len: u32 = len.try_into().unwrap();
264-
265-
len.write_to_bytes(&mut b[u32::BYTE_LEN..]);
230+
len.write_to_bytes(meta_bytes);
266231
}
267232
}
268233

269234
/// Random-access table (i.e. offering constant-time `get`/`set`), similar to
270235
/// `Vec<Option<T>>`, but without requiring encoding or decoding all the values
271236
/// eagerly and in-order.
272-
/// A total of `(max_idx + 1) * <Option<T> as FixedSizeEncoding>::BYTE_LEN` bytes
237+
/// A total of `(max_idx + 1)` times the size of `<Option<T> as FixedSizeEncoding>::ByteArray`
273238
/// are used for a table, where `max_idx` is the largest index passed to
274239
/// `TableBuilder::set`.
275240
pub(super) struct Table<I: Idx, T>
@@ -287,53 +252,54 @@ pub(super) struct TableBuilder<I: Idx, T>
287252
where
288253
Option<T>: FixedSizeEncoding,
289254
{
290-
// FIXME(eddyb) use `IndexVec<I, [u8; <Option<T>>::BYTE_LEN]>` instead of
291-
// `Vec<u8>`, once that starts working (i.e. lazy normalization).
292-
// Then again, that has the downside of not allowing `TableBuilder::encode` to
293-
// obtain a `&[u8]` entirely in safe code, for writing the bytes out.
294-
bytes: Vec<u8>,
295-
_marker: PhantomData<(fn(&I), T)>,
255+
blocks: IndexVec<I, <Option<T> as FixedSizeEncoding>::ByteArray>,
256+
_marker: PhantomData<T>,
296257
}
297258

298259
impl<I: Idx, T> Default for TableBuilder<I, T>
299260
where
300261
Option<T>: FixedSizeEncoding,
301262
{
302263
fn default() -> Self {
303-
TableBuilder { bytes: vec![], _marker: PhantomData }
264+
TableBuilder { blocks: Default::default(), _marker: PhantomData }
304265
}
305266
}
306267

307268
impl<I: Idx, T> TableBuilder<I, T>
308269
where
309270
Option<T>: FixedSizeEncoding,
310271
{
311-
pub(crate) fn set(&mut self, i: I, value: T) {
272+
pub(crate) fn set<const N: usize>(&mut self, i: I, value: T)
273+
where
274+
Option<T>: FixedSizeEncoding<ByteArray = [u8; N]>,
275+
{
312276
// FIXME(eddyb) investigate more compact encodings for sparse tables.
313277
// On the PR @michaelwoerister mentioned:
314278
// > Space requirements could perhaps be optimized by using the HAMT `popcnt`
315279
// > trick (i.e. divide things into buckets of 32 or 64 items and then
316280
// > store bit-masks of which item in each bucket is actually serialized).
317-
let i = i.index();
318-
let needed = (i + 1) * <Option<T>>::BYTE_LEN;
319-
if self.bytes.len() < needed {
320-
self.bytes.resize(needed, 0);
321-
}
322-
323-
Some(value).write_to_bytes_at(&mut self.bytes, i);
281+
self.blocks.ensure_contains_elem(i, || [0; N]);
282+
Some(value).write_to_bytes(&mut self.blocks[i]);
324283
}
325284

326-
pub(crate) fn encode(&self, buf: &mut Encoder) -> Lazy<Table<I, T>> {
285+
pub(crate) fn encode<const N: usize>(&self, buf: &mut Encoder) -> Lazy<Table<I, T>>
286+
where
287+
Option<T>: FixedSizeEncoding<ByteArray = [u8; N]>,
288+
{
327289
let pos = buf.position();
328-
buf.emit_raw_bytes(&self.bytes).unwrap();
329-
Lazy::from_position_and_meta(NonZeroUsize::new(pos as usize).unwrap(), self.bytes.len())
290+
for block in &self.blocks {
291+
buf.emit_raw_bytes(block).unwrap();
292+
}
293+
let num_bytes = self.blocks.len() * N;
294+
Lazy::from_position_and_meta(NonZeroUsize::new(pos as usize).unwrap(), num_bytes)
330295
}
331296
}
332297

333298
impl<I: Idx, T> LazyMeta for Table<I, T>
334299
where
335300
Option<T>: FixedSizeEncoding,
336301
{
302+
/// Number of bytes in the data stream.
337303
type Meta = usize;
338304
}
339305

@@ -343,16 +309,28 @@ where
343309
{
344310
/// Given the metadata, extract out the value at a particular index (if any).
345311
#[inline(never)]
346-
pub(super) fn get<'a, 'tcx, M: Metadata<'a, 'tcx>>(&self, metadata: M, i: I) -> Option<T> {
312+
pub(super) fn get<'a, 'tcx, M: Metadata<'a, 'tcx>, const N: usize>(
313+
&self,
314+
metadata: M,
315+
i: I,
316+
) -> Option<T>
317+
where
318+
Option<T>: FixedSizeEncoding<ByteArray = [u8; N]>,
319+
{
347320
debug!("Table::lookup: index={:?} len={:?}", i, self.meta);
348321

349322
let start = self.position.get();
350323
let bytes = &metadata.blob()[start..start + self.meta];
351-
<Option<T>>::maybe_read_from_bytes_at(bytes, i.index())?
324+
let (bytes, []) = bytes.as_chunks::<N>() else { panic!() };
325+
let bytes = bytes.get(i.index())?;
326+
FixedSizeEncoding::from_bytes(bytes)
352327
}
353328

354329
/// Size of the table in entries, including possible gaps.
355-
pub(super) fn size(&self) -> usize {
356-
self.meta / <Option<T>>::BYTE_LEN
330+
pub(super) fn size<const N: usize>(&self) -> usize
331+
where
332+
Option<T>: FixedSizeEncoding<ByteArray = [u8; N]>,
333+
{
334+
self.meta / N
357335
}
358336
}

0 commit comments

Comments
 (0)