From c737c62e70cf8627e01e0b3a1088e96bff1b154d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?= Date: Tue, 15 Aug 2023 16:03:17 +0200 Subject: [PATCH 1/3] Make Sharded an enum and specialize it for the single thread case --- compiler/rustc_data_structures/src/sharded.rs | 66 +++++++++---------- 1 file changed, 30 insertions(+), 36 deletions(-) diff --git a/compiler/rustc_data_structures/src/sharded.rs b/compiler/rustc_data_structures/src/sharded.rs index 40cbf14958e77..6bc0b346d921e 100644 --- a/compiler/rustc_data_structures/src/sharded.rs +++ b/compiler/rustc_data_structures/src/sharded.rs @@ -1,31 +1,25 @@ use crate::fx::{FxHashMap, FxHasher}; #[cfg(parallel_compiler)] -use crate::sync::is_dyn_thread_safe; -use crate::sync::{CacheAligned, Lock, LockGuard}; +use crate::sync::{is_dyn_thread_safe, CacheAligned}; +use crate::sync::{Lock, LockGuard}; use std::borrow::Borrow; use std::collections::hash_map::RawEntryMut; use std::hash::{Hash, Hasher}; use std::mem; -#[cfg(parallel_compiler)] // 32 shards is sufficient to reduce contention on an 8-core Ryzen 7 1700, // but this should be tested on higher core count CPUs. How the `Sharded` type gets used // may also affect the ideal number of shards. -const SHARD_BITS: usize = 5; - -#[cfg(not(parallel_compiler))] -const SHARD_BITS: usize = 0; +const SHARD_BITS: usize = if cfg!(parallel_compiler) { 5 } else { 0 }; pub const SHARDS: usize = 1 << SHARD_BITS; /// An array of cache-line aligned inner locked structures with convenience methods. -pub struct Sharded { - /// This mask is used to ensure that accesses are inbounds of `shards`. - /// When dynamic thread safety is off, this field is set to 0 causing only - /// a single shard to be used for greater cache efficiency. +/// A single field is used when the compiler uses only one thread. +pub enum Sharded { + Single(Lock), #[cfg(parallel_compiler)] - mask: usize, - shards: [CacheAligned>; SHARDS], + Shards(Box<[CacheAligned>; SHARDS]>), } impl Default for Sharded { @@ -38,29 +32,14 @@ impl Default for Sharded { impl Sharded { #[inline] pub fn new(mut value: impl FnMut() -> T) -> Self { - Sharded { - #[cfg(parallel_compiler)] - mask: if is_dyn_thread_safe() { SHARDS - 1 } else { 0 }, - shards: [(); SHARDS].map(|()| CacheAligned(Lock::new(value()))), - } - } - - #[inline(always)] - fn mask(&self) -> usize { #[cfg(parallel_compiler)] - { - if SHARDS == 1 { 0 } else { self.mask } - } - #[cfg(not(parallel_compiler))] - { - 0 + if is_dyn_thread_safe() { + return Sharded::Shards(Box::new( + [(); SHARDS].map(|()| CacheAligned(Lock::new(value()))), + )); } - } - #[inline(always)] - fn count(&self) -> usize { - // `self.mask` is always one below the used shard count - self.mask() + 1 + Sharded::Single(Lock::new(value())) } /// The shard is selected by hashing `val` with `FxHasher`. @@ -75,9 +54,24 @@ impl Sharded { } #[inline] - pub fn get_shard_by_index(&self, i: usize) -> &Lock { - // SAFETY: The index get ANDed with the mask, ensuring it is always inbounds. - unsafe { &self.shards.get_unchecked(i & self.mask()).0 } + pub fn get_shard_by_index(&self, _i: usize) -> &Lock { + match self { + Self::Single(single) => &single, + #[cfg(parallel_compiler)] + Self::Shards(shards) => { + // SAFETY: The index gets ANDed with the shard mask, ensuring it is always inbounds. + unsafe { &shards.get_unchecked(_i & (SHARDS - 1)).0 } + } + } + } + + #[inline] + fn count(&self) -> usize { + match self { + Self::Single(..) => 1, + #[cfg(parallel_compiler)] + Self::Shards(..) => SHARDS, + } } pub fn lock_shards(&self) -> Vec> { From 81220c0acef24bd88e960a9b2c2eea3140365864 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?= Date: Wed, 16 Aug 2023 10:00:25 +0200 Subject: [PATCH 2/3] Keep SHARDS fixed instead of a function of `cfg!(parallel_compiler)` --- compiler/rustc_data_structures/src/sharded.rs | 23 +++++++++++++++---- .../rustc_query_system/src/dep_graph/graph.rs | 2 +- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/compiler/rustc_data_structures/src/sharded.rs b/compiler/rustc_data_structures/src/sharded.rs index 6bc0b346d921e..39cce5c8ee6fd 100644 --- a/compiler/rustc_data_structures/src/sharded.rs +++ b/compiler/rustc_data_structures/src/sharded.rs @@ -10,9 +10,10 @@ use std::mem; // 32 shards is sufficient to reduce contention on an 8-core Ryzen 7 1700, // but this should be tested on higher core count CPUs. How the `Sharded` type gets used // may also affect the ideal number of shards. -const SHARD_BITS: usize = if cfg!(parallel_compiler) { 5 } else { 0 }; +const SHARD_BITS: usize = 5; -pub const SHARDS: usize = 1 << SHARD_BITS; +#[cfg(parallel_compiler)] +const SHARDS: usize = 1 << SHARD_BITS; /// An array of cache-line aligned inner locked structures with convenience methods. /// A single field is used when the compiler uses only one thread. @@ -44,8 +45,12 @@ impl Sharded { /// The shard is selected by hashing `val` with `FxHasher`. #[inline] - pub fn get_shard_by_value(&self, val: &K) -> &Lock { - self.get_shard_by_hash(if SHARDS == 1 { 0 } else { make_hash(val) }) + pub fn get_shard_by_value(&self, _val: &K) -> &Lock { + match self { + Self::Single(single) => &single, + #[cfg(parallel_compiler)] + Self::Shards(shards) => self.get_shard_by_hash(make_hash(_val)), + } } #[inline] @@ -83,6 +88,16 @@ impl Sharded { } } +#[inline] +pub fn shards() -> usize { + #[cfg(parallel_compiler)] + if is_dyn_thread_safe() { + return SHARDS; + } + + 1 +} + pub type ShardedHashMap = Sharded>; impl ShardedHashMap { diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs index 30422ea110264..0d4d13ac20d46 100644 --- a/compiler/rustc_query_system/src/dep_graph/graph.rs +++ b/compiler/rustc_query_system/src/dep_graph/graph.rs @@ -1166,7 +1166,7 @@ impl CurrentDepGraph { )), new_node_to_index: Sharded::new(|| { FxHashMap::with_capacity_and_hasher( - new_node_count_estimate / sharded::SHARDS, + new_node_count_estimate / sharded::shards(), Default::default(), ) }), From 0823f0c32b600dafcc707e76dc1898eadc3c2b59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?= Date: Wed, 16 Aug 2023 10:44:32 +0200 Subject: [PATCH 3/3] Remove `count` --- compiler/rustc_data_structures/src/sharded.rs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/compiler/rustc_data_structures/src/sharded.rs b/compiler/rustc_data_structures/src/sharded.rs index 39cce5c8ee6fd..52ab5a7fb144e 100644 --- a/compiler/rustc_data_structures/src/sharded.rs +++ b/compiler/rustc_data_structures/src/sharded.rs @@ -49,7 +49,7 @@ impl Sharded { match self { Self::Single(single) => &single, #[cfg(parallel_compiler)] - Self::Shards(shards) => self.get_shard_by_hash(make_hash(_val)), + Self::Shards(..) => self.get_shard_by_hash(make_hash(_val)), } } @@ -70,21 +70,20 @@ impl Sharded { } } - #[inline] - fn count(&self) -> usize { + pub fn lock_shards(&self) -> Vec> { match self { - Self::Single(..) => 1, + Self::Single(single) => vec![single.lock()], #[cfg(parallel_compiler)] - Self::Shards(..) => SHARDS, + Self::Shards(shards) => shards.iter().map(|shard| shard.0.lock()).collect(), } } - pub fn lock_shards(&self) -> Vec> { - (0..self.count()).map(|i| self.get_shard_by_index(i).lock()).collect() - } - pub fn try_lock_shards(&self) -> Option>> { - (0..self.count()).map(|i| self.get_shard_by_index(i).try_lock()).collect() + match self { + Self::Single(single) => Some(vec![single.try_lock()?]), + #[cfg(parallel_compiler)] + Self::Shards(shards) => shards.iter().map(|shard| shard.0.try_lock()).collect(), + } } }