Auto merge of #117508 - nnethercote:symbols-FxIndexSet, r=cuviper

Use `FxIndexSet` in the symbol interner.

It makes the code a little nicer.

r? `@ghost`
This commit is contained in:
bors 2023-11-03 04:07:42 +00:00
commit 2429818b20
2 changed files with 12 additions and 21 deletions

View File

@ -3,7 +3,7 @@
//! type, and vice versa. //! type, and vice versa.
use rustc_arena::DroplessArena; use rustc_arena::DroplessArena;
use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::fx::FxIndexSet;
use rustc_data_structures::stable_hasher::{HashStable, StableHasher, ToStableHashKey}; use rustc_data_structures::stable_hasher::{HashStable, StableHasher, ToStableHashKey};
use rustc_data_structures::sync::Lock; use rustc_data_structures::sync::Lock;
use rustc_macros::HashStable_Generic; use rustc_macros::HashStable_Generic;
@ -2076,43 +2076,33 @@ impl<CTX> ToStableHashKey<CTX> for Symbol {
} }
} }
#[derive(Default)]
pub(crate) struct Interner(Lock<InternerInner>); pub(crate) struct Interner(Lock<InternerInner>);
// The `&'static str`s in this type actually point into the arena. // The `&'static str`s in this type actually point into the arena.
// //
// The `FxHashMap`+`Vec` pair could be replaced by `FxIndexSet`, but #75278
// found that to regress performance up to 2% in some cases. This might be
// revisited after further improvements to `indexmap`.
//
// This type is private to prevent accidentally constructing more than one // This type is private to prevent accidentally constructing more than one
// `Interner` on the same thread, which makes it easy to mix up `Symbol`s // `Interner` on the same thread, which makes it easy to mix up `Symbol`s
// between `Interner`s. // between `Interner`s.
#[derive(Default)]
struct InternerInner { struct InternerInner {
arena: DroplessArena, arena: DroplessArena,
names: FxHashMap<&'static str, Symbol>, strings: FxIndexSet<&'static str>,
strings: Vec<&'static str>,
} }
impl Interner { impl Interner {
fn prefill(init: &[&'static str]) -> Self { fn prefill(init: &[&'static str]) -> Self {
Interner(Lock::new(InternerInner { Interner(Lock::new(InternerInner {
strings: init.into(), arena: Default::default(),
names: init.iter().copied().zip((0..).map(Symbol::new)).collect(), strings: init.iter().copied().collect(),
..Default::default()
})) }))
} }
#[inline] #[inline]
fn intern(&self, string: &str) -> Symbol { fn intern(&self, string: &str) -> Symbol {
let mut inner = self.0.lock(); let mut inner = self.0.lock();
if let Some(&name) = inner.names.get(string) { if let Some(idx) = inner.strings.get_index_of(string) {
return name; return Symbol::new(idx as u32);
} }
let name = Symbol::new(inner.strings.len() as u32);
// SAFETY: we convert from `&str` to `&[u8]`, clone it into the arena, // SAFETY: we convert from `&str` to `&[u8]`, clone it into the arena,
// and immediately convert the clone back to `&[u8]`, all because there // and immediately convert the clone back to `&[u8]`, all because there
// is no `inner.arena.alloc_str()` method. This is clearly safe. // is no `inner.arena.alloc_str()` method. This is clearly safe.
@ -2122,20 +2112,21 @@ impl Interner {
// SAFETY: we can extend the arena allocation to `'static` because we // SAFETY: we can extend the arena allocation to `'static` because we
// only access these while the arena is still alive. // only access these while the arena is still alive.
let string: &'static str = unsafe { &*(string as *const str) }; let string: &'static str = unsafe { &*(string as *const str) };
inner.strings.push(string);
// This second hash table lookup can be avoided by using `RawEntryMut`, // This second hash table lookup can be avoided by using `RawEntryMut`,
// but this code path isn't hot enough for it to be worth it. See // but this code path isn't hot enough for it to be worth it. See
// #91445 for details. // #91445 for details.
inner.names.insert(string, name); let (idx, is_new) = inner.strings.insert_full(string);
name debug_assert!(is_new); // due to the get_index_of check above
Symbol::new(idx as u32)
} }
/// Get the symbol as a string. /// Get the symbol as a string.
/// ///
/// [`Symbol::as_str()`] should be used in preference to this function. /// [`Symbol::as_str()`] should be used in preference to this function.
fn get(&self, symbol: Symbol) -> &str { fn get(&self, symbol: Symbol) -> &str {
self.0.lock().strings[symbol.0.as_usize()] self.0.lock().strings.get_index(symbol.0.as_usize()).unwrap()
} }
} }

View File

@ -4,7 +4,7 @@ use crate::create_default_session_globals_then;
#[test] #[test]
fn interner_tests() { fn interner_tests() {
let i = Interner::default(); let i = Interner::prefill(&[]);
// first one is zero: // first one is zero:
assert_eq!(i.intern("dog"), Symbol::new(0)); assert_eq!(i.intern("dog"), Symbol::new(0));
// re-use gets the same entry: // re-use gets the same entry: