Move RandomState and DefaultHasher into std::hash, but don't export for now

This commit is contained in:
ltdk 2023-09-08 23:57:15 -04:00
parent a2f5f9691b
commit 075409ddd9
12 changed files with 272 additions and 168 deletions

View File

@ -6,16 +6,13 @@ use self::Entry::*;
use hashbrown::hash_map as base;
use crate::borrow::Borrow;
use crate::cell::Cell;
use crate::collections::TryReserveError;
use crate::collections::TryReserveErrorKind;
use crate::error::Error;
use crate::fmt::{self, Debug};
#[allow(deprecated)]
use crate::hash::{BuildHasher, Hash, Hasher, SipHasher13};
use crate::hash::{BuildHasher, Hash, RandomState};
use crate::iter::FusedIterator;
use crate::ops::Index;
use crate::sys;
/// A [hash map] implemented with quadratic probing and SIMD lookup.
///
@ -3072,152 +3069,6 @@ where
}
}
/// `RandomState` is the default state for [`HashMap`] types.
///
/// A particular instance `RandomState` will create the same instances of
/// [`Hasher`], but the hashers created by two different `RandomState`
/// instances are unlikely to produce the same result for the same values.
///
/// # Examples
///
/// ```
/// use std::collections::HashMap;
/// use std::collections::hash_map::RandomState;
///
/// let s = RandomState::new();
/// let mut map = HashMap::with_hasher(s);
/// map.insert(1, 2);
/// ```
#[derive(Clone)]
#[stable(feature = "hashmap_build_hasher", since = "1.7.0")]
pub struct RandomState {
k0: u64,
k1: u64,
}
impl RandomState {
/// Constructs a new `RandomState` that is initialized with random keys.
///
/// # Examples
///
/// ```
/// use std::collections::hash_map::RandomState;
///
/// let s = RandomState::new();
/// ```
#[inline]
#[allow(deprecated)]
// rand
#[must_use]
#[stable(feature = "hashmap_build_hasher", since = "1.7.0")]
pub fn new() -> RandomState {
// Historically this function did not cache keys from the OS and instead
// simply always called `rand::thread_rng().gen()` twice. In #31356 it
// was discovered, however, that because we re-seed the thread-local RNG
// from the OS periodically that this can cause excessive slowdown when
// many hash maps are created on a thread. To solve this performance
// trap we cache the first set of randomly generated keys per-thread.
//
// Later in #36481 it was discovered that exposing a deterministic
// iteration order allows a form of DOS attack. To counter that we
// increment one of the seeds on every RandomState creation, giving
// every corresponding HashMap a different iteration order.
thread_local!(static KEYS: Cell<(u64, u64)> = {
Cell::new(sys::hashmap_random_keys())
});
KEYS.with(|keys| {
let (k0, k1) = keys.get();
keys.set((k0.wrapping_add(1), k1));
RandomState { k0, k1 }
})
}
}
#[stable(feature = "hashmap_build_hasher", since = "1.7.0")]
impl BuildHasher for RandomState {
type Hasher = DefaultHasher;
#[inline]
#[allow(deprecated)]
fn build_hasher(&self) -> DefaultHasher {
DefaultHasher(SipHasher13::new_with_keys(self.k0, self.k1))
}
}
/// The default [`Hasher`] used by [`RandomState`].
///
/// The internal algorithm is not specified, and so it and its hashes should
/// not be relied upon over releases.
#[stable(feature = "hashmap_default_hasher", since = "1.13.0")]
#[allow(deprecated)]
#[derive(Clone, Debug)]
pub struct DefaultHasher(SipHasher13);
impl DefaultHasher {
/// Creates a new `DefaultHasher`.
///
/// This hasher is not guaranteed to be the same as all other
/// `DefaultHasher` instances, but is the same as all other `DefaultHasher`
/// instances created through `new` or `default`.
#[stable(feature = "hashmap_default_hasher", since = "1.13.0")]
#[inline]
#[allow(deprecated)]
#[rustc_const_unstable(feature = "const_hash", issue = "104061")]
#[must_use]
pub const fn new() -> DefaultHasher {
DefaultHasher(SipHasher13::new_with_keys(0, 0))
}
}
#[stable(feature = "hashmap_default_hasher", since = "1.13.0")]
impl Default for DefaultHasher {
/// Creates a new `DefaultHasher` using [`new`].
/// See its documentation for more.
///
/// [`new`]: DefaultHasher::new
#[inline]
fn default() -> DefaultHasher {
DefaultHasher::new()
}
}
#[stable(feature = "hashmap_default_hasher", since = "1.13.0")]
impl Hasher for DefaultHasher {
// The underlying `SipHasher13` doesn't override the other
// `write_*` methods, so it's ok not to forward them here.
#[inline]
fn write(&mut self, msg: &[u8]) {
self.0.write(msg)
}
#[inline]
fn write_str(&mut self, s: &str) {
self.0.write_str(s);
}
#[inline]
fn finish(&self) -> u64 {
self.0.finish()
}
}
#[stable(feature = "hashmap_build_hasher", since = "1.7.0")]
impl Default for RandomState {
/// Constructs a new `RandomState`.
#[inline]
fn default() -> RandomState {
RandomState::new()
}
}
#[stable(feature = "std_debug", since = "1.16.0")]
impl fmt::Debug for RandomState {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("RandomState").finish_non_exhaustive()
}
}
#[inline]
fn map_entry<'a, K: 'a, V: 'a>(raw: base::RustcEntry<'a, K, V>) -> Entry<'a, K, V> {
match raw {

View File

@ -1,8 +1,8 @@
use super::Entry::{Occupied, Vacant};
use super::HashMap;
use super::RandomState;
use crate::assert_matches::assert_matches;
use crate::cell::RefCell;
use crate::hash::RandomState;
use crate::test_helpers::test_rng;
use rand::Rng;
use realstd::collections::TryReserveErrorKind::*;

View File

@ -6,11 +6,11 @@ use hashbrown::hash_set as base;
use crate::borrow::Borrow;
use crate::collections::TryReserveError;
use crate::fmt;
use crate::hash::{BuildHasher, Hash};
use crate::hash::{BuildHasher, Hash, RandomState};
use crate::iter::{Chain, FusedIterator};
use crate::ops::{BitAnd, BitOr, BitXor, Sub};
use super::map::{map_try_reserve_error, RandomState};
use super::map::map_try_reserve_error;
/// A [hash set] implemented as a `HashMap` where the value is `()`.
///

View File

@ -1,6 +1,6 @@
use super::super::map::RandomState;
use super::HashSet;
use crate::hash::RandomState;
use crate::panic::{catch_unwind, AssertUnwindSafe};
use crate::sync::atomic::{AtomicU32, Ordering};
use crate::sync::Arc;

View File

@ -439,6 +439,11 @@ pub mod hash_map {
//! A hash map implemented with quadratic probing and SIMD lookup.
#[stable(feature = "rust1", since = "1.0.0")]
pub use super::hash::map::*;
#[stable(feature = "hashmap_build_hasher", since = "1.7.0")]
pub use crate::hash::random::DefaultHasher;
#[stable(feature = "hashmap_build_hasher", since = "1.7.0")]
pub use crate::hash::random::RandomState;
}
#[stable(feature = "rust1", since = "1.0.0")]

View File

@ -0,0 +1,91 @@
//! Generic hashing support.
//!
//! This module provides a generic way to compute the [hash] of a value.
//! Hashes are most commonly used with [`HashMap`] and [`HashSet`].
//!
//! [hash]: https://en.wikipedia.org/wiki/Hash_function
//! [`HashMap`]: ../../std/collections/struct.HashMap.html
//! [`HashSet`]: ../../std/collections/struct.HashSet.html
//!
//! The simplest way to make a type hashable is to use `#[derive(Hash)]`:
//!
//! # Examples
//!
//! ```rust
//! use std::hash::{DefaultHasher, Hash, Hasher};
//!
//! #[derive(Hash)]
//! struct Person {
//! id: u32,
//! name: String,
//! phone: u64,
//! }
//!
//! let person1 = Person {
//! id: 5,
//! name: "Janet".to_string(),
//! phone: 555_666_7777,
//! };
//! let person2 = Person {
//! id: 5,
//! name: "Bob".to_string(),
//! phone: 555_666_7777,
//! };
//!
//! assert!(calculate_hash(&person1) != calculate_hash(&person2));
//!
//! fn calculate_hash<T: Hash>(t: &T) -> u64 {
//! let mut s = DefaultHasher::new();
//! t.hash(&mut s);
//! s.finish()
//! }
//! ```
//!
//! If you need more control over how a value is hashed, you need to implement
//! the [`Hash`] trait:
//!
//! ```rust
//! use std::hash::{DefaultHasher, Hash, Hasher};
//!
//! struct Person {
//! id: u32,
//! # #[allow(dead_code)]
//! name: String,
//! phone: u64,
//! }
//!
//! impl Hash for Person {
//! fn hash<H: Hasher>(&self, state: &mut H) {
//! self.id.hash(state);
//! self.phone.hash(state);
//! }
//! }
//!
//! let person1 = Person {
//! id: 5,
//! name: "Janet".to_string(),
//! phone: 555_666_7777,
//! };
//! let person2 = Person {
//! id: 5,
//! name: "Bob".to_string(),
//! phone: 555_666_7777,
//! };
//!
//! assert_eq!(calculate_hash(&person1), calculate_hash(&person2));
//!
//! fn calculate_hash<T: Hash>(t: &T) -> u64 {
//! let mut s = DefaultHasher::new();
//! t.hash(&mut s);
//! s.finish()
//! }
//! ```
#![stable(feature = "rust1", since = "1.0.0")]
pub(crate) mod random;
#[stable(feature = "rust1", since = "1.0.0")]
pub use core::hash::*;
// #[stable(feature = "std_hash_exports", since = "CURRENT_RUSTC_VERSION")]
pub(crate) use self::random::{DefaultHasher, RandomState};

View File

@ -0,0 +1,161 @@
//! This module exists to isolate [`RandomState`] and [`DefaultHasher`] outside of the
//! [`collections`] module without actually publicly exporting them, so that parts of that
//! implementation can more easily be moved to the [`alloc`] crate.
//!
//! Although its items are public and contain stability attributes, they can't actually be accessed
//! outside this crate.
//!
//! [`collections`]: crate::collections
#[allow(deprecated)]
use super::{BuildHasher, Hasher, SipHasher13};
use crate::cell::Cell;
use crate::fmt;
use crate::sys;
/// `RandomState` is the default state for [`HashMap`] types.
///
/// A particular instance `RandomState` will create the same instances of
/// [`Hasher`], but the hashers created by two different `RandomState`
/// instances are unlikely to produce the same result for the same values.
///
/// [`HashMap`]: crate::collections::HashMap
///
/// # Examples
///
/// ```
/// use std::collections::HashMap;
/// use std::hash::RandomState;
///
/// let s = RandomState::new();
/// let mut map = HashMap::with_hasher(s);
/// map.insert(1, 2);
/// ```
#[stable(feature = "hashmap_build_hasher", since = "1.7.0")]
#[derive(Clone)]
pub struct RandomState {
k0: u64,
k1: u64,
}
impl RandomState {
/// Constructs a new `RandomState` that is initialized with random keys.
///
/// # Examples
///
/// ```
/// use std::hash::RandomState;
///
/// let s = RandomState::new();
/// ```
#[inline]
#[allow(deprecated)]
// rand
#[must_use]
#[stable(feature = "hashmap_build_hasher", since = "1.7.0")]
pub fn new() -> RandomState {
// Historically this function did not cache keys from the OS and instead
// simply always called `rand::thread_rng().gen()` twice. In #31356 it
// was discovered, however, that because we re-seed the thread-local RNG
// from the OS periodically that this can cause excessive slowdown when
// many hash maps are created on a thread. To solve this performance
// trap we cache the first set of randomly generated keys per-thread.
//
// Later in #36481 it was discovered that exposing a deterministic
// iteration order allows a form of DOS attack. To counter that we
// increment one of the seeds on every RandomState creation, giving
// every corresponding HashMap a different iteration order.
thread_local!(static KEYS: Cell<(u64, u64)> = {
Cell::new(sys::hashmap_random_keys())
});
KEYS.with(|keys| {
let (k0, k1) = keys.get();
keys.set((k0.wrapping_add(1), k1));
RandomState { k0, k1 }
})
}
}
#[stable(feature = "hashmap_build_hasher", since = "1.7.0")]
impl BuildHasher for RandomState {
type Hasher = DefaultHasher;
#[inline]
#[allow(deprecated)]
fn build_hasher(&self) -> DefaultHasher {
DefaultHasher(SipHasher13::new_with_keys(self.k0, self.k1))
}
}
/// The default [`Hasher`] used by [`RandomState`].
///
/// The internal algorithm is not specified, and so it and its hashes should
/// not be relied upon over releases.
#[allow(deprecated)]
#[derive(Clone, Debug)]
#[stable(feature = "hashmap_build_hasher", since = "1.7.0")]
pub struct DefaultHasher(SipHasher13);
impl DefaultHasher {
/// Creates a new `DefaultHasher`.
///
/// This hasher is not guaranteed to be the same as all other
/// `DefaultHasher` instances, but is the same as all other `DefaultHasher`
/// instances created through `new` or `default`.
#[stable(feature = "hashmap_default_hasher", since = "1.13.0")]
#[inline]
#[allow(deprecated)]
#[rustc_const_unstable(feature = "const_hash", issue = "104061")]
#[must_use]
pub const fn new() -> DefaultHasher {
DefaultHasher(SipHasher13::new_with_keys(0, 0))
}
}
#[stable(feature = "hashmap_default_hasher", since = "1.13.0")]
impl Default for DefaultHasher {
/// Creates a new `DefaultHasher` using [`new`].
/// See its documentation for more.
///
/// [`new`]: DefaultHasher::new
#[inline]
fn default() -> DefaultHasher {
DefaultHasher::new()
}
}
#[stable(feature = "hashmap_default_hasher", since = "1.13.0")]
impl Hasher for DefaultHasher {
// The underlying `SipHasher13` doesn't override the other
// `write_*` methods, so it's ok not to forward them here.
#[inline]
fn write(&mut self, msg: &[u8]) {
self.0.write(msg)
}
#[inline]
fn write_str(&mut self, s: &str) {
self.0.write_str(s);
}
#[inline]
fn finish(&self) -> u64 {
self.0.finish()
}
}
#[stable(feature = "hashmap_build_hasher", since = "1.7.0")]
impl Default for RandomState {
/// Constructs a new `RandomState`.
#[inline]
fn default() -> RandomState {
RandomState::new()
}
}
#[stable(feature = "std_debug", since = "1.16.0")]
impl fmt::Debug for RandomState {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("RandomState").finish_non_exhaustive()
}
}

View File

@ -493,8 +493,6 @@ pub use core::convert;
pub use core::default;
#[stable(feature = "futures_api", since = "1.36.0")]
pub use core::future;
#[stable(feature = "rust1", since = "1.0.0")]
pub use core::hash;
#[stable(feature = "core_hint", since = "1.27.0")]
pub use core::hint;
#[stable(feature = "i128", since = "1.26.0")]
@ -564,6 +562,7 @@ pub mod env;
pub mod error;
pub mod ffi;
pub mod fs;
pub mod hash;
pub mod io;
pub mod net;
pub mod num;
@ -715,7 +714,7 @@ pub(crate) mod test_helpers {
#[track_caller]
pub(crate) fn test_rng() -> rand_xorshift::XorShiftRng {
use core::hash::{BuildHasher, Hash, Hasher};
let mut hasher = crate::collections::hash_map::RandomState::new().build_hasher();
let mut hasher = crate::hash::RandomState::new().build_hasher();
core::panic::Location::caller().hash(&mut hasher);
let hc64 = hasher.finish();
let seed_vec = hc64.to_le_bytes().into_iter().chain(0u8..8).collect::<Vec<u8>>();

View File

@ -1,8 +1,7 @@
use super::*;
use crate::collections::hash_map::DefaultHasher;
use crate::collections::{BTreeSet, HashSet};
use crate::hash::Hasher;
use crate::hash::{DefaultHasher, Hasher};
use crate::rc::Rc;
use crate::sync::Arc;
use core::hint::black_box;
@ -1461,8 +1460,7 @@ fn test_eq_receivers() {
#[test]
pub fn test_compare() {
use crate::collections::hash_map::DefaultHasher;
use crate::hash::{Hash, Hasher};
use crate::hash::{DefaultHasher, Hash, Hasher};
fn hash<T: Hash>(t: T) -> u64 {
let mut s = DefaultHasher::new();

View File

@ -1,17 +1,17 @@
#![allow(unused)]
use rand::RngCore;
use std::env;
use std::fs;
use std::path::{Path, PathBuf};
use std::thread;
use rand::RngCore;
/// Copied from `std::test_helpers::test_rng`, since these tests rely on the
/// seed not being the same for every RNG invocation too.
#[track_caller]
pub(crate) fn test_rng() -> rand_xorshift::XorShiftRng {
use core::hash::{BuildHasher, Hash, Hasher};
let mut hasher = std::collections::hash_map::RandomState::new().build_hasher();
let mut hasher = std::hash::RandomState::new().build_hasher();
core::panic::Location::caller().hash(&mut hasher);
let hc64 = hasher.finish();
let seed_vec = hc64.to_le_bytes().into_iter().chain(0u8..8).collect::<Vec<u8>>();

View File

@ -1,7 +1,6 @@
use crate::cli::TestOpts;
use crate::types::{TestDescAndFn, TestId, TestName};
use std::collections::hash_map::DefaultHasher;
use std::hash::Hasher;
use std::hash::{DefaultHasher, Hasher};
use std::time::{SystemTime, UNIX_EPOCH};
pub fn get_shuffle_seed(opts: &TestOpts) -> Option<u64> {

View File

@ -264,8 +264,8 @@ pub fn run_tests<F>(
where
F: FnMut(TestEvent) -> io::Result<()>,
{
use std::collections::{self, HashMap};
use std::hash::BuildHasherDefault;
use std::collections::HashMap;
use std::hash::{DefaultHasher, BuildHasherDefault};
use std::sync::mpsc::RecvTimeoutError;
struct RunningTest {
@ -287,7 +287,7 @@ where
// Use a deterministic hasher
type TestMap =
HashMap<TestId, RunningTest, BuildHasherDefault<collections::hash_map::DefaultHasher>>;
HashMap<TestId, RunningTest, BuildHasherDefault<DefaultHasher>>;
struct TimeoutEntry {
id: TestId,