diff --git a/library/portable-simd/crates/core_simd/examples/spectral_norm.rs b/library/portable-simd/crates/core_simd/examples/spectral_norm.rs
new file mode 100644
index 00000000000..c515dad4dea
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/examples/spectral_norm.rs
@@ -0,0 +1,77 @@
+#![feature(portable_simd)]
+
+use core_simd::simd::*;
+
+fn a(i: usize, j: usize) -> f64 {
+    ((i + j) * (i + j + 1) / 2 + i + 1) as f64
+}
+
+fn mult_av(v: &[f64], out: &mut [f64]) {
+    assert!(v.len() == out.len());
+    assert!(v.len() % 2 == 0);
+
+    for (i, out) in out.iter_mut().enumerate() {
+        let mut sum = f64x2::splat(0.0);
+
+        let mut j = 0;
+        while j < v.len() {
+            let b = f64x2::from_slice(&v[j..]);
+            let a = f64x2::from_array([a(i, j), a(i, j + 1)]);
+            sum += b / a;
+            j += 2
+        }
+        *out = sum.horizontal_sum();
+    }
+}
+
+fn mult_atv(v: &[f64], out: &mut [f64]) {
+    assert!(v.len() == out.len());
+    assert!(v.len() % 2 == 0);
+
+    for (i, out) in out.iter_mut().enumerate() {
+        let mut sum = f64x2::splat(0.0);
+
+        let mut j = 0;
+        while j < v.len() {
+            let b = f64x2::from_slice(&v[j..]);
+            let a = f64x2::from_array([a(j, i), a(j + 1, i)]);
+            sum += b / a;
+            j += 2
+        }
+        *out = sum.horizontal_sum();
+    }
+}
+
+fn mult_atav(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
+    mult_av(v, tmp);
+    mult_atv(tmp, out);
+}
+
+pub fn spectral_norm(n: usize) -> f64 {
+    assert!(n % 2 == 0, "only even lengths are accepted");
+
+    let mut u = vec![1.0; n];
+    let mut v = u.clone();
+    let mut tmp = u.clone();
+
+    for _ in 0..10 {
+        mult_atav(&u, &mut v, &mut tmp);
+        mult_atav(&v, &mut u, &mut tmp);
+    }
+    (dot(&u, &v) / dot(&v, &v)).sqrt()
+}
+
+fn dot(x: &[f64], y: &[f64]) -> f64 {
+    // This is auto-vectorized:
+    x.iter().zip(y).map(|(&x, &y)| x * y).sum()
+}
+
+#[cfg(test)]
+#[test]
+fn test() {
+    assert_eq!(&format!("{:.9}", spectral_norm(100)), "1.274219991");
+}
+
+fn main() {
+    // Empty main to make cargo happy
+}
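For reference, a scalar sketch of what the `f64x2` loop in `mult_av` computes, using the example's own `a(i, j)` helper (`mult_av_scalar` is a hypothetical name, not part of the patch):

    fn mult_av_scalar(v: &[f64], out: &mut [f64]) {
        for (i, out) in out.iter_mut().enumerate() {
            // Each SIMD iteration above performs two of these j-steps at once,
            // then horizontal_sum() folds the two partial sums together.
            *out = v.iter().enumerate().map(|(j, &b)| b / a(i, j)).sum();
        }
    }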
diff --git a/library/portable-simd/crates/core_simd/src/comparisons.rs b/library/portable-simd/crates/core_simd/src/comparisons.rs
index edef5af3687..d024cf4ddbe 100644
--- a/library/portable-simd/crates/core_simd/src/comparisons.rs
+++ b/library/portable-simd/crates/core_simd/src/comparisons.rs
@@ -10,6 +10,8 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn lanes_eq(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
         unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) }
     }

@@ -17,6 +19,8 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn lanes_ne(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
         unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) }
     }
 }
@@ -30,6 +34,8 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn lanes_lt(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
         unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) }
     }

@@ -37,6 +43,8 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn lanes_gt(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
         unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) }
     }

@@ -44,6 +52,8 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn lanes_le(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
         unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) }
     }

@@ -51,6 +61,8 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn lanes_ge(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
         unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) }
     }
 }
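A minimal sketch of these `lanes_*` methods at a call site (nightly `portable_simd`; values are illustrative):

    #![feature(portable_simd)]
    use core_simd::Simd;

    fn main() {
        let a = Simd::from_array([1i32, 2, 3, 4]);
        let b = Simd::from_array([4i32, 2, 1, 4]);
        // Each comparison returns a Mask with one boolean per lane.
        assert_eq!(a.lanes_lt(b).to_array(), [true, false, false, false]);
        assert_eq!(a.lanes_eq(b).to_array(), [false, true, false, true]);
    }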
diff --git a/library/portable-simd/crates/core_simd/src/intrinsics.rs b/library/portable-simd/crates/core_simd/src/intrinsics.rs
index 233657202f7..e150946c705 100644
--- a/library/portable-simd/crates/core_simd/src/intrinsics.rs
+++ b/library/portable-simd/crates/core_simd/src/intrinsics.rs
@@ -2,31 +2,55 @@
 //! crate.
 //!
 //! The LLVM assembly language is documented here: <https://llvm.org/docs/LangRef.html>
+//!
+//! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef:
+//! - poison: "undefined behavior as a value". Specifically, it is like uninit memory (such as padding bytes). It is "safe" to create poison, BUT
+//!   poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. Unlike NaN, which has defined comparisons,
+//!   poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). So it can't be conditioned on, either.
+//! - undef: "a value that is every value". Functionally like poison, insofar as Rust is concerned. Poison may become this. Note:
+//!   this means that division by poison or undef is like division by zero, which means it inflicts...
+//! - "UB": poison and undef cover most of what people call "UB". "UB" means this operation immediately invalidates the program:
+//!   LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end".
+//!   The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior,
+//!   and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract.
+//!
+//! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory.
+//!
+//! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths.

 /// These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are
-/// simply lowered to the matching LLVM instructions by the compiler. The associated instruction
-/// is documented alongside each intrinsic.
+/// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner.
+/// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function.
 extern "platform-intrinsic" {
     /// add/fadd
     pub(crate) fn simd_add<T>(x: T, y: T) -> T;

     /// sub/fsub
-    pub(crate) fn simd_sub<T>(x: T, y: T) -> T;
+    pub(crate) fn simd_sub<T>(lhs: T, rhs: T) -> T;

     /// mul/fmul
     pub(crate) fn simd_mul<T>(x: T, y: T) -> T;

     /// udiv/sdiv/fdiv
-    pub(crate) fn simd_div<T>(x: T, y: T) -> T;
+    /// ints and uints: {s,u}div incur UB if division by zero occurs.
+    /// ints: sdiv is UB for int::MIN / -1.
+    /// floats: fdiv is never UB, but may create NaNs or infinities.
+    pub(crate) fn simd_div<T>(lhs: T, rhs: T) -> T;

     /// urem/srem/frem
-    pub(crate) fn simd_rem<T>(x: T, y: T) -> T;
+    /// ints and uints: {s,u}rem incur UB if division by zero occurs.
+    /// ints: srem is UB for int::MIN / -1.
+    /// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno.
+    pub(crate) fn simd_rem<T>(lhs: T, rhs: T) -> T;

     /// shl
-    pub(crate) fn simd_shl<T>(x: T, y: T) -> T;
+    /// for (u)ints. poison if rhs >= lhs::BITS
+    pub(crate) fn simd_shl<T>(lhs: T, rhs: T) -> T;

-    /// lshr/ashr
-    pub(crate) fn simd_shr<T>(x: T, y: T) -> T;
+    /// ints: ashr
+    /// uints: lshr
+    /// poison if rhs >= lhs::BITS
+    pub(crate) fn simd_shr<T>(lhs: T, rhs: T) -> T;

     /// and
     pub(crate) fn simd_and<T>(x: T, y: T) -> T;
@@ -38,6 +62,9 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_xor<T>(x: T, y: T) -> T;

     /// fptoui/fptosi/uitofp/sitofp
+    /// Casting floats to integers is truncating, so it is safe to convert values like e.g. 1.5,
+    /// but the truncated value must fit in the target type or the result is poison.
+    /// Use `simd_as` instead for a cast that performs a saturating conversion.
     pub(crate) fn simd_cast<T, U>(x: T) -> U;
     /// follows Rust's `T as U` semantics, including saturating float casts
     /// which amounts to the same as `simd_cast` for many cases
@@ -45,6 +72,9 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_as<T, U>(x: T) -> U;

     /// neg/fneg
+    /// ints: ultimately becomes a call to cg_ssa's BuilderMethods::neg. cg_llvm equates this to `simd_sub(Simd::splat(0), x)`.
+    /// floats: LLVM's fneg, which changes the floating point sign bit. Some arches have instructions for it.
+    /// Rust panics for Neg::neg(int::MIN) due to overflow, but it is not UB in LLVM without `nsw`.
     pub(crate) fn simd_neg<T>(x: T) -> T;

     /// fabs
@@ -54,6 +84,7 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_fmin<T>(x: T, y: T) -> T;
     pub(crate) fn simd_fmax<T>(x: T, y: T) -> T;

+    // these return Simd<int, N> with the same BITS size as the inputs
     pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
     pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
     pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
@@ -62,19 +93,31 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;

     // shufflevector
+    // idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s
     pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V;

+    /// llvm.masked.gather
+    /// like a loop of pointer reads
+    /// val: vector of values to select if a lane is masked
+    /// ptr: vector of pointers to read from
+    /// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val)
+    /// note, the LLVM intrinsic accepts a mask vector of `<N x i1>`
+    /// FIXME: review this if/when we fix up our mask story in general?
     pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
+    /// llvm.masked.scatter
+    /// like gather, but more spicy, as it writes instead of reads
     pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);

     // {s,u}add.sat
     pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;

     // {s,u}sub.sat
-    pub(crate) fn simd_saturating_sub<T>(x: T, y: T) -> T;
+    pub(crate) fn simd_saturating_sub<T>(lhs: T, rhs: T) -> T;

     // reductions
+    // llvm.vector.reduce.{add,fadd}
     pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
+    // llvm.vector.reduce.{mul,fmul}
     pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
     #[allow(unused)]
     pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
@@ -91,7 +134,10 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_bitmask<T, U>(x: T) -> U;

     // select
-    pub(crate) fn simd_select<M, T>(m: M, a: T, b: T) -> T;
+    // first argument is a vector of integers, -1 (all bits 1) is "true"
+    // logically equivalent to (yes & m) | (no & (m ^ -1)),
+    // but you can use it on floats.
+    pub(crate) fn simd_select<M, T>(m: M, yes: T, no: T) -> T;
     #[allow(unused)]
-    pub(crate) fn simd_select_bitmask<M, T>(m: M, a: T, b: T) -> T;
+    pub(crate) fn simd_select_bitmask<M, T>(m: M, yes: T, no: T) -> T;
 }
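Since `simd_shl`/`simd_shr` return poison when the shift count reaches the lane width (per the docs above), safe wrappers such as the crate's `wrap_bitshift!` must bound the count first. A per-lane sketch of that guard, assuming a power-of-two bit width:

    fn wrapped_shl_lane(lhs: u32, rhs: u32) -> u32 {
        // Masking keeps the count strictly below u32::BITS, so the
        // underlying shift can never produce poison.
        lhs << (rhs & (u32::BITS - 1))
    }

    fn main() {
        assert_eq!(wrapped_shl_lane(1, 33), 2); // 33 & 31 == 1
    }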
diff --git a/library/portable-simd/crates/core_simd/src/lib.rs b/library/portable-simd/crates/core_simd/src/lib.rs
index 960a6640083..91ae34c05e0 100644
--- a/library/portable-simd/crates/core_simd/src/lib.rs
+++ b/library/portable-simd/crates/core_simd/src/lib.rs
@@ -1,7 +1,9 @@
 #![cfg_attr(not(feature = "std"), no_std)]
 #![feature(
     const_fn_trait_bound,
+    convert_float_to_int,
     decl_macro,
+    intra_doc_pointers,
     platform_intrinsics,
     repr_simd,
     simd_ffi,
diff --git a/library/portable-simd/crates/core_simd/src/masks.rs b/library/portable-simd/crates/core_simd/src/masks.rs
index ae1fef53da8..e1cd7930450 100644
--- a/library/portable-simd/crates/core_simd/src/masks.rs
+++ b/library/portable-simd/crates/core_simd/src/masks.rs
@@ -12,8 +12,10 @@
 )]
 mod mask_impl;

-use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+mod to_bitmask;
+pub use to_bitmask::ToBitMask;
+
+use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount};
 use core::cmp::Ordering;
 use core::{fmt, mem};

@@ -42,6 +44,9 @@ mod sealed {
 use sealed::Sealed;

 /// Marker trait for types that may be used as SIMD mask elements.
+///
+/// # Safety
+/// Type must be a signed integer.
 pub unsafe trait MaskElement: SimdElement + Sealed {}

 macro_rules! impl_element {
@@ -149,6 +154,7 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
+        // Safety: the caller must confirm this invariant
         unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) }
     }

@@ -161,6 +167,7 @@ where
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn from_int(value: Simd<T, LANES>) -> Self {
         assert!(T::valid(value), "all values must be either 0 or -1",);
+        // Safety: the validity has been checked
         unsafe { Self::from_int_unchecked(value) }
     }

@@ -179,6 +186,7 @@ where
     #[inline]
     #[must_use = "method returns a new bool and does not mutate the original value"]
     pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
+        // Safety: the caller must confirm this invariant
         unsafe { self.0.test_unchecked(lane) }
     }

@@ -190,6 +198,7 @@ where
     #[must_use = "method returns a new bool and does not mutate the original value"]
     pub fn test(&self, lane: usize) -> bool {
         assert!(lane < LANES, "lane index out of range");
+        // Safety: the lane index has been checked
         unsafe { self.test_unchecked(lane) }
     }

@@ -199,6 +208,7 @@ where
     /// `lane` must be less than `LANES`.
     #[inline]
     pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
+        // Safety: the caller must confirm this invariant
         unsafe {
             self.0.set_unchecked(lane, value);
         }
@@ -211,27 +221,12 @@ where
     #[inline]
     pub fn set(&mut self, lane: usize, value: bool) {
         assert!(lane < LANES, "lane index out of range");
+        // Safety: the lane index has been checked
         unsafe {
             self.set_unchecked(lane, value);
         }
     }

-    /// Convert this mask to a bitmask, with one bit set per lane.
-    #[cfg(feature = "generic_const_exprs")]
-    #[inline]
-    #[must_use = "method returns a new array and does not mutate the original value"]
-    pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
-        self.0.to_bitmask()
-    }
-
-    /// Convert a bitmask to a mask.
-    #[cfg(feature = "generic_const_exprs")]
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask(bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
-        Self(mask_impl::Mask::from_bitmask(bitmask))
-    }
-
     /// Returns true if any lane is set, or false otherwise.
     #[inline]
     #[must_use = "method returns a new bool and does not mutate the original value"]
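A sketch of how the checked/unchecked pairs documented above relate (the safe method validates, then defers to the `_unchecked` form):

    #![feature(portable_simd)]
    use core_simd::{Mask, Simd};

    fn main() {
        // from_int asserts every lane is 0 or -1 before calling from_int_unchecked.
        let m = Mask::<i32, 4>::from_int(Simd::from_array([-1, 0, 0, -1]));
        // test() checks the lane index, then defers to test_unchecked.
        assert!(m.test(0) && !m.test(1));
    }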
diff --git a/library/portable-simd/crates/core_simd/src/masks/bitmask.rs b/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
index b4217dc87ba..ec4dd357ee9 100644
--- a/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
+++ b/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
@@ -1,7 +1,7 @@
 #![allow(unused_imports)]
 use super::MaskElement;
 use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
 use core::marker::PhantomData;

 /// A mask where each lane is represented by a single bit.
@@ -115,20 +115,22 @@ where
         unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) }
     }

-    #[cfg(feature = "generic_const_exprs")]
     #[inline]
-    #[must_use = "method returns a new array and does not mutate the original value"]
-    pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
-        // Safety: these are the same type and we are laundering the generic
+    pub fn to_bitmask_integer<U>(self) -> U
+    where
+        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+    {
+        // Safety: these are the same types
         unsafe { core::mem::transmute_copy(&self.0) }
     }

-    #[cfg(feature = "generic_const_exprs")]
     #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask(bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
-        // Safety: these are the same type and we are laundering the generic
-        Self(unsafe { core::mem::transmute_copy(&bitmask) }, PhantomData)
+    pub fn from_bitmask_integer<U>(bitmask: U) -> Self
+    where
+        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+    {
+        // Safety: these are the same types
+        unsafe { Self(core::mem::transmute_copy(&bitmask), PhantomData) }
     }

     #[inline]
@@ -137,6 +139,7 @@ where
     where
         U: MaskElement,
     {
+        // Safety: bitmask layout does not depend on the element width
         unsafe { core::mem::transmute_copy(&self) }
     }

diff --git a/library/portable-simd/crates/core_simd/src/masks/full_masks.rs b/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
index e5bb784bb91..8bbdf637de8 100644
--- a/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
+++ b/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
@@ -2,7 +2,7 @@

 use super::MaskElement;
 use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};

 #[repr(transparent)]
 pub struct Mask<T, const LANES: usize>(Simd<T, LANES>)
 where
@@ -66,6 +66,23 @@ where
     }
 }

+// Used for bitmask bit order workaround
+pub(crate) trait ReverseBits {
+    fn reverse_bits(self) -> Self;
+}
+
+macro_rules! impl_reverse_bits {
+    { $($int:ty),* } => {
+        $(
+        impl ReverseBits for $int {
+            fn reverse_bits(self) -> Self { <$int>::reverse_bits(self) }
+        }
+        )*
+    }
+}
+
+impl_reverse_bits! { u8, u16, u32, u64 }
+
 impl<T, const LANES: usize> Mask<T, LANES>
 where
     T: MaskElement,
@@ -106,44 +123,40 @@ where
     where
         U: MaskElement,
     {
+        // Safety: masks are simply integer vectors of 0 and -1, and we can cast the element type.
         unsafe { Mask(intrinsics::simd_cast(self.0)) }
     }

-    #[cfg(feature = "generic_const_exprs")]
     #[inline]
-    #[must_use = "method returns a new array and does not mutate the original value"]
-    pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
-        unsafe {
-            let mut bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN] =
-                intrinsics::simd_bitmask(self.0);
-
-            // There is a bug where LLVM appears to implement this operation with the wrong
-            // bit order.
-            // TODO fix this in a better way
-            if cfg!(target_endian = "big") {
-                for x in bitmask.as_mut() {
-                    *x = x.reverse_bits();
-                }
-            }
+    pub(crate) fn to_bitmask_integer<U: ReverseBits>(self) -> U
+    where
+        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+    {
+        // Safety: U is required to be the appropriate bitmask type
+        let bitmask: U = unsafe { intrinsics::simd_bitmask(self.0) };

+        // LLVM assumes bit order should match endianness
+        if cfg!(target_endian = "big") {
+            bitmask.reverse_bits()
+        } else {
             bitmask
         }
     }

-    #[cfg(feature = "generic_const_exprs")]
     #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask(mut bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
-        unsafe {
-            // There is a bug where LLVM appears to implement this operation with the wrong
-            // bit order.
-            // TODO fix this in a better way
-            if cfg!(target_endian = "big") {
-                for x in bitmask.as_mut() {
-                    *x = x.reverse_bits();
-                }
-            }
+    pub(crate) fn from_bitmask_integer<U: ReverseBits>(bitmask: U) -> Self
+    where
+        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+    {
+        // LLVM assumes bit order should match endianness
+        let bitmask = if cfg!(target_endian = "big") {
+            bitmask.reverse_bits()
+        } else {
+            bitmask
+        };

+        // Safety: U is required to be the appropriate bitmask type
+        unsafe {
             Self::from_int_unchecked(intrinsics::simd_select_bitmask(
                 bitmask,
                 Self::splat(true).to_int(),
@@ -155,12 +168,14 @@ where
     #[inline]
     #[must_use = "method returns a new bool and does not mutate the original value"]
     pub fn any(self) -> bool {
+        // Safety: use `self` as an integer vector
         unsafe { intrinsics::simd_reduce_any(self.to_int()) }
     }

     #[inline]
     #[must_use = "method returns a new vector and does not mutate the original value"]
     pub fn all(self) -> bool {
+        // Safety: use `self` as an integer vector
         unsafe { intrinsics::simd_reduce_all(self.to_int()) }
     }
 }
@@ -184,6 +199,7 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn bitand(self, rhs: Self) -> Self {
+        // Safety: `self` is an integer vector
         unsafe { Self(intrinsics::simd_and(self.0, rhs.0)) }
     }
 }
@@ -197,6 +213,7 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn bitor(self, rhs: Self) -> Self {
+        // Safety: `self` is an integer vector
         unsafe { Self(intrinsics::simd_or(self.0, rhs.0)) }
     }
 }
@@ -210,6 +227,7 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn bitxor(self, rhs: Self) -> Self {
+        // Safety: `self` is an integer vector
         unsafe { Self(intrinsics::simd_xor(self.0, rhs.0)) }
     }
 }
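The `reverse_bits` fixup above exists so that bitmasks keep the portable convention defined by the `ToBitMask` trait below: lane 0 is always the least significant bit, regardless of target endianness. A sketch of the intended mapping:

    use core_simd::{Mask, ToBitMask};

    fn main() {
        let m = Mask::<i8, 8>::from_array([true, false, false, false, false, false, false, true]);
        // Lane 0 -> bit 0 (the LSB), lane 7 -> bit 7, on both little- and big-endian targets.
        let bits: u8 = m.to_bitmask();
        assert_eq!(bits, 0b1000_0001);
    }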
diff --git a/library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs b/library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs
new file mode 100644
index 00000000000..1c2037764c1
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs
@@ -0,0 +1,57 @@
+use super::{mask_impl, Mask, MaskElement};
+use crate::simd::{LaneCount, SupportedLaneCount};
+
+mod sealed {
+    pub trait Sealed {}
+}
+pub use sealed::Sealed;
+
+impl<T, const LANES: usize> Sealed for Mask<T, LANES>
+where
+    T: MaskElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+/// Converts masks to and from integer bitmasks.
+///
+/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB.
+///
+/// # Safety
+/// This trait is `unsafe` and sealed, since the `BitMask` type must match the number of lanes in
+/// the mask.
+pub unsafe trait ToBitMask: Sealed {
+    /// The integer bitmask type.
+    type BitMask;
+
+    /// Converts a mask to a bitmask.
+    fn to_bitmask(self) -> Self::BitMask;
+
+    /// Converts a bitmask to a mask.
+    fn from_bitmask(bitmask: Self::BitMask) -> Self;
+}
+
+macro_rules! impl_integer_intrinsic {
+    { $(unsafe impl ToBitMask<BitMask=$int:ty> for Mask<_, $lanes:literal>)* } => {
+        $(
+        unsafe impl<T: MaskElement> ToBitMask for Mask<T, $lanes> {
+            type BitMask = $int;
+
+            fn to_bitmask(self) -> $int {
+                self.0.to_bitmask_integer()
+            }
+
+            fn from_bitmask(bitmask: $int) -> Self {
+                Self(mask_impl::Mask::from_bitmask_integer(bitmask))
+            }
+        }
+        )*
+    }
+}
+
+impl_integer_intrinsic! {
+    unsafe impl ToBitMask<BitMask=u8> for Mask<_, 8>
+    unsafe impl ToBitMask<BitMask=u16> for Mask<_, 16>
+    unsafe impl ToBitMask<BitMask=u32> for Mask<_, 32>
+    unsafe impl ToBitMask<BitMask=u64> for Mask<_, 64>
+}
diff --git a/library/portable-simd/crates/core_simd/src/math.rs b/library/portable-simd/crates/core_simd/src/math.rs
index 7435b6df918..0b4e40983af 100644
--- a/library/portable-simd/crates/core_simd/src/math.rs
+++ b/library/portable-simd/crates/core_simd/src/math.rs
@@ -22,6 +22,7 @@ macro_rules! impl_uint_arith {
             /// ```
             #[inline]
             pub fn saturating_add(self, second: Self) -> Self {
+                // Safety: `self` is a vector
                 unsafe { simd_saturating_add(self, second) }
             }

@@ -41,6 +42,7 @@ macro_rules! impl_uint_arith {
             /// assert_eq!(sat, Simd::splat(0));
             #[inline]
             pub fn saturating_sub(self, second: Self) -> Self {
+                // Safety: `self` is a vector
                 unsafe { simd_saturating_sub(self, second) }
             }
         })+
@@ -68,6 +70,7 @@ macro_rules! impl_int_arith {
             /// ```
             #[inline]
             pub fn saturating_add(self, second: Self) -> Self {
+                // Safety: `self` is a vector
                 unsafe { simd_saturating_add(self, second) }
             }

@@ -87,6 +90,7 @@ macro_rules! impl_int_arith {
             /// assert_eq!(sat, Simd::from_array([MIN, MIN, MIN, 0]));
             #[inline]
             pub fn saturating_sub(self, second: Self) -> Self {
+                // Safety: `self` is a vector
                 unsafe { simd_saturating_sub(self, second) }
             }
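The saturating methods above clamp instead of wrapping; a minimal sketch mirroring the doc examples in math.rs:

    use core_simd::Simd;

    fn main() {
        let x = Simd::from_array([2u32, 1, 0, u32::MAX]);
        let max = Simd::splat(u32::MAX);
        assert_eq!(x.saturating_add(max).to_array(), [u32::MAX; 4]); // clamped at MAX, no wrap
        assert_eq!(x.saturating_sub(max).to_array(), [0, 0, 0, 0]);  // clamped at zero
    }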
diff --git a/library/portable-simd/crates/core_simd/src/ops.rs b/library/portable-simd/crates/core_simd/src/ops.rs
index b65038933bf..1b35b3e717a 100644
--- a/library/portable-simd/crates/core_simd/src/ops.rs
+++ b/library/portable-simd/crates/core_simd/src/ops.rs
@@ -57,29 +57,40 @@ macro_rules! wrap_bitshift {
     };
 }

-// Division by zero is poison, according to LLVM.
-// So is dividing the MIN value of a signed integer by -1,
-// since that would return MAX + 1.
-// FIXME: Rust allows <int>::MIN / -1,
-// so we should probably figure out how to make that safe.
+/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
+/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
+/// thus guaranteeing a Rust value returns instead.
+///
+/// |                  | LLVM | Rust
+/// | :--------------: | :--- | :----------
+/// | N {/,%} 0        | UB   | panic!()
+/// | <$int>::MIN / -1 | UB   | <$int>::MIN
+/// | <$int>::MIN % -1 | UB   | 0
+///
 macro_rules! int_divrem_guard {
     ( $lhs:ident,
       $rhs:ident,
       { const PANIC_ZERO: &'static str = $zero:literal;
-        const PANIC_OVERFLOW: &'static str = $overflow:literal;
         $simd_call:ident
       },
       $int:ident ) => {
         if $rhs.lanes_eq(Simd::splat(0)).any() {
             panic!($zero);
-        } else if <$int>::MIN != 0
-            && ($lhs.lanes_eq(Simd::splat(<$int>::MIN))
-                // type inference can break here, so cut an SInt to size
-                & $rhs.lanes_eq(Simd::splat(-1i64 as _))).any()
-        {
-            panic!($overflow);
         } else {
-            unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) }
+            // Prevent otherwise-UB overflow on the MIN / -1 case.
+            let rhs = if <$int>::MIN != 0 {
+                // This should, at worst, optimize to a few branchless logical ops
+                // Ideally, this entire conditional should evaporate
+                // Fire LLVM and implement those manually if it doesn't get the hint
+                ($lhs.lanes_eq(Simd::splat(<$int>::MIN))
+                // type inference can break here, so cut an SInt to size
+                & $rhs.lanes_eq(Simd::splat(-1i64 as _)))
+                .select(Simd::splat(1), $rhs)
+            } else {
+                // Nice base case to make it easy to const-fold away the other branch.
+                $rhs
+            };
+            unsafe { $crate::simd::intrinsics::$simd_call($lhs, rhs) }
         }
     };
}
@@ -183,7 +194,6 @@ for_base_ops! {
     impl Div::div {
         int_divrem_guard {
             const PANIC_ZERO: &'static str = "attempt to divide by zero";
-            const PANIC_OVERFLOW: &'static str = "attempt to divide with overflow";
             simd_div
         }
     }
@@ -191,7 +201,6 @@ for_base_ops! {
     impl Rem::rem {
         int_divrem_guard {
             const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
-            const PANIC_OVERFLOW: &'static str = "attempt to calculate the remainder with overflow";
             simd_rem
         }
     }
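Per lane, the `int_divrem_guard` rewrite above behaves like this scalar logic (a sketch; the macro does the same thing branchlessly with `lanes_eq` and `select`):

    fn guarded_div(lhs: i32, rhs: i32) -> i32 {
        if rhs == 0 {
            panic!("attempt to divide by zero");
        }
        // MIN / -1 would overflow (UB in LLVM); dividing by 1 instead yields MIN,
        // matching the table in the macro's documentation.
        let rhs = if lhs == i32::MIN && rhs == -1 { 1 } else { rhs };
        lhs / rhs
    }

    fn main() {
        assert_eq!(guarded_div(i32::MIN, -1), i32::MIN);
        assert_eq!(guarded_div(7, -1), -7);
    }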
diff --git a/library/portable-simd/crates/core_simd/src/reduction.rs b/library/portable-simd/crates/core_simd/src/reduction.rs
index e79a185816b..e1cd743e442 100644
--- a/library/portable-simd/crates/core_simd/src/reduction.rs
+++ b/library/portable-simd/crates/core_simd/src/reduction.rs
@@ -14,24 +14,28 @@ macro_rules! impl_integer_reductions {
         /// Horizontal wrapping add. Returns the sum of the lanes of the vector, with wrapping addition.
         #[inline]
         pub fn horizontal_sum(self) -> $scalar {
+            // Safety: `self` is an integer vector
             unsafe { simd_reduce_add_ordered(self, 0) }
         }

         /// Horizontal wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication.
         #[inline]
         pub fn horizontal_product(self) -> $scalar {
+            // Safety: `self` is an integer vector
             unsafe { simd_reduce_mul_ordered(self, 1) }
         }

         /// Horizontal maximum. Returns the maximum lane in the vector.
         #[inline]
         pub fn horizontal_max(self) -> $scalar {
+            // Safety: `self` is an integer vector
             unsafe { simd_reduce_max(self) }
         }

         /// Horizontal minimum. Returns the minimum lane in the vector.
         #[inline]
         pub fn horizontal_min(self) -> $scalar {
+            // Safety: `self` is an integer vector
             unsafe { simd_reduce_min(self) }
         }
     }
@@ -63,6 +67,7 @@ macro_rules! impl_float_reductions {
             if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
                 self.as_array().iter().sum()
             } else {
+                // Safety: `self` is a float vector
                 unsafe { simd_reduce_add_ordered(self, 0.) }
             }
         }
@@ -74,6 +79,7 @@ macro_rules! impl_float_reductions {
             if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
                 self.as_array().iter().product()
             } else {
+                // Safety: `self` is a float vector
                 unsafe { simd_reduce_mul_ordered(self, 1.) }
             }
         }
@@ -84,6 +90,7 @@ macro_rules! impl_float_reductions {
         /// return either. This function will not return `NaN` unless all lanes are `NaN`.
         #[inline]
         pub fn horizontal_max(self) -> $scalar {
+            // Safety: `self` is a float vector
             unsafe { simd_reduce_max(self) }
         }

@@ -93,6 +100,7 @@ macro_rules! impl_float_reductions {
         /// return either. This function will not return `NaN` unless all lanes are `NaN`.
         #[inline]
         pub fn horizontal_min(self) -> $scalar {
+            // Safety: `self` is a float vector
             unsafe { simd_reduce_min(self) }
         }
     }
diff --git a/library/portable-simd/crates/core_simd/src/round.rs b/library/portable-simd/crates/core_simd/src/round.rs
index 06ccab3ec49..556bc2cc1fe 100644
--- a/library/portable-simd/crates/core_simd/src/round.rs
+++ b/library/portable-simd/crates/core_simd/src/round.rs
@@ -1,9 +1,10 @@
 use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+use core::convert::FloatToInt;

 macro_rules! implement {
     {
-        $type:ty, $int_type:ty
+        $type:ty
     } => {
         impl<const LANES: usize> Simd<$type, LANES>
         where
@@ -18,20 +19,22 @@ macro_rules! implement {
             /// * Not be NaN
             /// * Not be infinite
             /// * Be representable in the return type, after truncating off its fractional part
+            ///
+            /// If these requirements are infeasible or costly, consider using the safe function [cast],
+            /// which saturates on conversion.
+            ///
+            /// [cast]: Simd::cast
             #[inline]
-            pub unsafe fn to_int_unchecked(self) -> Simd<$int_type, LANES> {
+            pub unsafe fn to_int_unchecked<I>(self) -> Simd<I, LANES>
+            where
+                $type: FloatToInt<I>,
+                I: SimdElement,
+            {
                 unsafe { intrinsics::simd_cast(self) }
             }
-
-            /// Creates a floating-point vector from an integer vector.  Rounds values that are
-            /// not exactly representable.
-            #[inline]
-            pub fn round_from_int(value: Simd<$int_type, LANES>) -> Self {
-                unsafe { intrinsics::simd_cast(value) }
-            }
         }
     }
 }

-implement! { f32, i32 }
-implement! { f64, i64 }
+implement! { f32 }
+implement! { f64 }
diff --git a/library/portable-simd/crates/core_simd/src/select.rs b/library/portable-simd/crates/core_simd/src/select.rs
index 8d521057fbd..3acf07260e1 100644
--- a/library/portable-simd/crates/core_simd/src/select.rs
+++ b/library/portable-simd/crates/core_simd/src/select.rs
@@ -11,6 +11,7 @@ where
     /// For each lane in the mask, choose the corresponding lane from `true_values` if
     /// that lane mask is true, and `false_values` if that lane mask is false.
     ///
+    /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
@@ -31,6 +32,8 @@ where
     where
         U: SimdElement<Mask = T>,
     {
+        // Safety: The mask has been cast to a vector of integers,
+        // and the operands to select between are vectors of the same type and length.
         unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) }
     }

@@ -39,6 +42,7 @@ where
     /// For each lane in the mask, choose the corresponding lane from `true_values` if
     /// that lane mask is true, and `false_values` if that lane mask is false.
     ///
+    /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "std")] use core_simd::Mask;
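The `cast` alternative recommended by the new `to_int_unchecked` docs follows `as` semantics, so out-of-range and NaN lanes are handled safely; a sketch:

    use core_simd::Simd;

    fn main() {
        let x = Simd::from_array([1.5f32, -1.5, f32::NAN, 3.0e9]);
        // Truncates toward zero, saturates at the integer bounds, maps NaN to 0.
        assert_eq!(x.cast::<i32>().to_array(), [1, -1, 0, i32::MAX]);
    }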
diff --git a/library/portable-simd/crates/core_simd/src/swizzle.rs b/library/portable-simd/crates/core_simd/src/swizzle.rs
index bdc489774a5..08b2add1166 100644
--- a/library/portable-simd/crates/core_simd/src/swizzle.rs
+++ b/library/portable-simd/crates/core_simd/src/swizzle.rs
@@ -95,6 +95,7 @@ pub trait Swizzle<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
         LaneCount<INPUT_LANES>: SupportedLaneCount,
         LaneCount<OUTPUT_LANES>: SupportedLaneCount,
     {
+        // Safety: `vector` is a vector, and `INDEX_IMPL` is a const array of u32.
         unsafe { intrinsics::simd_shuffle(vector, vector, Self::INDEX_IMPL) }
     }
 }
@@ -119,6 +120,7 @@ pub trait Swizzle2<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
         LaneCount<INPUT_LANES>: SupportedLaneCount,
         LaneCount<OUTPUT_LANES>: SupportedLaneCount,
     {
+        // Safety: `first` and `second` are vectors, and `INDEX_IMPL` is a const array of u32.
         unsafe { intrinsics::simd_shuffle(first, second, Self::INDEX_IMPL) }
     }
 }
diff --git a/library/portable-simd/crates/core_simd/src/to_bytes.rs b/library/portable-simd/crates/core_simd/src/to_bytes.rs
index 8d9b3e8ff85..b36b1a347b2 100644
--- a/library/portable-simd/crates/core_simd/src/to_bytes.rs
+++ b/library/portable-simd/crates/core_simd/src/to_bytes.rs
@@ -8,12 +8,14 @@ macro_rules! impl_to_bytes {
         /// Return the memory representation of this integer as a byte array in native byte
         /// order.
         pub fn to_ne_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
+            // Safety: transmuting between vectors is safe
             unsafe { core::mem::transmute_copy(&self) }
         }

         /// Create a native endian integer value from its memory representation as a byte array
         /// in native endianness.
         pub fn from_ne_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self {
+            // Safety: transmuting between vectors is safe
             unsafe { core::mem::transmute_copy(&bytes) }
         }
     }
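A sketch of the byte-vector round trip these methods provide (the transmutes above are sound because the two vector types have the same size and memory representation):

    use core_simd::Simd;

    fn main() {
        let x = Simd::from_array([1u32, 2, 3, 4]);
        let bytes = x.to_ne_bytes(); // Simd<u8, 16>, native byte order
        assert_eq!(Simd::<u32, 4>::from_ne_bytes(bytes), x);
    }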
diff --git a/library/portable-simd/crates/core_simd/src/vector.rs b/library/portable-simd/crates/core_simd/src/vector.rs
index b7ef7a56c73..ff1b2c756ad 100644
--- a/library/portable-simd/crates/core_simd/src/vector.rs
+++ b/library/portable-simd/crates/core_simd/src/vector.rs
@@ -12,7 +12,79 @@ pub(crate) mod ptr;
 use crate::simd::intrinsics;
 use crate::simd::{LaneCount, Mask, MaskElement, SupportedLaneCount};

-/// A SIMD vector of `LANES` elements of type `T`.
+/// A SIMD vector of `LANES` elements of type `T`. `Simd<T, N>` has the same shape as [`[T; N]`](array), but it operates like `T`.
+///
+/// Two vectors of the same type and length will, by convention, support the operators (+, *, etc.) that `T` does.
+/// These take the lanes at each index on the left-hand side and right-hand side, perform the operation,
+/// and return the result in the same lane in a vector of equal size. For a given operator, this is equivalent to zipping
+/// the two arrays together and mapping the operator over each lane.
+///
+/// ```rust
+/// # #![feature(array_zip, portable_simd)]
+/// # use core::simd::{Simd};
+/// let a0: [i32; 4] = [-2, 0, 2, 4];
+/// let a1 = [10, 9, 8, 7];
+/// let zm_add = a0.zip(a1).map(|(lhs, rhs)| lhs + rhs);
+/// let zm_mul = a0.zip(a1).map(|(lhs, rhs)| lhs * rhs);
+///
+/// // `Simd<T, N>` implements `From<[T; N]>`
+/// let (v0, v1) = (Simd::from(a0), Simd::from(a1));
+/// // Which means arrays implement `Into<Simd<T, N>>`.
+/// assert_eq!(v0 + v1, zm_add.into());
+/// assert_eq!(v0 * v1, zm_mul.into());
+/// ```
+///
+/// `Simd<T, N>` with integers has the quirk that these operations are also inherently wrapping, as if `T` was [`Wrapping<T>`].
+/// Thus, `Simd` does not implement `wrapping_add`, because that is the default behavior.
+/// This means there is no warning on overflows, even in "debug" builds.
+/// For most applications where `Simd` is appropriate, it is "not a bug" to wrap,
+/// and even "debug builds" are unlikely to tolerate the loss of performance.
+/// You may want to consider using explicitly checked arithmetic if such is required.
+/// Division by zero still causes a panic, so you may want to consider using floating point numbers if that is unacceptable.
+///
+/// [`Wrapping<T>`]: core::num::Wrapping
+///
+/// # Layout
+/// `Simd<T, N>` has a layout similar to `[T; N]` (identical "shapes"), but with a greater alignment.
+/// `[T; N]` is aligned to `T`, but `Simd<T, N>` will have an alignment based on both `T` and `N`.
+/// It is thus sound to [`transmute`] `Simd<T, N>` to `[T; N]`, and that conversion will typically optimize to zero cost,
+/// but the reverse transmutation is more likely to require a copy the compiler cannot simply elide.
+///
+/// # ABI "Features"
+/// Due to Rust's safety guarantees, `Simd<T, N>` is currently passed to and from functions via memory, not SIMD registers,
+/// except as an optimization. `#[inline]` hints are recommended on functions that accept `Simd<T, N>` or return it.
+/// The need for this may be corrected in the future.
+///
+/// # Safe SIMD with Unsafe Rust
+///
+/// Operations with `Simd` are typically safe, but there are many reasons to want to combine SIMD with `unsafe` code.
+/// Care must be taken to respect differences between `Simd` and other types it may be transformed into or derived from.
+/// In particular, the layout of `Simd<T, N>` may be similar to `[T; N]`, and may allow some transmutations,
+/// but references to `[T; N]` are not interchangeable with those to `Simd<T, N>`.
+/// Thus, when using `unsafe` Rust to read and write `Simd<T, N>` through [raw pointers], it is a good idea to first try with
+/// [`read_unaligned`] and [`write_unaligned`]. This is because:
+/// - [`read`] and [`write`] require full alignment (in this case, `Simd<T, N>`'s alignment)
+/// - the likely source for reading or destination for writing `Simd<T, N>` is [`[T]`](slice) and similar types, aligned to `T`
+/// - combining these actions would violate the `unsafe` contract and explode the program into a puff of **undefined behavior**
+/// - the compiler can implicitly adjust layouts to make unaligned reads or writes fully aligned if it sees the optimization
+/// - most contemporary processors suffer no performance penalty for "unaligned" reads and writes that are aligned at runtime
+///
+/// By imposing fewer obligations, unaligned functions are less likely to make the program unsound,
+/// and may be just as fast as stricter alternatives.
+/// When trying to guarantee alignment, [`[T]::as_simd`][as_simd] is an option for converting `[T]` to `[Simd<T, N>]`,
+/// and allows soundly operating on an aligned SIMD body, but it may cost more time when handling the scalar head and tail.
+/// If these are not sufficient, then it is most ideal to design data structures to be already aligned
+/// to the `Simd<T, N>` you wish to use before using `unsafe` Rust to read or write.
+/// More conventional ways to compensate for these facts, like materializing `Simd` to or from an array first,
+/// are handled by safe methods like [`Simd::from_array`] and [`Simd::from_slice`].
+///
+/// [`transmute`]: core::mem::transmute
+/// [raw pointers]: pointer
+/// [`read_unaligned`]: pointer::read_unaligned
+/// [`write_unaligned`]: pointer::write_unaligned
+/// [`read`]: pointer::read
+/// [`write`]: pointer::write
+/// [as_simd]: slice::as_simd
 #[repr(simd)]
 pub struct Simd<T, const LANES: usize>([T; LANES])
 where
@@ -102,6 +174,7 @@ where
     #[inline]
     #[cfg(not(bootstrap))]
     pub fn cast<U: SimdElement>(self) -> Simd<U, LANES> {
+        // Safety: The input argument is a vector of a known SIMD type.
         unsafe { intrinsics::simd_as(self) }
     }

@@ -175,7 +248,7 @@ where
         or: Self,
     ) -> Self {
         let enable: Mask<isize, LANES> = enable & idxs.lanes_lt(Simd::splat(slice.len()));
-        // SAFETY: We have masked-off out-of-bounds lanes.
+        // Safety: We have masked-off out-of-bounds lanes.
         unsafe { Self::gather_select_unchecked(slice, enable, idxs, or) }
     }

@@ -216,7 +289,7 @@ where
         let base_ptr = crate::simd::ptr::SimdConstPtr::splat(slice.as_ptr());
         // Ferris forgive me, I have done pointer arithmetic here.
         let ptrs = base_ptr.wrapping_add(idxs);
-        // SAFETY: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
+        // Safety: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
         unsafe { intrinsics::simd_gather(or, ptrs, enable.to_int()) }
     }

@@ -268,7 +341,7 @@ where
         idxs: Simd<usize, LANES>,
     ) {
         let enable: Mask<isize, LANES> = enable & idxs.lanes_lt(Simd::splat(slice.len()));
-        // SAFETY: We have masked-off out-of-bounds lanes.
+        // Safety: We have masked-off out-of-bounds lanes.
         unsafe { self.scatter_select_unchecked(slice, enable, idxs) }
     }

@@ -307,7 +380,7 @@ where
         enable: Mask<isize, LANES>,
         idxs: Simd<usize, LANES>,
     ) {
-        // SAFETY: This block works with *mut T derived from &mut 'a [T],
+        // Safety: This block works with *mut T derived from &mut 'a [T],
         // which means it is delicate in Rust's borrowing model, circa 2021:
         // &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts!
         // Even though this block is largely safe methods, it must be exactly this way
@@ -487,7 +560,9 @@ mod sealed {
 use sealed::Sealed;

 /// Marker trait for types that may be used as SIMD vector elements.
-/// SAFETY: This trait, when implemented, asserts the compiler can monomorphize
+///
+/// # Safety
+/// This trait, when implemented, asserts the compiler can monomorphize
 /// `#[repr(simd)]` structs with the marked type as an element.
 /// Strictly, it is valid to impl if the vector will not be miscompiled.
 /// Practically, it is user-unfriendly to impl it if the vector won't compile,
diff --git a/library/portable-simd/crates/core_simd/src/vector/ptr.rs b/library/portable-simd/crates/core_simd/src/vector/ptr.rs
index c668d9a6eae..417d255c28d 100644
--- a/library/portable-simd/crates/core_simd/src/vector/ptr.rs
+++ b/library/portable-simd/crates/core_simd/src/vector/ptr.rs
@@ -21,6 +21,8 @@ where
     #[inline]
     #[must_use]
     pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
+        // Safety: converting pointers to usize and vice-versa is safe
+        // (even if using that pointer is not)
        unsafe {
            let x: Simd<usize, LANES> = mem::transmute_copy(&self);
            mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })
@@ -47,6 +49,8 @@ where
     #[inline]
     #[must_use]
     pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
+        // Safety: converting pointers to usize and vice-versa is safe
+        // (even if using that pointer is not)
        unsafe {
            let x: Simd<usize, LANES> = mem::transmute_copy(&self);
            mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })
diff --git a/library/portable-simd/crates/core_simd/src/vendor.rs b/library/portable-simd/crates/core_simd/src/vendor.rs
index e8ce7176b4f..9fb70218c95 100644
--- a/library/portable-simd/crates/core_simd/src/vendor.rs
+++ b/library/portable-simd/crates/core_simd/src/vendor.rs
@@ -9,6 +9,8 @@ macro_rules! from_transmute {
         impl core::convert::From<$from> for $to {
             #[inline]
             fn from(value: $from) -> $to {
+                // Safety: transmuting between vectors is safe, but the caller of this macro
+                // checks the invariants
                 unsafe { core::mem::transmute(value) }
             }
         }
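A sketch of the `read_unaligned` guidance from the new `Simd` docs (buffer and offset are illustrative):

    #![feature(portable_simd)]
    use core_simd::Simd;

    fn main() {
        let data = [1.0f32, 2.0, 3.0, 4.0, 5.0];
        // &data[1..] is aligned for f32 but usually not for Simd<f32, 4>,
        // so read_unaligned is the sound choice; `read` would require full alignment.
        let v = unsafe { data[1..].as_ptr().cast::<Simd<f32, 4>>().read_unaligned() };
        assert_eq!(v.to_array(), [2.0, 3.0, 4.0, 5.0]);
    }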
diff --git a/library/portable-simd/crates/core_simd/tests/masks.rs b/library/portable-simd/crates/core_simd/tests/masks.rs
index 6a8ecd33a73..3aec36ca7b7 100644
--- a/library/portable-simd/crates/core_simd/tests/masks.rs
+++ b/library/portable-simd/crates/core_simd/tests/masks.rs
@@ -68,16 +68,16 @@ macro_rules! test_mask_api {
                 assert_eq!(core_simd::Mask::<$type, 8>::from_int(int), mask);
             }

-            #[cfg(feature = "generic_const_exprs")]
             #[test]
             fn roundtrip_bitmask_conversion() {
+                use core_simd::ToBitMask;
                 let values = [
                     true, false, false, true, false, false, true, false,
                     true, true, false, false, false, false, false, true,
                 ];
                 let mask = core_simd::Mask::<$type, 16>::from_array(values);
                 let bitmask = mask.to_bitmask();
-                assert_eq!(bitmask, [0b01001001, 0b10000011]);
+                assert_eq!(bitmask, 0b1000001101001001);
                 assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask(bitmask), mask);
             }
         }
diff --git a/library/portable-simd/crates/core_simd/tests/ops_macros.rs b/library/portable-simd/crates/core_simd/tests/ops_macros.rs
index 4fb9de198ee..50f7a4ca170 100644
--- a/library/portable-simd/crates/core_simd/tests/ops_macros.rs
+++ b/library/portable-simd/crates/core_simd/tests/ops_macros.rs
@@ -210,15 +210,21 @@ macro_rules! impl_signed_tests {
                 )
             }

+            fn div_min_may_overflow<const LANES: usize>() {
+                let a = Vector::<LANES>::splat(Scalar::MIN);
+                let b = Vector::<LANES>::splat(-1);
+                assert_eq!(a / b, a);
+            }
+
+            fn rem_min_may_overflow<const LANES: usize>() {
+                let a = Vector::<LANES>::splat(Scalar::MIN);
+                let b = Vector::<LANES>::splat(-1);
+                assert_eq!(a % b, Vector::<LANES>::splat(0));
+            }
+
         }

         test_helpers::test_lanes_panic! {
-            fn div_min_overflow_panics<const LANES: usize>() {
-                let a = Vector::<LANES>::splat(Scalar::MIN);
-                let b = Vector::<LANES>::splat(-1);
-                let _ = a / b;
-            }
-
             fn div_by_all_zeros_panics<const LANES: usize>() {
                 let a = Vector::<LANES>::splat(42);
                 let b = Vector::<LANES>::splat(0);
@@ -232,12 +238,6 @@ macro_rules! impl_signed_tests {
                 let _ = a / b;
             }

-            fn rem_min_overflow_panic<const LANES: usize>() {
-                let a = Vector::<LANES>::splat(Scalar::MIN);
-                let b = Vector::<LANES>::splat(-1);
-                let _ = a % b;
-            }
-
             fn rem_zero_panic<const LANES: usize>() {
                 let a = Vector::<LANES>::splat(42);
                 let b = Vector::<LANES>::splat(0);
diff --git a/library/portable-simd/crates/core_simd/tests/round.rs b/library/portable-simd/crates/core_simd/tests/round.rs
index 1a1bc9ebca7..53732329237 100644
--- a/library/portable-simd/crates/core_simd/tests/round.rs
+++ b/library/portable-simd/crates/core_simd/tests/round.rs
@@ -53,14 +53,6 @@ macro_rules! float_rounding_test {
         }

         test_helpers::test_lanes! {
-            fn from_int<const LANES: usize>() {
-                test_helpers::test_unary_elementwise(
-                    &Vector::<LANES>::round_from_int,
-                    &|x| x as Scalar,
-                    &|_| true,
-                )
-            }
-
             fn to_int_unchecked<const LANES: usize>() {
                 // The maximum integer that can be represented by the equivalently sized float has
                 // all of the mantissa digits set to 1, pushed up to the MSB.
@@ -72,11 +64,11 @@ macro_rules! float_rounding_test {
                 runner.run(
                     &test_helpers::array::UniformArrayStrategy::new(-MAX_REPRESENTABLE_VALUE..MAX_REPRESENTABLE_VALUE),
                     |x| {
-                        let result_1 = unsafe { Vector::<LANES>::from_array(x).to_int_unchecked().to_array() };
+                        let result_1 = unsafe { Vector::<LANES>::from_array(x).to_int_unchecked::<IntScalar>().to_array() };
                         let result_2 = {
-                            let mut result = [0; LANES];
+                            let mut result: [IntScalar; LANES] = [0; LANES];
                             for (i, o) in x.iter().zip(result.iter_mut()) {
-                                *o = unsafe { i.to_int_unchecked() };
+                                *o = unsafe { i.to_int_unchecked::<IntScalar>() };
                             }
                             result
                         };