From 3f7f5e8a2e3c9bb134d160241a6695bb379db647 Mon Sep 17 00:00:00 2001
From: Michael Bradshaw
Date: Tue, 22 Mar 2022 20:21:56 -0600
Subject: [PATCH] Optimize RcInnerPtr::inc_strong instruction count

Inspired by this internals thread: https://internals.rust-lang.org/t/rc-optimization-on-64-bit-targets/16362

[The generated assembly is a bit smaller](https://rust.godbolt.org/z/TeTnf6144) and is a more efficient usage of the CPU's instruction cache.

`unlikely` doesn't impact any of the small artificial tests I've done, but I've included it in case it might help more complex scenarios when this is inlined.
---
 library/alloc/src/rc.rs | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs
index ea651c075d9..8e7946dfd97 100644
--- a/library/alloc/src/rc.rs
+++ b/library/alloc/src/rc.rs
@@ -2512,14 +2512,21 @@ trait RcInnerPtr {
     fn inc_strong(&self) {
         let strong = self.strong();
 
+        // We insert an `assume` here to hint LLVM at an otherwise
+        // missed optimization.
+        // SAFETY: The reference count will never be zero when this is
+        // called.
+        unsafe { core::intrinsics::assume(strong != 0); }
+
+        let strong = strong.wrapping_add(1);
+        self.strong_ref().set(strong);
+
         // We want to abort on overflow instead of dropping the value.
-        // The reference count will never be zero when this is called;
-        // nevertheless, we insert an abort here to hint LLVM at
-        // an otherwise missed optimization.
-        if strong == 0 || strong == usize::MAX {
+        // Checking after the store instead of before allows for
+        // slightly better code generation.
+        if core::intrinsics::unlikely(strong == 0) {
             abort();
         }
-        self.strong_ref().set(strong + 1);
     }
 
     #[inline]
@@ -2536,14 +2543,21 @@ trait RcInnerPtr {
     fn inc_weak(&self) {
         let weak = self.weak();
 
+        // We insert an `assume` here to hint LLVM at an otherwise
+        // missed optimization.
+        // SAFETY: The reference count will never be zero when this is
+        // called.
+        unsafe { core::intrinsics::assume(weak != 0); }
+
+        let weak = weak.wrapping_add(1);
+        self.weak_ref().set(weak);
+
         // We want to abort on overflow instead of dropping the value.
-        // The reference count will never be zero when this is called;
-        // nevertheless, we insert an abort here to hint LLVM at
-        // an otherwise missed optimization.
-        if weak == 0 || weak == usize::MAX {
+        // Checking after the store instead of before allows for
+        // slightly better code generation.
+        if core::intrinsics::unlikely(weak == 0) {
             abort();
         }
-        self.weak_ref().set(weak + 1);
     }
 
     #[inline]