Auto merge of #103913 - Neutron3529:patch-1, r=thomcc

Improve performance of `rem_euclid()` for signed integers

such code is copy from
https://github.com/rust-lang/rust/blob/master/library/std/src/f32.rs and
https://github.com/rust-lang/rust/blob/master/library/std/src/f64.rs
using `r+rhs.abs()` is faster than calc it with an if clause. Bench result:
```
$ cargo bench
   Compiling div-euclid v0.1.0 (/me/div-euclid)
    Finished bench [optimized] target(s) in 1.01s
     Running unittests src/lib.rs (target/release/deps/div_euclid-7a4530ca7817d1ef)

running 7 tests
test tests::it_works ... ignored
test tests::bench_aaabs     ... bench:  10,498,793 ns/iter (+/- 104,360)
test tests::bench_aadefault ... bench:  11,061,862 ns/iter (+/- 94,107)
test tests::bench_abs       ... bench:  10,477,193 ns/iter (+/- 81,942)
test tests::bench_default   ... bench:  10,622,983 ns/iter (+/- 25,119)
test tests::bench_zzabs     ... bench:  10,481,971 ns/iter (+/- 43,787)
test tests::bench_zzdefault ... bench:  11,074,976 ns/iter (+/- 29,633)

test result: ok. 0 passed; 0 failed; 1 ignored; 6 measured; 0 filtered out; finished in 19.35s
```
It seems that, default `rem_euclid` triggered a branch prediction, thus `bench_default` is faster than `bench_aadefault` and `bench_aadefault`, which shuffles the order of calculations. but all of them slower than what it was in `f64`'s and `f32`'s `rem_euclid`, thus I submit this PR.

bench code:
```rust
#![feature(test)]
extern crate test;

fn rem_euclid(a:i32,rhs:i32)->i32{
    let r = a % rhs;
    if r < 0 { r + rhs.abs() } else { r }
}

#[cfg(test)]
mod tests {
    use super::*;
    use test::Bencher;
    use rand::prelude::*;
    use rand::rngs::SmallRng;
    const N:i32=1000;
    #[test]
    fn it_works() {
        let a: i32 = 7; // or any other integer type
        let b = 4;

        let d:Vec<i32>=(-N..=N).collect();
        let n:Vec<i32>=(-N..0).chain(1..=N).collect();

        for i in &d {
            for j in &n {
                assert_eq!(i.rem_euclid(*j),rem_euclid(*i,*j));
            }
        }

        assert_eq!(rem_euclid(a,b), 3);
        assert_eq!(rem_euclid(-a,b), 1);
        assert_eq!(rem_euclid(a,-b), 3);
        assert_eq!(rem_euclid(-a,-b), 1);
    }

    #[bench]
    fn bench_aaabs(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=rem_euclid(*i,*j);
                }
            }
            res
        });
    }
    #[bench]
    fn bench_aadefault(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=i.rem_euclid(*j);
                }
            }
            res
        });
    }

    #[bench]
    fn bench_abs(b: &mut Bencher) {
        let d:Vec<i32>=(-N..=N).collect();
        let n:Vec<i32>=(-N..0).chain(1..=N).collect();
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=rem_euclid(*i,*j);
                }
            }
            res
        });
    }
    #[bench]
    fn bench_default(b: &mut Bencher) {
        let d:Vec<i32>=(-N..=N).collect();
        let n:Vec<i32>=(-N..0).chain(1..=N).collect();
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=i.rem_euclid(*j);
                }
            }
            res
        });
    }

    #[bench]
    fn bench_zzabs(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=rem_euclid(*i,*j);
                }
            }
            res
        });
    }
    #[bench]
    fn bench_zzdefault(b: &mut Bencher) {
        let mut d:Vec<i32>=(-N..=N).collect();
        let mut n:Vec<i32>=(-N..0).chain(1..=N).collect();
        let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]);
        d.shuffle(&mut rng);
        n.shuffle(&mut rng);
        d.shuffle(&mut rng);
        b.iter(||{
            let mut res=0;
            for i in &d {
                for j in &n {
                    res+=i.rem_euclid(*j);
                }
            }
            res
        });
    }
}
```
This commit is contained in:
bors 2022-11-12 20:48:27 +00:00
commit 6284998a26
1 changed files with 9 additions and 5 deletions

View File

@ -2072,11 +2072,15 @@ macro_rules! int_impl {
pub const fn rem_euclid(self, rhs: Self) -> Self {
let r = self % rhs;
if r < 0 {
if rhs < 0 {
r - rhs
} else {
r + rhs
}
// Semantically equivalent to `if rhs < 0 { r - rhs } else { r + rhs }`.
// If `rhs` is not `Self::MIN`, then `r + abs(rhs)` will not overflow
// and is clearly equivalent, because `r` is negative.
// Otherwise, `rhs` is `Self::MIN`, then we have
// `r.wrapping_add(Self::MIN.wrapping_abs())`, which evaluates
// to `r.wrapping_add(Self::MIN)`, which is equivalent to
// `r - Self::MIN`, which is what we wanted (and will not overflow
// for negative `r`).
r.wrapping_add(rhs.wrapping_abs())
} else {
r
}