Auto merge of #102750 - the8472:opt-field-order, r=wesleywiser

optimize field ordering by grouping m*2^n-sized fields with equivalently aligned ones

```rust
use std::ptr::addr_of;
use std::mem;

struct Foo {
    word: u32,
    byte: u8,
    ary: [u8; 4]
}

fn main() {
    let foo: Foo = unsafe { mem::zeroed() };

    println!("base: {:p}\nword: {:p}\nbyte: {:p}\nary:  {:p}", &foo, addr_of!(foo.word), addr_of!(foo.byte), addr_of!(foo.ary));
}
```

prints

```
base: 0x7fffc1a8a668
word: 0x7fffc1a8a668
byte: 0x7fffc1a8a66c
ary:  0x7fffc1a8a66d
```

I.e. the `u8` in the middle causes the array to sit at an odd offset, which might prevent optimizations, especially on architectures where unaligned loads are costly.

Note that this will make field ordering niche-dependent, i.e. a `Bar<T>` with `T=char` and `T=u32` may result in different field order, this may break some code that makes invalid assumptions about `repr(Rust)` types.
This commit is contained in:
bors 2022-11-23 10:01:48 +00:00
commit 4e0d0d757e
9 changed files with 81 additions and 34 deletions

View File

@ -3594,9 +3594,16 @@ mod size_asserts {
static_assert_size!(Res, 12);
static_assert_size!(Stmt<'_>, 32);
static_assert_size!(StmtKind<'_>, 16);
// tidy-alphabetical-end
// FIXME: move the tidy directive to the end after the next bootstrap bump
#[cfg(bootstrap)]
static_assert_size!(TraitItem<'_>, 88);
#[cfg(not(bootstrap))]
static_assert_size!(TraitItem<'_>, 80);
#[cfg(bootstrap)]
static_assert_size!(TraitItemKind<'_>, 48);
#[cfg(not(bootstrap))]
static_assert_size!(TraitItemKind<'_>, 40);
static_assert_size!(Ty<'_>, 48);
static_assert_size!(TyKind<'_>, 32);
// tidy-alphabetical-end
}

View File

@ -138,8 +138,18 @@ fn univariant_uninterned<'tcx>(
if optimize {
let end = if let StructKind::MaybeUnsized = kind { fields.len() - 1 } else { fields.len() };
let optimizing = &mut inverse_memory_index[..end];
let field_align = |f: &TyAndLayout<'_>| {
if let Some(pack) = pack { f.align.abi.min(pack) } else { f.align.abi }
let effective_field_align = |f: &TyAndLayout<'_>| {
if let Some(pack) = pack {
// return the packed alignment in bytes
f.align.abi.min(pack).bytes()
} else {
// returns log2(effective-align).
// This is ok since `pack` applies to all fields equally.
// The calculation assumes that size is an integer multiple of align, except for ZSTs.
//
// group [u8; 4] with align-4 or [u8; 6] with align-2 fields
f.align.abi.bytes().max(f.size.bytes()).trailing_zeros() as u64
}
};
// If `-Z randomize-layout` was enabled for the type definition we can shuffle
@ -160,15 +170,23 @@ fn univariant_uninterned<'tcx>(
optimizing.sort_by_key(|&x| {
// Place ZSTs first to avoid "interesting offsets",
// especially with only one or two non-ZST fields.
// Then place largest alignments first, largest niches within an alignment group last
let f = &fields[x as usize];
(!f.is_zst(), cmp::Reverse(field_align(f)))
let niche_size = f.largest_niche.map_or(0, |n| n.available(cx));
(!f.is_zst(), cmp::Reverse(effective_field_align(f)), niche_size)
});
}
StructKind::Prefixed(..) => {
// Sort in ascending alignment so that the layout stays optimal
// regardless of the prefix
optimizing.sort_by_key(|&x| field_align(&fields[x as usize]));
// regardless of the prefix.
// And put the largest niche in an alignment group at the end
// so it can be used as discriminant in jagged enums
optimizing.sort_by_key(|&x| {
let f = &fields[x as usize];
let niche_size = f.largest_niche.map_or(0, |n| n.available(cx));
(effective_field_align(f), niche_size)
});
}
}

View File

@ -15,7 +15,7 @@ use std::slice::Iter;
pub fn is_empty_1(xs: Iter<f32>) -> bool {
// CHECK-LABEL: @is_empty_1(
// CHECK-NEXT: start:
// CHECK-NEXT: [[A:%.*]] = icmp ne {{i32\*|ptr}} %xs.1, null
// CHECK-NEXT: [[A:%.*]] = icmp ne {{i32\*|ptr}} {{%xs.0|%xs.1}}, null
// CHECK-NEXT: tail call void @llvm.assume(i1 [[A]])
// The order between %xs.0 and %xs.1 on the next line doesn't matter
// and different LLVM versions produce different order.
@ -28,7 +28,7 @@ pub fn is_empty_1(xs: Iter<f32>) -> bool {
pub fn is_empty_2(xs: Iter<f32>) -> bool {
// CHECK-LABEL: @is_empty_2
// CHECK-NEXT: start:
// CHECK-NEXT: [[C:%.*]] = icmp ne {{i32\*|ptr}} %xs.1, null
// CHECK-NEXT: [[C:%.*]] = icmp ne {{i32\*|ptr}} {{%xs.0|%xs.1}}, null
// CHECK-NEXT: tail call void @llvm.assume(i1 [[C]])
// The order between %xs.0 and %xs.1 on the next line doesn't matter
// and different LLVM versions produce different order.

View File

@ -18,7 +18,7 @@ pub fn replace_byte(dst: &mut u8, src: u8) -> u8 {
// CHECK-NOT: call void @llvm.memcpy
// CHECK: ; core::mem::replace
// CHECK-NOT: call void @llvm.memcpy
// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} align 1 %{{.*}}, {{i8\*|ptr}} align 1 %dest, i{{.*}} 1, i1 false)
// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} align 1 %{{.*}}, {{i8\*|ptr}} align 1 %{{.*}}, i{{.*}} 1, i1 false)
// CHECK-NOT: call void @llvm.memcpy
// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} align 1 %dest, {{i8\*|ptr}} align 1 %src{{.*}}, i{{.*}} 1, i1 false)
// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} align 1 %{{.*}}, {{i8\*|ptr}} align 1 %{{.*}}, i{{.*}} 1, i1 false)
// CHECK-NOT: call void @llvm.memcpy

View File

@ -9,7 +9,7 @@ type Demo = [u8; 3];
#[no_mangle]
pub fn slice_iter_len_eq_zero(y: std::slice::Iter<'_, Demo>) -> bool {
// CHECK-NOT: sub
// CHECK: %2 = icmp eq {{i8\*|ptr}} %1, %0
// CHECK: %2 = icmp eq {{i8\*|ptr}} {{%1|%0}}, {{%1|%0}}
// CHECK: ret i1 %2
y.len() == 0
}

View File

@ -370,6 +370,12 @@ error: layout_of(NicheFirst) = Layout {
pref: $PREF_ALIGN,
},
abi: ScalarPair(
Union {
value: Int(
I8,
false,
),
},
Initialized {
value: Int(
I8,
@ -377,16 +383,10 @@ error: layout_of(NicheFirst) = Layout {
),
valid_range: 0..=4,
},
Union {
value: Int(
I8,
false,
),
},
),
fields: Arbitrary {
offsets: [
Size(0 bytes),
Size(1 bytes),
],
memory_index: [
0,
@ -394,7 +394,7 @@ error: layout_of(NicheFirst) = Layout {
},
largest_niche: Some(
Niche {
offset: Size(0 bytes),
offset: Size(1 bytes),
value: Int(
I8,
false,
@ -429,29 +429,29 @@ error: layout_of(NicheFirst) = Layout {
I8,
false,
),
valid_range: 0..=2,
valid_range: 0..=255,
},
Initialized {
value: Int(
I8,
false,
),
valid_range: 0..=255,
valid_range: 0..=2,
},
),
fields: Arbitrary {
offsets: [
Size(0 bytes),
Size(1 bytes),
Size(0 bytes),
],
memory_index: [
0,
1,
0,
],
},
largest_niche: Some(
Niche {
offset: Size(0 bytes),
offset: Size(1 bytes),
value: Int(
I8,
false,

View File

@ -1,6 +1,7 @@
// check-pass
// compile-flags: -Zhir-stats
// only-x86_64
// ignore-stage1 FIXME: remove after next bootstrap bump
// The aim here is to include at least one of every different type of top-level
// AST/HIR node reported by `-Zhir-stats`.

View File

@ -2,12 +2,12 @@ ast-stats-1 PRE EXPANSION AST STATS
ast-stats-1 Name Accumulated Size Count Item Size
ast-stats-1 ----------------------------------------------------------------
ast-stats-1 ExprField 48 ( 0.6%) 1 48
ast-stats-1 GenericArgs 56 ( 0.8%) 1 56
ast-stats-1 - AngleBracketed 56 ( 0.8%) 1
ast-stats-1 Crate 56 ( 0.8%) 1 56
ast-stats-1 Attribute 64 ( 0.9%) 2 32
ast-stats-1 - Normal 32 ( 0.4%) 1
ast-stats-1 - DocComment 32 ( 0.4%) 1
ast-stats-1 GenericArgs 64 ( 0.9%) 1 64
ast-stats-1 - AngleBracketed 64 ( 0.9%) 1
ast-stats-1 Local 72 ( 1.0%) 1 72
ast-stats-1 WherePredicate 72 ( 1.0%) 1 72
ast-stats-1 - BoundPredicate 72 ( 1.0%) 1
@ -53,15 +53,15 @@ ast-stats-1 - Impl 184 ( 2.5%) 1
ast-stats-1 - Fn 368 ( 5.0%) 2
ast-stats-1 - Use 552 ( 7.4%) 3
ast-stats-1 ----------------------------------------------------------------
ast-stats-1 Total 7_424
ast-stats-1 Total 7_416
ast-stats-1
ast-stats-2 POST EXPANSION AST STATS
ast-stats-2 Name Accumulated Size Count Item Size
ast-stats-2 ----------------------------------------------------------------
ast-stats-2 ExprField 48 ( 0.6%) 1 48
ast-stats-2 GenericArgs 56 ( 0.7%) 1 56
ast-stats-2 - AngleBracketed 56 ( 0.7%) 1
ast-stats-2 Crate 56 ( 0.7%) 1 56
ast-stats-2 GenericArgs 64 ( 0.8%) 1 64
ast-stats-2 - AngleBracketed 64 ( 0.8%) 1
ast-stats-2 Local 72 ( 0.9%) 1 72
ast-stats-2 WherePredicate 72 ( 0.9%) 1 72
ast-stats-2 - BoundPredicate 72 ( 0.9%) 1
@ -80,9 +80,9 @@ ast-stats-2 - Expr 96 ( 1.2%) 3
ast-stats-2 Param 160 ( 2.0%) 4 40
ast-stats-2 FnDecl 200 ( 2.5%) 5 40
ast-stats-2 Variant 240 ( 3.0%) 2 120
ast-stats-2 GenericBound 288 ( 3.5%) 4 72
ast-stats-2 - Trait 288 ( 3.5%) 4
ast-stats-2 Block 288 ( 3.5%) 6 48
ast-stats-2 GenericBound 288 ( 3.6%) 4 72
ast-stats-2 - Trait 288 ( 3.6%) 4
ast-stats-2 Block 288 ( 3.6%) 6 48
ast-stats-2 AssocItem 416 ( 5.1%) 4 104
ast-stats-2 - Type 208 ( 2.6%) 2
ast-stats-2 - Fn 208 ( 2.6%) 2
@ -104,7 +104,7 @@ ast-stats-2 - Rptr 64 ( 0.8%) 1
ast-stats-2 - Ptr 64 ( 0.8%) 1
ast-stats-2 - ImplicitSelf 128 ( 1.6%) 2
ast-stats-2 - Path 640 ( 7.9%) 10
ast-stats-2 Item 2_024 (24.9%) 11 184
ast-stats-2 Item 2_024 (25.0%) 11 184
ast-stats-2 - Trait 184 ( 2.3%) 1
ast-stats-2 - Enum 184 ( 2.3%) 1
ast-stats-2 - ExternCrate 184 ( 2.3%) 1
@ -113,7 +113,7 @@ ast-stats-2 - Impl 184 ( 2.3%) 1
ast-stats-2 - Fn 368 ( 4.5%) 2
ast-stats-2 - Use 736 ( 9.1%) 4
ast-stats-2 ----------------------------------------------------------------
ast-stats-2 Total 8_120
ast-stats-2 Total 8_112
ast-stats-2
hir-stats HIR STATS
hir-stats Name Accumulated Size Count Item Size

View File

@ -3,6 +3,7 @@
#![allow(non_camel_case_types)]
#![allow(dead_code)]
#![feature(never_type)]
#![feature(pointer_is_aligned)]
use std::mem::size_of;
use std::num::NonZeroU8;
@ -168,6 +169,18 @@ pub enum EnumManyVariant<X> {
_F0, _F1, _F2, _F3, _F4, _F5, _F6, _F7, _F8, _F9, _FA, _FB, _FC, _FD, _FE, _FF,
}
struct Reorder4 {
a: u32,
b: u8,
ary: [u8; 4],
}
struct Reorder2 {
a: u16,
b: u8,
ary: [u8; 6],
}
pub fn main() {
assert_eq!(size_of::<u8>(), 1 as usize);
assert_eq!(size_of::<u32>(), 4 as usize);
@ -249,4 +262,12 @@ pub fn main() {
assert_eq!(size_of::<EnumManyVariant<Option<NicheU16>>>(), 4);
assert_eq!(size_of::<EnumManyVariant<Option2<NicheU16,u8>>>(), 6);
assert_eq!(size_of::<EnumManyVariant<Option<(NicheU16,u8)>>>(), 6);
let v = Reorder4 {a: 0, b: 0, ary: [0; 4]};
assert_eq!(size_of::<Reorder4>(), 12);
assert!((&v.ary).as_ptr().is_aligned_to(4), "[u8; 4] should group with align-4 fields");
let v = Reorder2 {a: 0, b: 0, ary: [0; 6]};
assert_eq!(size_of::<Reorder2>(), 10);
assert!((&v.ary).as_ptr().is_aligned_to(2), "[u8; 6] should group with align-2 fields");
}