Default repr(C) enums to c_int size

ISO C strongly implies this is correct, and
many processor-specific ABIs imply or mandate this size, so
"everyone" (LLVM, gcc...) defaults to emitting enums this way.
However, this is by no means guaranteed by ISO C,
and the bare-metal Arm targets show it can be overridden,
which rustc supports via `c-enum-min-bits` in a target.json.

The override is a flag named `-fshort-enums` in clang and gcc,
but introducing a CLI flag is probably unnecessary for rustc.
This flag can be used by non-Arm microcontroller targets,
like AVR and MSP430, but it is not enabled for them by default.
Rust programmers who know the size of a target's enums
can use explicit reprs, which also lets them match C23 code.

This change is most relevant to 16-bit targets: AVR and MSP430.
Most of rustc's targets use 32-bit ints, but ILP64 does exist.
Regardless, rustc should now correctly handle enums for
both very small and very large targets.

Thanks to William for confirming MSP430 behavior,
and to Waffle for better style and no-core size_of asserts.

Co-authored-by: William D. Jones <thor0505@comcast.net>
Co-authored-by: Waffle Maybe <waffle.lapkin@gmail.com>
This commit is contained in:
Jubilee Young 2023-02-02 00:14:36 -08:00
parent 4781233a77
commit 2edf6c8784
13 changed files with 81 additions and 25 deletions

View File

@ -170,7 +170,9 @@ pub struct TargetDataLayout {
pub instruction_address_space: AddressSpace,
/// Minimum size of #[repr(C)] enums (default I32 bits)
/// Minimum size of #[repr(C)] enums (default c_int::BITS, usually 32)
/// Note: This isn't in LLVM's data layout string, it is `short_enum`
/// so the only valid spec for LLVM is c_int::BITS or 8
pub c_enum_min_size: Integer,
}

View File

@ -19,7 +19,7 @@ pub fn target() -> Target {
max_atomic_width: Some(32),
emit_debug_gdb_scripts: false,
// GCC and Clang default to 8 for arm-none here
c_enum_min_bits: 8,
c_enum_min_bits: Some(8),
..Default::default()
},
}

View File

@ -20,7 +20,7 @@ pub fn target() -> Target {
max_atomic_width: Some(32),
emit_debug_gdb_scripts: false,
// GCC and Clang default to 8 for arm-none here
c_enum_min_bits: 8,
c_enum_min_bits: Some(8),
..Default::default()
},
}

View File

@ -49,7 +49,7 @@ pub fn target() -> Target {
// from thumb_base, rust-lang/rust#44993.
emit_debug_gdb_scripts: false,
// from thumb_base, apparently gcc/clang give enums a minimum of 8 bits on no-os targets
c_enum_min_bits: 8,
c_enum_min_bits: Some(8),
..Default::default()
},
}

View File

@ -27,7 +27,7 @@ pub fn target() -> Target {
max_atomic_width: Some(64),
panic_strategy: PanicStrategy::Abort,
emit_debug_gdb_scripts: false,
c_enum_min_bits: 8,
c_enum_min_bits: Some(8),
..Default::default()
};
Target {

View File

@ -19,7 +19,7 @@ pub fn target() -> Target {
panic_strategy: PanicStrategy::Abort,
emit_debug_gdb_scripts: false,
// GCC and Clang default to 8 for arm-none here
c_enum_min_bits: 8,
c_enum_min_bits: Some(8),
..Default::default()
};
Target {

View File

@ -18,7 +18,7 @@ pub fn target() -> Target {
max_atomic_width: Some(32),
emit_debug_gdb_scripts: false,
// GCC and Clang default to 8 for arm-none here
c_enum_min_bits: 8,
c_enum_min_bits: Some(8),
..Default::default()
},
}

View File

@ -19,7 +19,7 @@ pub fn target() -> Target {
max_atomic_width: Some(32),
emit_debug_gdb_scripts: false,
// GCC and Clang default to 8 for arm-none here
c_enum_min_bits: 8,
c_enum_min_bits: Some(8),
..Default::default()
},
}

View File

@ -11,7 +11,7 @@ pub fn target() -> Target {
base.has_rpath = true;
base.linker_flavor = LinkerFlavor::Unix(Cc::Yes);
base.c_enum_min_bits = 8;
base.c_enum_min_bits = Some(8);
Target {
llvm_target: "hexagon-unknown-linux-musl".into(),

View File

@ -1344,10 +1344,18 @@ impl Target {
});
}
dl.c_enum_min_size = match Integer::from_size(Size::from_bits(self.c_enum_min_bits)) {
Ok(bits) => bits,
Err(err) => return Err(TargetDataLayoutErrors::InvalidBitsSize { err }),
};
dl.c_enum_min_size = self
.c_enum_min_bits
.map_or_else(
|| {
self.c_int_width
.parse()
.map_err(|_| String::from("failed to parse c_int_width"))
},
Ok,
)
.and_then(|i| Integer::from_size(Size::from_bits(i)))
.map_err(|err| TargetDataLayoutErrors::InvalidBitsSize { err })?;
Ok(dl)
}
@ -1701,8 +1709,8 @@ pub struct TargetOptions {
/// If present it's a default value to use for adjusting the C ABI.
pub default_adjusted_cabi: Option<Abi>,
/// Minimum number of bits in #[repr(C)] enum. Defaults to 32.
pub c_enum_min_bits: u64,
/// Minimum number of bits in #[repr(C)] enum. Defaults to the size of c_int
pub c_enum_min_bits: Option<u64>,
/// Whether or not the DWARF `.debug_aranges` section should be generated.
pub generate_arange_section: bool,
@ -1932,7 +1940,7 @@ impl Default for TargetOptions {
supported_split_debuginfo: Cow::Borrowed(&[SplitDebuginfo::Off]),
supported_sanitizers: SanitizerSet::empty(),
default_adjusted_cabi: None,
c_enum_min_bits: 32,
c_enum_min_bits: None,
generate_arange_section: true,
supports_stack_protector: true,
entry_name: "main".into(),
@ -2118,12 +2126,6 @@ impl Target {
base.$key_name = s;
}
} );
($key_name:ident, u64) => ( {
let name = (stringify!($key_name)).replace("_", "-");
if let Some(s) = obj.remove(&name).and_then(|j| Json::as_u64(&j)) {
base.$key_name = s;
}
} );
($key_name:ident, u32) => ( {
let name = (stringify!($key_name)).replace("_", "-");
if let Some(s) = obj.remove(&name).and_then(|b| b.as_u64()) {
@ -2492,6 +2494,7 @@ impl Target {
key!(is_builtin, bool);
key!(c_int_width = "target-c-int-width");
key!(c_enum_min_bits, Option<u64>); // if None, matches c_int_width
key!(os);
key!(env);
key!(abi);
@ -2587,7 +2590,6 @@ impl Target {
key!(supported_split_debuginfo, falliable_list)?;
key!(supported_sanitizers, SanitizerSet)?;
key!(default_adjusted_cabi, Option<Abi>)?;
key!(c_enum_min_bits, u64);
key!(generate_arange_section, bool);
key!(supports_stack_protector, bool);
key!(entry_name);

View File

@ -53,7 +53,7 @@ pub fn opts() -> TargetOptions {
frame_pointer: FramePointer::Always,
// ARM supports multiple ABIs for enums, the linux one matches the default of 32 here
// but any arm-none or thumb-none target will be defaulted to 8 on GCC and clang
c_enum_min_bits: 8,
c_enum_min_bits: Some(8),
..Default::default()
}
}

View File

@ -55,7 +55,7 @@ pub fn target() -> Target {
// suggested from thumb_base, rust-lang/rust#44993.
emit_debug_gdb_scripts: false,
// suggested from thumb_base, with no-os gcc/clang use 8-bit enums
c_enum_min_bits: 8,
c_enum_min_bits: Some(8),
frame_pointer: FramePointer::MayOmit,
main_needs_argc_argv: false,

View File

@ -0,0 +1,52 @@
// build-pass
// revisions: avr msp430
//
// [avr] needs-llvm-components: avr
// [avr] compile-flags: --target=avr-unknown-gnu-atmega328 --crate-type=rlib
// [msp430] needs-llvm-components: msp430
// [msp430] compile-flags: --target=msp430-none-elf --crate-type=rlib
#![feature(no_core, lang_items, intrinsics, staged_api)]
#![no_core]
#![crate_type = "lib"]
#![stable(feature = "", since = "")]
#![allow(dead_code)]
// Test that the `#[repr(C)]` attribute doesn't break compilation on targets
// whose C int is 16 bits wide.
// The previous bad assumption was that a 32-bit default enum width is fine on msp430 and avr.
// But the C int on these platforms is 16 bits wide, and C enums normally fit within the
// range of C int, so we usually want no more than that. The old default resulted in errors like
// "layout decided on a larger discriminant type (I32) than typeck (I16)"
#[repr(C)]
enum Foo {
Bar,
}
// `size_of` is declared by hand as a compiler intrinsic because this crate is
// `#![no_core]`, so there is no `core::mem::size_of` to import.
// NOTE(review): `rustc_const_stable` and `rustc_safe_intrinsic` are presumably what
// permit the call in the `const ACTUAL` initializer without an `unsafe` block — confirm.
extern "rust-intrinsic" {
#[stable(feature = "", since = "")]
#[rustc_const_stable(feature = "", since = "")]
#[rustc_safe_intrinsic]
fn size_of<T>() -> usize;
}
// Minimal lang items, declared locally because a `#![no_core]` crate
// has no `core` library to supply them.
#[lang="sized"]
trait Sized {}
#[lang="copy"]
trait Copy {}
// Expected size of `Foo` in bytes: 2 bytes, i.e. 16 bits.
const EXPECTED: usize = 2;
// Size of `Foo` in bytes as computed by rustc for the target under test.
const ACTUAL: usize = size_of::<Foo>();
// Validate that the size is indeed 16 bits, to match the C static_assert shown below.
// The array length is part of the array type, so the final `const _` item only
// type-checks when ACTUAL == EXPECTED. This acts as a compile-time assertion in a
// `no_core` crate, where the usual `assert!` macros from `core` are not available.
/**
```c
#include <assert.h>
enum foo {
BAR
};
int main(void)
{
/* passes on msp430-elf-gcc */
static_assert(sizeof(enum foo) == 2);
}
```
*/
const _: [(); EXPECTED] = [(); ACTUAL];