AMDGPU: Assume ECC is enabled by default if supported
The test should really check for the property directly in the code object headers, but there are problems with this: I don't see it directly represented in the text form, and the binary emission depends on a function-level subtarget feature to emit a global flag. llvm-svn: 357558
This commit is contained in:
parent
f7887d41cb
commit
f426ddbfc7
|
@ -281,6 +281,12 @@ def FeatureDot2Insts : SubtargetFeature<"dot2-insts",
|
|||
"Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
|
||||
>;
|
||||
|
||||
def FeatureDoesNotSupportSRAMECC : SubtargetFeature<"no-sram-ecc-support",
|
||||
"DoesNotSupportSRAMECC",
|
||||
"true",
|
||||
"Hardware does not support SRAM ECC"
|
||||
>;
|
||||
|
||||
def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
|
||||
"EnableSRAMECC",
|
||||
"true",
|
||||
|
@ -439,14 +445,16 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
|
|||
"southern-islands",
|
||||
[FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
|
||||
FeatureWavefrontSize64,
|
||||
FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange]
|
||||
FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange,
|
||||
FeatureDoesNotSupportSRAMECC]
|
||||
>;
|
||||
|
||||
def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
|
||||
"sea-islands",
|
||||
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
|
||||
FeatureWavefrontSize64, FeatureFlatAddressSpace,
|
||||
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange]
|
||||
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
|
||||
FeatureDoesNotSupportSRAMECC]
|
||||
>;
|
||||
|
||||
def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
|
||||
|
@ -457,7 +465,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
|
|||
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
|
||||
FeatureScalarStores, FeatureInv2PiInlineImm,
|
||||
FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
|
||||
FeatureIntClamp, FeatureTrigReducedRange
|
||||
FeatureIntClamp, FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC
|
||||
]
|
||||
>;
|
||||
|
||||
|
@ -550,19 +558,22 @@ def FeatureISAVersion9_0_0 : FeatureSet<
|
|||
[FeatureGFX9,
|
||||
FeatureMadMixInsts,
|
||||
FeatureLDSBankCount32,
|
||||
FeatureCodeObjectV3]>;
|
||||
FeatureCodeObjectV3,
|
||||
FeatureDoesNotSupportSRAMECC]>;
|
||||
|
||||
def FeatureISAVersion9_0_2 : FeatureSet<
|
||||
[FeatureGFX9,
|
||||
FeatureMadMixInsts,
|
||||
FeatureLDSBankCount32,
|
||||
FeatureXNACK,
|
||||
FeatureDoesNotSupportSRAMECC,
|
||||
FeatureCodeObjectV3]>;
|
||||
|
||||
def FeatureISAVersion9_0_4 : FeatureSet<
|
||||
[FeatureGFX9,
|
||||
FeatureLDSBankCount32,
|
||||
FeatureFmaMixInsts,
|
||||
FeatureDoesNotSupportSRAMECC,
|
||||
FeatureCodeObjectV3]>;
|
||||
|
||||
def FeatureISAVersion9_0_6 : FeatureSet<
|
||||
|
|
|
@ -64,7 +64,7 @@ R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
|
|||
|
||||
GCNSubtarget &
|
||||
GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
|
||||
StringRef GPU, StringRef FS) {
|
||||
StringRef GPU, StringRef FS) {
|
||||
// Determine default and user-specified characteristics
|
||||
// On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
|
||||
// enabled, but some instructions do not respect them and they run at the
|
||||
|
@ -77,7 +77,8 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
|
|||
// Similarly we want enable-prt-strict-null to be on by default and not to
|
||||
// unset everything else if it is disabled
|
||||
|
||||
SmallString<256> FullFS("+promote-alloca,+load-store-opt,");
|
||||
// Assuming ECC is enabled is the conservative default.
|
||||
SmallString<256> FullFS("+promote-alloca,+load-store-opt,+sram-ecc,");
|
||||
|
||||
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
|
||||
FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
|
||||
|
@ -129,6 +130,14 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
|
|||
|
||||
HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
|
||||
|
||||
// ECC is on by default, but turn it off if the hardware doesn't support it
|
||||
// anyway. This matters for the gfx9 targets that have d16 loads but don't support
|
||||
// ECC.
|
||||
if (DoesNotSupportSRAMECC && EnableSRAMECC) {
|
||||
ToggleFeature(AMDGPU::FeatureSRAMECC);
|
||||
EnableSRAMECC = false;
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -206,6 +215,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
|||
HasDot1Insts(false),
|
||||
HasDot2Insts(false),
|
||||
EnableSRAMECC(false),
|
||||
DoesNotSupportSRAMECC(false),
|
||||
FlatAddressSpace(false),
|
||||
FlatInstOffsets(false),
|
||||
FlatGlobalInsts(false),
|
||||
|
|
|
@ -332,6 +332,7 @@ protected:
|
|||
bool HasDot1Insts;
|
||||
bool HasDot2Insts;
|
||||
bool EnableSRAMECC;
|
||||
bool DoesNotSupportSRAMECC;
|
||||
bool FlatAddressSpace;
|
||||
bool FlatInstOffsets;
|
||||
bool FlatGlobalInsts;
|
||||
|
|
|
@ -84,6 +84,10 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
|
|||
AMDGPU::FeatureTrapHandler,
|
||||
AMDGPU::FeatureCodeObjectV3,
|
||||
|
||||
// The default assumption needs to be that ECC is enabled, but no directly
|
||||
// exposed operations depend on it, so it can be safely inlined.
|
||||
AMDGPU::FeatureSRAMECC,
|
||||
|
||||
// Perf-tuning features
|
||||
AMDGPU::FeatureFastFMAF32,
|
||||
AMDGPU::HalfRate64Ops
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX906,NO-D16-HI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX906,NO-D16-HI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX803,NO-D16-HI %s
|
||||
|
||||
; GCN-LABEL: {{^}}load_local_lo_hi_v2i16_multi_use_lo:
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx902 -mattr=+sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx904 -mattr=+sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=+sram-ecc < %s | FileCheck -check-prefixes=GCN,ECC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
|
||||
|
||||
; Make sure the correct set of targets are marked with
|
||||
; FeatureDoesNotSupportSRAMECC, and +sram-ecc is ignored if it's never
|
||||
; supported.
|
||||
|
||||
; GCN-LABEL: {{^}}load_global_hi_v2i16_reglo_vreg:
|
||||
; NO-ECC: global_load_short_d16_hi
|
||||
; ECC: global_load_ushort
|
||||
define void @load_global_hi_v2i16_reglo_vreg(i16 addrspace(1)* %in, i16 %reg) {
|
||||
entry:
|
||||
%gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 -2047
|
||||
%load = load i16, i16 addrspace(1)* %gep
|
||||
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
|
||||
%build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
|
||||
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,70 @@
|
|||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -inline < %s | FileCheck %s
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes='cgscc(inline)' < %s | FileCheck %s
|
||||
|
||||
; sram-ecc can be safely ignored when inlining, since no intrinsics
|
||||
; or other directly exposed operations depend on it.
|
||||
|
||||
define i32 @func_default() #0 {
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define i32 @func_ecc_enabled() #1 {
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define i32 @func_ecc_disabled() #2 {
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @default_call_default(
|
||||
; CHECK-NEXT: ret i32 0
|
||||
define i32 @default_call_default() #0 {
|
||||
%call = call i32 @func_default()
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @ecc_enabled_call_default(
|
||||
; CHECK-NEXT: ret i32 0
|
||||
define i32 @ecc_enabled_call_default() #1 {
|
||||
%call = call i32 @func_default()
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @ecc_enabled_call_ecc_enabled(
|
||||
; CHECK-NEXT: ret i32 0
|
||||
define i32 @ecc_enabled_call_ecc_enabled() #1 {
|
||||
%call = call i32 @func_ecc_enabled()
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @ecc_enabled_call_ecc_disabled(
|
||||
; CHECK-NEXT: ret i32 0
|
||||
define i32 @ecc_enabled_call_ecc_disabled() #1 {
|
||||
%call = call i32 @func_ecc_disabled()
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @ecc_disabled_call_default(
|
||||
; CHECK-NEXT: ret i32 0
|
||||
define i32 @ecc_disabled_call_default() #2 {
|
||||
%call = call i32 @func_default()
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @ecc_disabled_call_ecc_enabled(
|
||||
; CHECK-NEXT: ret i32 0
|
||||
define i32 @ecc_disabled_call_ecc_enabled() #2 {
|
||||
%call = call i32 @func_ecc_enabled()
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @ecc_disabled_call_ecc_disabled(
|
||||
; CHECK-NEXT: ret i32 0
|
||||
define i32 @ecc_disabled_call_ecc_disabled() #2 {
|
||||
%call = call i32 @func_ecc_disabled()
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind "target-features"="+sram-ecc" }
|
||||
attributes #2 = { nounwind "target-features"="-sram-ecc" }
|
Loading…
Reference in New Issue