AMDGPU: Assume ECC is enabled by default if supported

The test should really be checking for the property directly in the
code object headers, but there are problems with this. I don't see
this directly represented in the text form, and for the binary
emission this depends on a function-level subtarget feature to
emit a global flag.

llvm-svn: 357558
This commit is contained in:
Matt Arsenault 2019-04-03 01:58:57 +00:00
parent f7887d41cb
commit f426ddbfc7
7 changed files with 127 additions and 7 deletions

View File

@ -281,6 +281,12 @@ def FeatureDot2Insts : SubtargetFeature<"dot2-insts",
"Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
>;
// Marks hardware that has no SRAM ECC. Sets the subtarget's
// DoesNotSupportSRAMECC field to true; subtarget initialization consults that
// field to switch the sram-ecc feature back off when it was enabled.
def FeatureDoesNotSupportSRAMECC : SubtargetFeature<"no-sram-ecc-support",
"DoesNotSupportSRAMECC",
"true",
"Hardware does not support SRAM ECC"
>;
def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
"EnableSRAMECC",
"true",
@ -439,14 +445,16 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
"southern-islands",
[FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
FeatureWavefrontSize64,
FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange]
FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange,
FeatureDoesNotSupportSRAMECC]
>;
def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
"sea-islands",
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureFlatAddressSpace,
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange]
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
FeatureDoesNotSupportSRAMECC]
>;
def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
@ -457,7 +465,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
FeatureScalarStores, FeatureInv2PiInlineImm,
FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
FeatureIntClamp, FeatureTrigReducedRange
FeatureIntClamp, FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC
]
>;
@ -550,19 +558,22 @@ def FeatureISAVersion9_0_0 : FeatureSet<
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
FeatureCodeObjectV3]>;
FeatureCodeObjectV3,
FeatureDoesNotSupportSRAMECC]>;
def FeatureISAVersion9_0_2 : FeatureSet<
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
FeatureXNACK,
FeatureDoesNotSupportSRAMECC,
FeatureCodeObjectV3]>;
def FeatureISAVersion9_0_4 : FeatureSet<
[FeatureGFX9,
FeatureLDSBankCount32,
FeatureFmaMixInsts,
FeatureDoesNotSupportSRAMECC,
FeatureCodeObjectV3]>;
def FeatureISAVersion9_0_6 : FeatureSet<

View File

@ -64,7 +64,7 @@ R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
GCNSubtarget &
GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS) {
StringRef GPU, StringRef FS) {
// Determine default and user-specified characteristics
// On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
// enabled, but some instructions do not respect them and they run at the
@ -77,7 +77,8 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
// Similarly we want enable-prt-strict-null to be on by default and not to
// unset everything else if it is disabled
SmallString<256> FullFS("+promote-alloca,+load-store-opt,");
// Assuming ECC is enabled is the conservative default.
SmallString<256> FullFS("+promote-alloca,+load-store-opt,+sram-ecc,");
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
@ -129,6 +130,14 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
// ECC is on by default, but turn it off if the hardware doesn't support it
// anyway. This matters for the gfx9 targets that have d16 loads but don't
// support ECC.
if (DoesNotSupportSRAMECC && EnableSRAMECC) {
ToggleFeature(AMDGPU::FeatureSRAMECC);
EnableSRAMECC = false;
}
return *this;
}
@ -206,6 +215,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasDot1Insts(false),
HasDot2Insts(false),
EnableSRAMECC(false),
DoesNotSupportSRAMECC(false),
FlatAddressSpace(false),
FlatInstOffsets(false),
FlatGlobalInsts(false),

View File

@ -332,6 +332,7 @@ protected:
bool HasDot1Insts;
bool HasDot2Insts;
bool EnableSRAMECC;
bool DoesNotSupportSRAMECC;
bool FlatAddressSpace;
bool FlatInstOffsets;
bool FlatGlobalInsts;

View File

@ -84,6 +84,10 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
AMDGPU::FeatureTrapHandler,
AMDGPU::FeatureCodeObjectV3,
// The default assumption needs to be that ECC is enabled, but no directly
// exposed operations depend on it, so it can be safely ignored when inlining.
AMDGPU::FeatureSRAMECC,
// Perf-tuning features
AMDGPU::FeatureFastFMAF32,
AMDGPU::HalfRate64Ops

View File

@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s
; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX906,NO-D16-HI %s
; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX906,NO-D16-HI %s
; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX803,NO-D16-HI %s
; GCN-LABEL: {{^}}load_local_lo_hi_v2i16_multi_use_lo:

View File

@ -0,0 +1,24 @@
; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx902 -mattr=+sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx904 -mattr=+sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=+sram-ecc < %s | FileCheck -check-prefixes=GCN,ECC %s
; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
; Make sure the correct set of targets are marked with
; FeatureDoesNotSupportSRAMECC, and +sram-ecc is ignored if it's never
; supported.
; GCN-LABEL: {{^}}load_global_hi_v2i16_reglo_vreg:
; NO-ECC: global_load_short_d16_hi
; ECC: global_load_ushort
; Loads an i16 from global memory and places it in element 1 of a <2 x i16>
; whose element 0 is %reg. The check lines above expect a d16_hi load when ECC
; is off and a plain ushort load when ECC is on.
define void @load_global_hi_v2i16_reglo_vreg(i16 addrspace(1)* %in, i16 %reg) {
entry:
%gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 -2047
%load = load i16, i16 addrspace(1)* %gep
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
%build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
ret void
}

View File

@ -0,0 +1,70 @@
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -inline < %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes='cgscc(inline)' < %s | FileCheck %s
; sram-ecc can be safely ignored when inlining, since no intrinsics
; or other directly exposed operations depend on it.
define i32 @func_default() #0 {
ret i32 0
}
define i32 @func_ecc_enabled() #1 {
ret i32 0
}
define i32 @func_ecc_disabled() #2 {
ret i32 0
}
; CHECK-LABEL: @default_call_default(
; CHECK-NEXT: ret i32 0
define i32 @default_call_default() #0 {
%call = call i32 @func_default()
ret i32 %call
}
; CHECK-LABEL: @ecc_enabled_call_default(
; CHECK-NEXT: ret i32 0
define i32 @ecc_enabled_call_default() #1 {
%call = call i32 @func_default()
ret i32 %call
}
; CHECK-LABEL: @ecc_enabled_call_ecc_enabled(
; CHECK-NEXT: ret i32 0
define i32 @ecc_enabled_call_ecc_enabled() #1 {
%call = call i32 @func_ecc_enabled()
ret i32 %call
}
; CHECK-LABEL: @ecc_enabled_call_ecc_disabled(
; CHECK-NEXT: ret i32 0
define i32 @ecc_enabled_call_ecc_disabled() #1 {
%call = call i32 @func_ecc_disabled()
ret i32 %call
}
; CHECK-LABEL: @ecc_disabled_call_default(
; CHECK-NEXT: ret i32 0
define i32 @ecc_disabled_call_default() #2 {
%call = call i32 @func_default()
ret i32 %call
}
; CHECK-LABEL: @ecc_disabled_call_ecc_enabled(
; CHECK-NEXT: ret i32 0
define i32 @ecc_disabled_call_ecc_enabled() #2 {
%call = call i32 @func_ecc_enabled()
ret i32 %call
}
; CHECK-LABEL: @ecc_disabled_call_ecc_disabled(
; CHECK-NEXT: ret i32 0
define i32 @ecc_disabled_call_ecc_disabled() #2 {
%call = call i32 @func_ecc_disabled()
ret i32 %call
}
attributes #0 = { nounwind }
attributes #1 = { nounwind "target-features"="+sram-ecc" }
attributes #2 = { nounwind "target-features"="-sram-ecc" }