AMDGPU: Address todo for handling 1/(2 pi)

llvm-svn: 339814
This commit is contained in:
Matt Arsenault 2018-08-15 21:03:55 +00:00
parent 0f2c1cf429
commit 6c7ba82900
6 changed files with 384 additions and 164 deletions

View File

@ -3449,9 +3449,27 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
return performCtlz_CttzCombine(SDLoc(N), Cond, True, False, DCI);
}
static bool isConstantFPZero(SDValue N) {
if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N))
return C->isZero() && !C->isNegative();
static bool isInv2Pi(const APFloat &APF) {
static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
static const APFloat KF64(APFloat::IEEEdouble(), APInt(64, 0x3fc45f306dc9c882));
return APF.bitwiseIsEqual(KF16) ||
APF.bitwiseIsEqual(KF32) ||
APF.bitwiseIsEqual(KF64);
}
/// Returns true if \p N is a floating-point constant (or constant splat) that
/// is more expensive to use negated than as-is. That is the case for +0.0 and,
/// on subtargets that report hasInv2PiInlineImm(), for 1 / (2 * pi): the
/// negated forms of these values have no inline immediate, so negating them
/// carries an additional cost.
bool AMDGPUTargetLowering::isConstantCostlierToNegate(SDValue N) const {
  const ConstantFPSDNode *FPConst = isConstOrConstSplatFP(N);
  if (!FPConst)
    return false;

  // Only positive zero qualifies; -0.0 is deliberately excluded.
  const bool IsPositiveZero = FPConst->isZero() && !FPConst->isNegative();
  if (IsPositiveZero)
    return true;

  // The 1 / (2 * pi) check only applies when the subtarget reports the
  // corresponding inline immediate.
  return Subtarget->hasInv2PiInlineImm() && isInv2Pi(FPConst->getValueAPF());
}
@ -3577,9 +3595,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
SDValue RHS = N0.getOperand(1);
// 0 doesn't have a negated inline immediate.
// TODO: Shouldn't fold 1/2pi either, and should be generalized to other
// operations.
if (isConstantFPZero(RHS))
// TODO: This constant check should be generalized to other operations.
if (isConstantCostlierToNegate(RHS))
return SDValue();
SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);

View File

@ -95,6 +95,8 @@ protected:
SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
SDValue RHS, DAGCombinerInfo &DCI) const;
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
bool isConstantCostlierToNegate(SDValue N) const;
SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;

View File

@ -136,6 +136,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
HasVOP3PInsts(false),
HasMulI24(true),
HasMulU24(true),
HasInv2PiInlineImm(false),
HasFminFmaxLegacy(true),
EnablePromoteAlloca(false),
LocalMemorySize(0),
@ -190,7 +191,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasVGPRIndexMode(false),
HasScalarStores(false),
HasScalarAtomics(false),
HasInv2PiInlineImm(false),
HasSDWAOmod(false),
HasSDWAScalar(false),
HasSDWASdst(false),

View File

@ -72,6 +72,7 @@ protected:
bool HasVOP3PInsts;
bool HasMulI24;
bool HasMulU24;
bool HasInv2PiInlineImm;
bool HasFminFmaxLegacy;
bool EnablePromoteAlloca;
int LocalMemorySize;
@ -170,6 +171,10 @@ public:
return HasMulU24;
}
/// Returns the HasInv2PiInlineImm subtarget flag.
/// NOTE(review): by its name this indicates that 1 / (2 * pi) is available as
/// an inline immediate on this subtarget -- confirm against the feature
/// definition.
bool hasInv2PiInlineImm() const {
return HasInv2PiInlineImm;
}
/// Returns the HasFminFmaxLegacy subtarget flag.
bool hasFminFmaxLegacy() const {
return HasFminFmaxLegacy;
}
@ -347,7 +352,6 @@ protected:
bool HasVGPRIndexMode;
bool HasScalarStores;
bool HasScalarAtomics;
bool HasInv2PiInlineImm;
bool HasSDWAOmod;
bool HasSDWAScalar;
bool HasSDWASdst;
@ -782,9 +786,6 @@ public:
return HasScalarAtomics;
}
/// Returns the HasInv2PiInlineImm subtarget flag.
bool hasInv2PiInlineImm() const {
return HasInv2PiInlineImm;
}
bool hasDPP() const {
return HasDPP;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,28 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s
; --------------------------------------------------------------------------------
; rcp_legacy tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_rcp_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rcp_legacy_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: {{buffer|flat}}_store_dword [[RESULT]]
; Kernel under test: loads one float per work item, applies
; llvm.amdgcn.rcp.legacy, negates the result and stores it. The check lines
; preceding this function expect the negation to show up as a source modifier
; on the v_rcp_legacy_f32_e64 operand rather than as a separate instruction.
define amdgpu_kernel void @v_fneg_rcp_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
; Index both the input and output pointers by the sign-extended work item id.
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
; Volatile load of the input element.
%a = load volatile float, float addrspace(1)* %a.gep
%rcp = call float @llvm.amdgcn.rcp.legacy(float %a)
; Negation written as (-0.0 - x).
%fneg = fsub float -0.000000e+00, %rcp
store float %fneg, float addrspace(1)* %out.gep
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.amdgcn.rcp.legacy(float) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }