[SystemZ::TTI] Return zero cost for scalar load/store connected with a bswap.
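Since byte-swapping loads and stores are supported, a 'load -> bswap' or 'bswap -> store' sequence should have a total cost of one instruction: the bswap keeps its cost of one, and the scalar load or store that gets folded into it is reported with a cost of zero. Review: Ulrich Weigand https://reviews.llvm.org/D54870 llvm-svn: 347732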
This commit is contained in:
parent
e9af715820
commit
5da8e432b9
|
@ -939,6 +939,15 @@ isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue) {
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool isBswapIntrinsicCall(const Value *V) {
|
||||
if (const Instruction *I = dyn_cast<Instruction>(V))
|
||||
if (auto *CI = dyn_cast<CallInst>(I))
|
||||
if (auto *F = CI->getCalledFunction())
|
||||
if (F->getIntrinsicID() == Intrinsic::bswap)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||
unsigned Alignment, unsigned AddressSpace,
|
||||
const Instruction *I) {
|
||||
|
@ -975,6 +984,22 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
|||
unsigned NumOps =
|
||||
(Src->isVectorTy() ? getNumVectorRegs(Src) : getNumberOfParts(Src));
|
||||
|
||||
// Store/Load reversed saves one instruction.
|
||||
if (!Src->isVectorTy() && NumOps == 1 && I != nullptr) {
|
||||
if (Opcode == Instruction::Load && I->hasOneUse()) {
|
||||
const Instruction *LdUser = cast<Instruction>(*I->user_begin());
|
||||
// In case of load -> bswap -> store, return normal cost for the load.
|
||||
if (isBswapIntrinsicCall(LdUser) &&
|
||||
(!LdUser->hasOneUse() || !isa<StoreInst>(*LdUser->user_begin())))
|
||||
return 0;
|
||||
}
|
||||
else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
|
||||
const Value *StoredVal = SI->getValueOperand();
|
||||
if (StoredVal->hasOneUse() && isBswapIntrinsicCall(StoredVal))
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (Src->getScalarSizeInBits() == 128)
|
||||
// 128 bit scalars are held in a pair of two 64 bit registers.
|
||||
NumOps *= 2;
|
||||
|
|
|
@ -40,6 +40,73 @@ define void @bswap_i16(i16 %arg, <2 x i16> %arg2, <4 x i16> %arg4,
|
|||
ret void
|
||||
}
|
||||
|
||||
; Test that store/load reversed is reflected in costs.
; Three i64 patterns are exercised, and the expected costs are listed first:
;   1. load -> bswap (result otherwise unused): the load is expected to cost 0.
;   2. bswap of an argument -> store: the store is expected to cost 0.
;   3. load -> bswap -> store: the load keeps cost 1 and the store costs 0.
; In every pattern the bswap call itself is expected to cost 1.
|
||||
define void @bswap_i64_mem(i64* %src, i64 %arg, i64* %dst) {
|
||||
; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i64_mem':
|
||||
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i64, i64* %src
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i64 @llvm.bswap.i64(i64 %Ld1)
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call i64 @llvm.bswap.i64(i64 %arg)
|
||||
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i64 %swp2, i64* %dst
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load i64, i64* %src
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call i64 @llvm.bswap.i64(i64 %Ld2)
|
||||
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i64 %swp3, i64* %dst
|
||||
; Pattern 1: the loaded value feeds only the bswap.
%Ld1 = load i64, i64* %src
|
||||
%swp1 = tail call i64 @llvm.bswap.i64(i64 %Ld1)
|
||||
|
||||
; Pattern 2: the swapped argument feeds only the store.
%swp2 = tail call i64 @llvm.bswap.i64(i64 %arg)
|
||||
store i64 %swp2, i64* %dst
|
||||
|
||||
; Pattern 3: load, swap and store chained together.
%Ld2 = load i64, i64* %src
|
||||
%swp3 = tail call i64 @llvm.bswap.i64(i64 %Ld2)
|
||||
store i64 %swp3, i64* %dst
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; i32 variant of the load/store-reversed cost test. Expected costs:
;   1. load -> bswap (result otherwise unused): load 0, bswap 1.
;   2. bswap of an argument -> store: bswap 1, store 0.
;   3. load -> bswap -> store: load 1, bswap 1, store 0.
define void @bswap_i32_mem(i32* %src, i32 %arg, i32* %dst) {
|
||||
; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i32_mem':
|
||||
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i32, i32* %src
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i32 @llvm.bswap.i32(i32 %Ld1)
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call i32 @llvm.bswap.i32(i32 %arg)
|
||||
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i32 %swp2, i32* %dst
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load i32, i32* %src
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call i32 @llvm.bswap.i32(i32 %Ld2)
|
||||
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i32 %swp3, i32* %dst
|
||||
; Pattern 1: the loaded value feeds only the bswap.
%Ld1 = load i32, i32* %src
|
||||
%swp1 = tail call i32 @llvm.bswap.i32(i32 %Ld1)
|
||||
|
||||
; Pattern 2: the swapped argument feeds only the store.
%swp2 = tail call i32 @llvm.bswap.i32(i32 %arg)
|
||||
store i32 %swp2, i32* %dst
|
||||
|
||||
; Pattern 3: load, swap and store chained together.
%Ld2 = load i32, i32* %src
|
||||
%swp3 = tail call i32 @llvm.bswap.i32(i32 %Ld2)
|
||||
store i32 %swp3, i32* %dst
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; i16 variant of the load/store-reversed cost test. Expected costs:
;   1. load -> bswap (result otherwise unused): load 0, bswap 1.
;   2. bswap of an argument -> store: bswap 1, store 0.
;   3. load -> bswap -> store: load 1, bswap 1, store 0.
define void @bswap_i16_mem(i16* %src, i16 %arg, i16* %dst) {
|
||||
; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i16_mem':
|
||||
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i16, i16* %src
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i16 @llvm.bswap.i16(i16 %Ld1)
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call i16 @llvm.bswap.i16(i16 %arg)
|
||||
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i16 %swp2, i16* %dst
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load i16, i16* %src
|
||||
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call i16 @llvm.bswap.i16(i16 %Ld2)
|
||||
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i16 %swp3, i16* %dst
|
||||
; Pattern 1: the loaded value feeds only the bswap.
%Ld1 = load i16, i16* %src
|
||||
%swp1 = tail call i16 @llvm.bswap.i16(i16 %Ld1)
|
||||
|
||||
; Pattern 2: the swapped argument feeds only the store.
%swp2 = tail call i16 @llvm.bswap.i16(i16 %arg)
|
||||
store i16 %swp2, i16* %dst
|
||||
|
||||
; Pattern 3: load, swap and store chained together.
%Ld2 = load i16, i16* %src
|
||||
%swp3 = tail call i16 @llvm.bswap.i16(i16 %Ld2)
|
||||
store i16 %swp3, i16* %dst
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
declare i64 @llvm.bswap.i64(i64)
|
||||
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
|
||||
|
|
Loading…
Reference in New Issue