R600: Implement isZExtFree.

This allows 64-bit operations that are truncated to be reduced
to 32-bit ones.

llvm-svn: 204946
This commit is contained in:
Matt Arsenault 2014-03-27 17:23:31 +00:00
parent d125d74a73
commit b517c8128e
7 changed files with 66 additions and 6 deletions

View File

@ -275,6 +275,22 @@ bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const {
(Dest->getPrimitiveSizeInBits() % 32 == 0);
}
/// IR-type flavour of the zero-extension cost query.
///
/// \param Src  type being extended from.
/// \param Dest type being extended to.
/// \returns true only when the scalar type of \p Src is 32 bits wide and the
/// scalar type of \p Dest is 64 bits wide, as reported by the data layout.
bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
  // Sizes are taken from the scalar type, so a vector type is judged by the
  // width of its element type.
  const DataLayout *Layout = getDataLayout();
  const unsigned FromBits = Layout->getTypeSizeInBits(Src->getScalarType());
  const unsigned ToBits = Layout->getTypeSizeInBits(Dest->getScalarType());

  // Anything other than a 32-bit source is never reported as free.
  if (FromBits != 32)
    return false;
  return ToBits == 64;
}
/// Value-type flavour of the zero-extension cost query.
///
/// \returns true only for an i32 to i64 extension.
bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
  // Loading any 64-bit value into registers really takes two 32-bit moves, so
  // the additional "mov 0" needed to zero-extend into a 64-bit value is free
  // for all practical purposes. As this hook is used, answering "free" lets
  // 64-bit operations be reduced to 32-bit ones, which is always good.
  if (Src != MVT::i32)
    return false;
  return Dest == MVT::i64;
}
bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
// There aren't really 64-bit registers, but pairs of 32-bit ones and only a
// limited number of native 64-bit operations. Shrinking an operation to fit

View File

@ -87,6 +87,10 @@ public:
virtual bool isFNegFree(EVT VT) const override;
virtual bool isTruncateFree(EVT Src, EVT Dest) const override;
virtual bool isTruncateFree(Type *Src, Type *Dest) const override;
// Zero-extension cost queries. Both overloads report only a 32-bit to 64-bit
// extension as free; the Type overload compares scalar (element) sizes.
virtual bool isZExtFree(Type *Src, Type *Dest) const override;
virtual bool isZExtFree(EVT Src, EVT Dest) const override;
virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
virtual MVT getVectorIdxTy() const override;

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
declare i32 @llvm.r600.read.tidig.x() readnone
@ -68,3 +68,17 @@ define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> add
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
ret void
}
; An i64 add whose only use is a trunc to i32. The checks below expect two
; single-dword scalar loads, a single 32-bit scalar add of the loaded
; registers, and no add-with-carry instruction before the result is copied
; to a vector register and stored as one dword.
; SI-LABEL: @trunc_i64_add_to_i32
; SI: S_LOAD_DWORD [[SREG0:s[0-9]+]],
; SI: S_LOAD_DWORD [[SREG1:s[0-9]+]],
; SI: S_ADD_I32 [[SRESULT:s[0-9]+]], [[SREG1]], [[SREG0]]
; SI-NOT: ADDC
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: BUFFER_STORE_DWORD [[VRESULT]],
define void @trunc_i64_add_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
%add = add i64 %b, %a
%trunc = trunc i64 %add to i32
store i32 %trunc, i32 addrspace(1)* %out, align 8
ret void
}

View File

@ -40,3 +40,15 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
}
; An i64 multiply whose only use is a trunc to i32. The checks below expect
; two single-dword scalar loads followed by a V_MUL_LO_I32 and a one-dword
; store.
; SI-CHECK-LABEL: @trunc_i64_mul_to_i32
; SI-CHECK: S_LOAD_DWORD
; SI-CHECK: S_LOAD_DWORD
; SI-CHECK: V_MUL_LO_I32
; SI-CHECK: BUFFER_STORE_DWORD
define void @trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
%mul = mul i64 %b, %a
%trunc = trunc i64 %mul to i32
store i32 %trunc, i32 addrspace(1)* %out, align 8
ret void
}

View File

@ -114,3 +114,16 @@ define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64
store i64 %or, i64 addrspace(1)* %out
ret void
}
; An i64 bitwise or whose only use is a trunc to i32. The checks below expect
; two single-dword scalar loads, a single 32-bit scalar or of the loaded
; registers, a copy to a vector register and a one-dword store.
; SI-LABEL: @trunc_i64_or_to_i32
; SI: S_LOAD_DWORD [[SREG0:s[0-9]+]],
; SI: S_LOAD_DWORD [[SREG1:s[0-9]+]],
; SI: S_OR_B32 [[SRESULT:s[0-9]+]], [[SREG1]], [[SREG0]]
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: BUFFER_STORE_DWORD [[VRESULT]],
define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
; Note: despite its name, %add holds the result of an or.
%add = or i64 %b, %a
%trunc = trunc i64 %add to i32
store i32 %trunc, i32 addrspace(1)* %out, align 8
ret void
}

View File

@ -90,10 +90,10 @@ define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
}
; FUNC-LABEL: @sext_in_reg_i32_to_i64
; SI: S_LOAD_DWORDX2
; SI: S_ADD_I32
; SI-NEXT: S_ADDC_U32
; SI-NEXT: S_ASHR_I32 s{{[0-9]+}}, s{{[0-9]+}}, 31
; SI: S_LOAD_DWORD
; SI: S_LOAD_DWORD
; SI: S_ADD_I32 [[ADD:s[0-9]+]],
; SI: S_ASHR_I32 s{{[0-9]+}}, [[ADD]], 31
; SI: BUFFER_STORE_DWORDX2
define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%c = add i64 %a, %b

View File

@ -34,11 +34,12 @@ define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) {
; SI: V_ADD_I32_e32 v[[LO_ADD:[0-9]+]], s[[LO_SREG]],
; SI: V_LSHL_B64 v{{\[}}[[LO_VREG:[0-9]+]]:{{[0-9]+\]}}, v{{\[}}[[LO_ADD]]:{{[0-9]+\]}}, 2
; SI: BUFFER_STORE_DWORD v[[LO_VREG]],
define void @trunc_shl_i64(i32 addrspace(1)* %out, i64 %a) {
define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) {
; The shift result %b is stored twice: truncated to i32 through %out and in
; full as an i64 through %out2, so it has a 64-bit user besides the trunc.
%aa = add i64 %a, 234 ; Prevent shrinking store.
%b = shl i64 %aa, 2
%result = trunc i64 %b to i32
store i32 %result, i32 addrspace(1)* %out, align 4
store i64 %b, i64 addrspace(1)* %out2, align 8 ; Prevent reducing ops to 32-bits
ret void
}