[NVPTX] aligned byte-buffers for vector return types
Summary: Fixes PR21100, which is caused by an inconsistency between the declared return type and the return type expected at the call site. The new behavior is consistent with nvcc and with the NVPTXTargetLowering::getPrototype function. Test Plan: test/CodeGen/NVPTX/vector-return.ll Reviewers: jholewinski Reviewed By: jholewinski Subscribers: llvm-commits, meheff, eliben, jholewinski Differential Revision: http://reviews.llvm.org/D5612 llvm-svn: 220607
This commit is contained in:
parent
5a1106f8fc
commit
ea51161a94
|
@ -1355,7 +1355,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
|||
// .param .align 16 .b8 retval0[<size-in-bytes>], or
|
||||
// .param .b<size-in-bits> retval0
|
||||
unsigned resultsz = TD->getTypeAllocSizeInBits(retTy);
|
||||
if (retTy->isSingleValueType()) {
|
||||
// Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
|
||||
// these three types to match the logic in
|
||||
// NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
|
||||
// Plus, this behavior is consistent with nvcc's.
|
||||
if (retTy->isFloatingPointTy() || retTy->isIntegerTy() ||
|
||||
retTy->isPointerTy()) {
|
||||
// Scalar needs to be at least 32bit wide
|
||||
if (resultsz < 32)
|
||||
resultsz = 32;
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
|
||||
|
||||
declare <2 x float> @bar(<2 x float> %input)
|
||||
|
||||
define void @foo(<2 x float> %input, <2 x float>* %output) {
|
||||
; CHECK-LABEL: @foo
|
||||
entry:
|
||||
%call = tail call <2 x float> @bar(<2 x float> %input)
|
||||
; CHECK: .param .align 8 .b8 retval0[8];
|
||||
; CHECK: ld.param.v2.f32 {[[ELEM1:%f[0-9]+]], [[ELEM2:%f[0-9]+]]}, [retval0+0];
|
||||
store <2 x float> %call, <2 x float>* %output, align 8
|
||||
; CHECK: st.v2.f32 [{{%rd[0-9]+}}], {[[ELEM1]], [[ELEM2]]}
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue