diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 6cccf93028d3..866017e49db5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -1355,7 +1355,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // .param .align 16 .b8 retval0[<size-in-bytes>], or // .param .b<size-in-bits> retval0 unsigned resultsz = TD->getTypeAllocSizeInBits(retTy); - if (retTy->isSingleValueType()) { + // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for + // these three types to match the logic in + // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype. + // Plus, this behavior is consistent with nvcc's. + if (retTy->isFloatingPointTy() || retTy->isIntegerTy() || + retTy->isPointerTy()) { // Scalar needs to be at least 32bit wide if (resultsz < 32) resultsz = 32; diff --git a/llvm/test/CodeGen/NVPTX/vector-return.ll b/llvm/test/CodeGen/NVPTX/vector-return.ll new file mode 100644 index 000000000000..15e50f8e1443 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/vector-return.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s + +declare <2 x float> @bar(<2 x float> %input) + +define void @foo(<2 x float> %input, <2 x float>* %output) { +; CHECK-LABEL: @foo +entry: + %call = tail call <2 x float> @bar(<2 x float> %input) +; CHECK: .param .align 8 .b8 retval0[8]; +; CHECK: ld.param.v2.f32 {[[ELEM1:%f[0-9]+]], [[ELEM2:%f[0-9]+]]}, [retval0+0]; + store <2 x float> %call, <2 x float>* %output, align 8 +; CHECK: st.v2.f32 [{{%rd[0-9]+}}], {[[ELEM1]], [[ELEM2]]} + ret void +}