From d7a73824e46ac8aae8b7ba03069c662fb32f0f20 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Thu, 20 Jul 2017 21:16:03 +0000 Subject: [PATCH] [NVPTX] Add lowering of i128 params. The patch adds support for lowering i128 params. The changes needed to support i128 as a "special case" of the integer type are quite trivial. With this patch, we lower i128 params the same way as aggregates of size 16 bytes: .param .b8 _ [16]. Currently, NVPTX can't deal with 128-bit integers: * in some cases because of failed assertions like ValVTs.size() == OutVals.size() && "Bad return value decomposition" * in other cases because it emits PTX with .i128 or .u128 types (which are not valid [1]) [1] http://docs.nvidia.com/cuda/parallel-thread-execution/index.html#fundamental-types Differential Revision: https://reviews.llvm.org/D34555 Patch by: Denys Zariaiev (denys.zariaiev@gmail.com) llvm-svn: 308675 --- clang/lib/Basic/Targets.cpp | 4 +- clang/test/CodeGen/target-data.c | 4 +- llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 14 ++++- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 27 ++++++--- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 2 +- llvm/test/CodeGen/NVPTX/i128-global.ll | 7 +++ llvm/test/CodeGen/NVPTX/i128-param.ll | 58 ++++++++++++++++++++ llvm/test/CodeGen/NVPTX/i128-retval.ll | 28 ++++++++++ 8 files changed, 129 insertions(+), 15 deletions(-) create mode 100644 llvm/test/CodeGen/NVPTX/i128-global.ll create mode 100644 llvm/test/CodeGen/NVPTX/i128-param.ll create mode 100644 llvm/test/CodeGen/NVPTX/i128-retval.ll diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index 8a5c47b34016..01137b66b385 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -1833,9 +1833,9 @@ public: GPU = CudaArch::SM_20; if (TargetPointerWidth == 32) - resetDataLayout("e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"); + resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); else - resetDataLayout("e-i64:64-v16:16-v32:32-n16:32:64"); + 
resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64"); // If possible, get a TargetInfo for our host triple, so we can match its // types. diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c index 851ce5831fa3..3869afec7858 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -116,11 +116,11 @@ // RUN: %clang_cc1 -triple nvptx-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=NVPTX -// NVPTX: target datalayout = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64" +// NVPTX: target datalayout = "e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64" // RUN: %clang_cc1 -triple nvptx64-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=NVPTX64 -// NVPTX64: target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" +// NVPTX64: target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" // RUN: %clang_cc1 -triple r600-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=R600 diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 0139646fc3f7..82634cf02982 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -400,7 +400,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { O << " ("; if (isABI) { - if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) { + if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) { unsigned size = 0; if (auto *ITy = dyn_cast(Ty)) { size = ITy->getBitWidth(); @@ -418,7 +418,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { } else if (isa(Ty)) { O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits() << " func_retval0"; - } else if (Ty->isAggregateType() || Ty->isVectorTy()) { + } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) { unsigned totalsz = DL.getTypeAllocSize(Ty); unsigned retAlignment = 0; if (!getAlign(*F, 0, retAlignment)) 
@@ -1425,6 +1425,14 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, else O << " .align " << GVar->getAlignment(); + // Special case for i128 + if (ETy->isIntegerTy(128)) { + O << " .b8 "; + getSymbol(GVar)->print(O, MAI); + O << "[16]"; + return; + } + if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) { O << " ."; O << getPTXFundamentalTypeStr(ETy); @@ -1551,7 +1559,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { } if (!PAL.hasParamAttribute(paramIndex, Attribute::ByVal)) { - if (Ty->isAggregateType() || Ty->isVectorTy()) { + if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) { // Just print .param .align .b8 .param[size]; // = PAL.getparamalignment // size = typeallocsize of element type diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 9070b628f12e..d939fe5c09bd 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -168,6 +168,19 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, SmallVector TempVTs; SmallVector TempOffsets; + // Special case for i128 - decompose to (i64, i64) + if (Ty->isIntegerTy(128)) { + ValueVTs.push_back(EVT(MVT::i64)); + ValueVTs.push_back(EVT(MVT::i64)); + + if (Offsets) { + Offsets->push_back(StartingOffset + 0); + Offsets->push_back(StartingOffset + 8); + } + + return; + } + ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset); for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { EVT VT = TempVTs[i]; @@ -1262,7 +1275,7 @@ std::string NVPTXTargetLowering::getPrototype( O << "()"; } else { O << "("; - if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) { + if (retTy->isFloatingPointTy() || (retTy->isIntegerTy() && !retTy->isIntegerTy(128))) { unsigned size = 0; if (auto *ITy = dyn_cast(retTy)) { size = ITy->getBitWidth(); @@ -1280,7 +1293,7 @@ std::string 
NVPTXTargetLowering::getPrototype( O << ".param .b" << size << " _"; } else if (isa(retTy)) { O << ".param .b" << PtrVT.getSizeInBits() << " _"; - } else if (retTy->isAggregateType() || retTy->isVectorTy()) { + } else if (retTy->isAggregateType() || retTy->isVectorTy() || retTy->isIntegerTy(128)) { auto &DL = CS->getCalledFunction()->getParent()->getDataLayout(); O << ".param .align " << retAlignment << " .b8 _[" << DL.getTypeAllocSize(retTy) << "]"; @@ -1302,7 +1315,7 @@ std::string NVPTXTargetLowering::getPrototype( first = false; if (!Outs[OIdx].Flags.isByVal()) { - if (Ty->isAggregateType() || Ty->isVectorTy()) { + if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) { unsigned align = 0; const CallInst *CallI = cast(CS->getInstruction()); // +1 because index 0 is reserved for return type alignment @@ -1458,7 +1471,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned AllocSize = DL.getTypeAllocSize(Ty); SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); bool NeedAlign; // Does argument declaration specify alignment? - if (Ty->isAggregateType() || Ty->isVectorTy()) { + if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) { // declare .param .align .b8 .param[]; SDValue DeclareParamOps[] = { Chain, DAG.getConstant(ArgAlign, dl, MVT::i32), @@ -1634,8 +1647,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // these three types to match the logic in // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype. // Plus, this behavior is consistent with nvcc's. 
- if (RetTy->isFloatingPointTy() || RetTy->isIntegerTy() || - RetTy->isPointerTy()) { + if (RetTy->isFloatingPointTy() || RetTy->isPointerTy() || + (RetTy->isIntegerTy() && !RetTy->isIntegerTy(128))) { // Scalar needs to be at least 32bit wide if (resultsz < 32) resultsz = 32; @@ -2366,7 +2379,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( if (theArgs[i]->use_empty()) { // argument is dead - if (Ty->isAggregateType()) { + if (Ty->isAggregateType() || Ty->isIntegerTy(128)) { SmallVector vtparts; ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts); diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 2b6ba8c85d4d..ac21563ee9ab 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -81,7 +81,7 @@ static std::string computeDataLayout(bool is64Bit) { if (!is64Bit) Ret += "-p:32:32"; - Ret += "-i64:64-v16:16-v32:32-n16:32:64"; + Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64"; return Ret; } diff --git a/llvm/test/CodeGen/NVPTX/i128-global.ll b/llvm/test/CodeGen/NVPTX/i128-global.ll new file mode 100644 index 000000000000..f53575d4ddb3 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/i128-global.ll @@ -0,0 +1,7 @@ +; RUN: llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | FileCheck %s + +; CHECK: .visible .global .align 16 .b8 G1[16] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +@G1 = global i128 1 + +; CHECK: .visible .global .align 16 .b8 G2[16]; +@G2 = global i128 0 diff --git a/llvm/test/CodeGen/NVPTX/i128-param.ll b/llvm/test/CodeGen/NVPTX/i128-param.ll new file mode 100644 index 000000000000..7cb603546aed --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/i128-param.ll @@ -0,0 +1,58 @@ +; RUN: llc < %s -O0 -march=nvptx -mcpu=sm_20 | FileCheck %s + +; CHECK-LABEL: .visible .func callee( +; CHECK-NEXT: .param .align 16 .b8 callee_param_0[16], +; CHECK-NEXT: .param .align 16 .b8 callee_param_1[16], +define void @callee(i128, i128, i128*) { + ; 
CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [callee_param_0]; + ; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [callee_param_1]; + + ; CHECK: mul.lo.s64 %[[REG4:rd[0-9]+]], %[[REG0]], %[[REG3]]; + ; CHECK-NEXT: mul.hi.u64 %[[REG5:rd[0-9]+]], %[[REG0]], %[[REG2]]; + ; CHECK-NEXT: add.s64 %[[REG6:rd[0-9]+]], %[[REG5]], %[[REG4]]; + ; CHECK-NEXT: mul.lo.s64 %[[REG7:rd[0-9]+]], %[[REG1]], %[[REG2]]; + ; CHECK-NEXT: add.s64 %[[REG8:rd[0-9]+]], %[[REG6]], %[[REG7]]; + ; CHECK-NEXT: mul.lo.s64 %[[REG9:rd[0-9]+]], %[[REG0]], %[[REG2]]; + %a = mul i128 %0, %1 + + store i128 %a, i128* %2 + ret void +} + +; CHECK-LABEL: .visible .entry caller_kernel( +; CHECK-NEXT: .param .align 16 .b8 caller_kernel_param_0[16], +; CHECK-NEXT: .param .align 16 .b8 caller_kernel_param_1[16], +define ptx_kernel void @caller_kernel(i128, i128, i128*) { +start: + ; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_kernel_param_0]; + ; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_kernel_param_1]; + + ; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0 + ; CHECK: .param .align 16 .b8 param0[16]; + ; CHECK-NEXT: st.param.v2.b64 [param0+0], {%[[REG0]], %[[REG1]]} + ; CHECK: .param .align 16 .b8 param1[16]; + ; CHECK-NEXT: st.param.v2.b64 [param1+0], {%[[REG2]], %[[REG3]]} + ; CHECK: } // callseq [[CALLSEQ_ID]] + call void @callee(i128 %0, i128 %1, i128* %2) + + ret void +} + +; CHECK-LABEL: .visible .func caller_func( +; CHECK-NEXT: .param .align 16 .b8 caller_func_param_0[16], +; CHECK-NEXT: .param .align 16 .b8 caller_func_param_1[16], +define void @caller_func(i128, i128, i128*) { +start: + ; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_func_param_0] + ; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_func_param_1] + + ; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0 + ; CHECK: .param .align 16 .b8 param0[16]; + ; CHECK: st.param.v2.b64 
[param0+0], {%[[REG0]], %[[REG1]]} + ; CHECK: .param .align 16 .b8 param1[16]; + ; CHECK: st.param.v2.b64 [param1+0], {%[[REG2]], %[[REG3]]} + ; CHECK: } // callseq [[CALLSEQ_ID]] + call void @callee(i128 %0, i128 %1, i128* %2) + + ret void +} diff --git a/llvm/test/CodeGen/NVPTX/i128-retval.ll b/llvm/test/CodeGen/NVPTX/i128-retval.ll new file mode 100644 index 000000000000..015b0199d835 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/i128-retval.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | FileCheck %s + +; CHECK-LABEL: .visible .func (.param .align 16 .b8 func_retval0[16]) callee( +define i128 @callee(i128) { + ; CHECK: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [callee_param_0]; + ; CHECK: st.param.v2.b64 [func_retval0+0], {%[[REG0]], %[[REG1]]} + ret i128 %0 +} + +; CHECK-LABEL: .visible .func caller( +define void @caller(i128, i128*) { +start: + ; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_param_0]; + ; CHECK-DAG: ld.param.u64 %[[OUT:rd[0-9]+]], [caller_param_1]; + + ; CHECK: { // callseq 0, 0 + ; CHECK: .param .align 16 .b8 retval0[16]; + ; CHECK: call.uni (retval0), + ; CHECK: ld.param.v2.b64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [retval0+0]; + ; CHECK: } // callseq 0 + %a = call i128 @callee(i128 %0) + + ; CHECK-DAG: st.u64 [%[[OUT]]], %[[REG2]]; + ; CHECK-DAG: st.u64 [%[[OUT]]+8], %[[REG3]]; + store i128 %a, i128* %1 + + ret void +}