diff --git a/llvm/lib/Target/PTX/CMakeLists.txt b/llvm/lib/Target/PTX/CMakeLists.txt index 331266da30b3..c4448d6f0f5f 100644 --- a/llvm/lib/Target/PTX/CMakeLists.txt +++ b/llvm/lib/Target/PTX/CMakeLists.txt @@ -1,6 +1,7 @@ set(LLVM_TARGET_DEFINITIONS PTX.td) tablegen(PTXGenAsmWriter.inc -gen-asm-writer) +tablegen(PTXGenCallingConv.inc -gen-callingconv) tablegen(PTXGenDAGISel.inc -gen-dag-isel) tablegen(PTXGenInstrInfo.inc -gen-instr-desc) tablegen(PTXGenInstrNames.inc -gen-instr-enums) diff --git a/llvm/lib/Target/PTX/Makefile b/llvm/lib/Target/PTX/Makefile index 2c40d6994094..844480f3b5a6 100644 --- a/llvm/lib/Target/PTX/Makefile +++ b/llvm/lib/Target/PTX/Makefile @@ -13,6 +13,7 @@ TARGET = PTX # Make sure that tblgen is run, first thing. BUILT_SOURCES = PTXGenAsmWriter.inc \ + PTXGenCallingConv.inc \ PTXGenDAGISel.inc \ PTXGenInstrInfo.inc \ PTXGenInstrNames.inc \ diff --git a/llvm/lib/Target/PTX/PTX.td b/llvm/lib/Target/PTX/PTX.td index 231866a08953..2c7bd3b68d9e 100644 --- a/llvm/lib/Target/PTX/PTX.td +++ b/llvm/lib/Target/PTX/PTX.td @@ -71,6 +71,12 @@ def : Proc<"generic", []>; include "PTXRegisterInfo.td" +//===----------------------------------------------------------------------===// +// Calling Conventions +//===----------------------------------------------------------------------===// + +include "PTXCallingConv.td" + //===----------------------------------------------------------------------===// // Instruction Descriptions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/PTX/PTXAsmPrinter.cpp b/llvm/lib/Target/PTX/PTXAsmPrinter.cpp index cf8d461ba1a9..1142144da151 100644 --- a/llvm/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/llvm/lib/Target/PTX/PTXAsmPrinter.cpp @@ -80,11 +80,11 @@ static const char *getRegisterTypeName(unsigned RegNo) { #define TEST_REGCLS(cls, clsstr) \ if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr; TEST_REGCLS(RegPred, pred); - TEST_REGCLS(RegI16, u16); - TEST_REGCLS(RegI32, u32); - TEST_REGCLS(RegI64, u64); - TEST_REGCLS(RegF32, f32); - TEST_REGCLS(RegF64, f64); + TEST_REGCLS(RegI16, b16); + TEST_REGCLS(RegI32, b32); + TEST_REGCLS(RegI64, b64); + TEST_REGCLS(RegF32, b32); + TEST_REGCLS(RegF64, b64); #undef TEST_REGCLS llvm_unreachable("Not in any register class!"); @@ -394,17 +394,23 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { const PTXMachineFunctionInfo *MFI = MF->getInfo(); const bool isKernel = MFI->isKernel(); - unsigned reg; std::string decl = isKernel ? ".entry" : ".func"; - // Print return register - reg = MFI->retReg(); - if (!isKernel && reg != PTX::NoRegister) { - decl += " (.reg ."; // FIXME: could it return in .param space? - decl += getRegisterTypeName(reg); - decl += " "; - decl += getRegisterName(reg); + if (!isKernel) { + decl += " ("; + + for (PTXMachineFunctionInfo::ret_iterator + i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i; + i != e; ++i) { + if (i != b) { + decl += ", "; + } + decl += ".reg ."; + decl += getRegisterTypeName(*i); + decl += " "; + decl += getRegisterName(*i); + } decl += ")"; } @@ -412,40 +418,66 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { decl += " "; decl += CurrentFnSym->getName().str(); - // Print parameter list - if (!MFI->argRegEmpty()) { - decl += " ("; - if (isKernel) { - unsigned cnt = 0; - for(PTXMachineFunctionInfo::reg_iterator - i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; - i != e; ++i) { - reg = *i; - assert(reg != PTX::NoRegister && "Not a valid register!"); - if (i != b) - decl += ", "; - decl += ".param ."; - decl += getRegisterTypeName(reg); - decl += " "; - decl += PARAM_PREFIX; - decl += utostr(++cnt); - } - } else { - for (PTXMachineFunctionInfo::reg_iterator - i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; - i != e; ++i) { - reg = *i; - assert(reg != PTX::NoRegister && "Not a valid register!"); - if (i != b) - decl += ", "; - decl += ".reg ."; - decl += getRegisterTypeName(reg); - decl += " "; - decl += getRegisterName(reg); - } + decl += " ("; + + unsigned cnt = 0; + + // Print parameters + for (PTXMachineFunctionInfo::reg_iterator + i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; + i != e; ++i) { + if (i != b) { + decl += ", "; + } + if (isKernel) { + decl += ".param .b"; + decl += utostr(*i); + decl += " "; + decl += PARAM_PREFIX; + decl += utostr(++cnt); + } else { + decl += ".reg ."; + decl += getRegisterTypeName(*i); + decl += " "; + decl += getRegisterName(*i); } - decl += ")"; } + decl += ")"; + + // // Print parameter list + // if (!MFI->argRegEmpty()) { + // decl += " ("; + // if (isKernel) { + // unsigned cnt = 0; + // for(PTXMachineFunctionInfo::reg_iterator + // i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; + // i != e; ++i) { + // reg = *i; + // assert(reg != PTX::NoRegister && "Not a valid register!"); + // if (i != b) + // decl += ", "; + // decl += ".param ."; + // decl += getRegisterTypeName(reg); + // decl += " "; + // decl += PARAM_PREFIX; + // decl += utostr(++cnt); + // } + // } else { + // for (PTXMachineFunctionInfo::reg_iterator + // i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; + // i != e; ++i) { + // reg = *i; + // assert(reg != PTX::NoRegister && "Not a valid register!"); + // if (i != b) + // decl += ", "; + // decl += ".reg ."; + // decl += getRegisterTypeName(reg); + // decl += " "; + // decl += getRegisterName(reg); + // } + // } + // decl += ")"; + // } OutStreamer.EmitRawText(Twine(decl)); } diff --git a/llvm/lib/Target/PTX/PTXCallingConv.td b/llvm/lib/Target/PTX/PTXCallingConv.td new file mode 100644 index 000000000000..4d7759b14ee4 --- /dev/null +++ b/llvm/lib/Target/PTX/PTXCallingConv.td @@ -0,0 +1,36 @@ +//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the PTX architecture. +// +//===----------------------------------------------------------------------===// + +// Currently, we reserve one register of each type for return values and let +// the rest be used for parameters. This is a dirty hack, but I am not sure +// how to tell LLVM that registers used for parameter passing cannot be used +// for return values. + +// PTX Calling Conventions +def CC_PTX : CallingConv<[ + CCIfType<[i1], CCAssignToReg<[P1, P2, P3, P4, P5, P6, P7]>>, + CCIfType<[i16], CCAssignToReg<[RH1, RH2, RH3, RH4, RH5, RH6, RH7]>>, + CCIfType<[i32, f32], CCAssignToReg<[R1, R2, R3, R4, R5, R6, R7]>>, + CCIfType<[i64, f64], CCAssignToReg<[RD1, RD2, RD3, RD4, RD5, RD6, RD7]>> +]>; + +//===----------------------------------------------------------------------===// +// Return Value Calling Conventions +//===----------------------------------------------------------------------===// + +def RetCC_PTX : CallingConv<[ + CCIfType<[i1], CCAssignToReg<[P0]>>, + CCIfType<[i16], CCAssignToReg<[RH0]>>, + CCIfType<[i32, f32], CCAssignToReg<[R0]>>, + CCIfType<[i64, f64], CCAssignToReg<[RD0]>> +]>; diff --git a/llvm/lib/Target/PTX/PTXISelLowering.cpp b/llvm/lib/Target/PTX/PTXISelLowering.cpp index cacd5c8c3425..c3cdabad51dc 100644 --- a/llvm/lib/Target/PTX/PTXISelLowering.cpp +++ b/llvm/lib/Target/PTX/PTXISelLowering.cpp @@ -16,6 +16,7 @@ #include "PTXMachineFunctionInfo.h" #include "PTXRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -24,6 +25,16 @@ using namespace llvm; +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +//===----------------------------------------------------------------------===// + +#include "PTXGenCallingConv.inc" + +//===----------------------------------------------------------------------===// +// TargetLowering Implementation +//===----------------------------------------------------------------------===// + PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) : TargetLowering(TM, new TargetLoweringObjectFileELF()) { // Set up the register classes. @@ -195,44 +206,72 @@ SDValue PTXTargetLowering:: break; } - // Make sure we don't add argument registers twice - if (MFI->isDoneAddArg()) - llvm_unreachable("cannot add argument registers twice"); + if (MFI->isKernel()) { + // For kernel functions, we just need to emit the proper READ_PARAM ISDs + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - // Reset argmap before allocation - for (struct argmap_entry *i = argmap, *e = argmap + array_lengthof(argmap); - i != e; ++ i) - i->reset(); + assert(Ins[i].VT != MVT::i1 && "Kernels cannot take pred operands"); - for (int i = 0, e = Ins.size(); i != e; ++ i) { - MVT::SimpleValueType VT = Ins[i].VT.SimpleTy; + SDValue ArgValue = DAG.getNode(PTXISD::READ_PARAM, dl, Ins[i].VT, Chain, + DAG.getTargetConstant(i, MVT::i32)); + InVals.push_back(ArgValue); - struct argmap_entry *entry = std::find(argmap, - argmap + array_lengthof(argmap), VT); - if (entry == argmap + array_lengthof(argmap)) - llvm_unreachable("Type of argument is not supported"); - - if (MFI->isKernel() && entry->RC == PTX::RegPredRegisterClass) - llvm_unreachable("cannot pass preds to kernel"); - - MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); - - unsigned preg = *++(entry->loc); // allocate start from register 1 - unsigned vreg = RegInfo.createVirtualRegister(entry->RC); - RegInfo.addLiveIn(preg, vreg); - - MFI->addArgReg(preg); - - SDValue inval; - if (MFI->isKernel()) - inval = DAG.getNode(PTXISD::READ_PARAM, dl, VT, Chain, - DAG.getTargetConstant(i, MVT::i32)); - else - inval = DAG.getCopyFromReg(Chain, dl, vreg, VT); - InVals.push_back(inval); + // Instead of storing a physical register in our argument list, we just + // store the total size of the parameter, in bits. The ASM printer + // knows how to process this. + MFI->addArgReg(Ins[i].VT.getStoreSizeInBits()); + } } + else { + // For device functions, we use the PTX calling convention to do register + // assignments then create CopyFromReg ISDs for the allocated registers - MFI->doneAddArg(); + SmallVector ArgLocs; + CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), ArgLocs, + *DAG.getContext()); + + CCInfo.AnalyzeFormalArguments(Ins, CC_PTX); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + + CCValAssign& VA = ArgLocs[i]; + EVT RegVT = VA.getLocVT(); + TargetRegisterClass* TRC = 0; + + assert(VA.isRegLoc() && "CCValAssign must be RegLoc"); + + // Determine which register class we need + if (RegVT == MVT::i1) { + TRC = PTX::RegPredRegisterClass; + } + else if (RegVT == MVT::i16) { + TRC = PTX::RegI16RegisterClass; + } + else if (RegVT == MVT::i32) { + TRC = PTX::RegI32RegisterClass; + } + else if (RegVT == MVT::i64) { + TRC = PTX::RegI64RegisterClass; + } + else if (RegVT == MVT::f32) { + TRC = PTX::RegF32RegisterClass; + } + else if (RegVT == MVT::f64) { + TRC = PTX::RegF64RegisterClass; + } + else { + llvm_unreachable("Unknown parameter type"); + } + + unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC); + MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg); + + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); + InVals.push_back(ArgValue); + + MFI->addArgReg(VA.getLocReg()); + } + } return Chain; } @@ -254,51 +293,43 @@ SDValue PTXTargetLowering:: assert(Outs.size() == 0 && "Kernel must return void."); return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain); case CallingConv::PTX_Device: - assert(Outs.size() <= 1 && "Can at most return one value."); + //assert(Outs.size() <= 1 && "Can at most return one value."); break; } - // PTX_Device - - // return void - if (Outs.size() == 0) - return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain); + MachineFunction& MF = DAG.getMachineFunction(); + PTXMachineFunctionInfo *MFI = MF.getInfo(); + SmallVector RVLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); SDValue Flag; - unsigned reg; - if (Outs[0].VT == MVT::i16) { - reg = PTX::RH0; + CCInfo.AnalyzeReturn(Outs, RetCC_PTX); + + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + + CCValAssign& VA = RVLocs[i]; + + assert(VA.isRegLoc() && "CCValAssign must be RegLoc"); + + unsigned Reg = VA.getLocReg(); + + DAG.getMachineFunction().getRegInfo().addLiveOut(Reg); + + Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag); + + // Guarantee that all emitted copies are stuck together, + // avoiding something bad + Flag = Chain.getValue(1); + + MFI->addRetReg(Reg); } - else if (Outs[0].VT == MVT::i32) { - reg = PTX::R0; - } - else if (Outs[0].VT == MVT::i64) { - reg = PTX::RD0; - } - else if (Outs[0].VT == MVT::f32) { - reg = PTX::R0; + + if (Flag.getNode() == 0) { + return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain); } else { - assert(Outs[0].VT == MVT::f64 && "Can return only basic types"); - reg = PTX::RD0; + return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag); } - - MachineFunction &MF = DAG.getMachineFunction(); - PTXMachineFunctionInfo *MFI = MF.getInfo(); - MFI->setRetReg(reg); - - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) - DAG.getMachineFunction().getRegInfo().addLiveOut(reg); - - // Copy the result values into the output registers - Chain = DAG.getCopyToReg(Chain, dl, reg, OutVals[0], Flag); - - // Guarantee that all emitted copies are stuck together, - // avoiding something bad - Flag = Chain.getValue(1); - - return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag); } diff --git a/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp b/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp index c5e191007239..6fe9e6c3f657 100644 --- a/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp +++ b/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp @@ -54,8 +54,6 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n"); - unsigned retreg = MFI->retReg(); - DEBUG(dbgs() << "PTX::NoRegister == " << PTX::NoRegister << "\n" << "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n"); @@ -68,15 +66,13 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) { // FIXME: This is a slow linear scanning for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg) if (MRI.isPhysRegUsed(reg) && - reg != retreg && + !MFI->isRetReg(reg) && (MFI->isKernel() || !MFI->isArgReg(reg))) MFI->addLocalVarReg(reg); // Notify MachineFunctionInfo that I've done adding local var reg MFI->doneAddLocalVar(); - DEBUG(dbgs() << "Return Reg: " << retreg << "\n"); - DEBUG(for (PTXMachineFunctionInfo::reg_iterator i = MFI->argRegBegin(), e = MFI->argRegEnd(); i != e; ++i) diff --git a/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h b/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h index 81df1c236cb2..1da4b5defcd2 100644 --- a/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h +++ b/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h @@ -15,6 +15,7 @@ #define PTX_MACHINE_FUNCTION_INFO_H #include "PTX.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineFunction.h" namespace llvm { @@ -25,7 +26,7 @@ class PTXMachineFunctionInfo : public MachineFunctionInfo { private: bool is_kernel; std::vector reg_arg, reg_local_var; - unsigned reg_ret; + DenseSet reg_ret; bool _isDoneAddArg; public: @@ -39,19 +40,18 @@ public: void addArgReg(unsigned reg) { reg_arg.push_back(reg); } void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); } - void setRetReg(unsigned reg) { reg_ret = reg; } + void addRetReg(unsigned reg) { reg_ret.insert(reg); } void doneAddArg(void) { _isDoneAddArg = true; } void doneAddLocalVar(void) {} - bool isDoneAddArg(void) { return _isDoneAddArg; } - bool isKernel() const { return is_kernel; } typedef std::vector::const_iterator reg_iterator; typedef std::vector::const_reverse_iterator reg_reverse_iterator; + typedef DenseSet::const_iterator ret_iterator; bool argRegEmpty() const { return reg_arg.empty(); } int getNumArg() const { return reg_arg.size(); } @@ -64,12 +64,19 @@ public: reg_iterator localVarRegBegin() const { return reg_local_var.begin(); } reg_iterator localVarRegEnd() const { return reg_local_var.end(); } - unsigned retReg() const { return reg_ret; } + bool retRegEmpty() const { return reg_ret.empty(); } + int getNumRet() const { return reg_ret.size(); } + ret_iterator retRegBegin() const { return reg_ret.begin(); } + ret_iterator retRegEnd() const { return reg_ret.end(); } bool isArgReg(unsigned reg) const { return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end(); } + bool isRetReg(unsigned reg) const { + return std::find(reg_ret.begin(), reg_ret.end(), reg) != reg_ret.end(); + } + bool isLocalVarReg(unsigned reg) const { return std::find(reg_local_var.begin(), reg_local_var.end(), reg) != reg_local_var.end(); diff --git a/llvm/test/CodeGen/PTX/add.ll b/llvm/test/CodeGen/PTX/add.ll index 235b00e8782f..b89a2f62691f 100644 --- a/llvm/test/CodeGen/PTX/add.ll +++ b/llvm/test/CodeGen/PTX/add.ll @@ -22,14 +22,14 @@ define ptx_device i64 @t1_u64(i64 %x, i64 %y) { } define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: add.f32 f0, f1, f2 +; CHECK: add.f32 r0, r1, r2 ; CHECK-NEXT: ret; %z = fadd float %x, %y ret float %z } define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: add.f64 fd0, fd1, fd2 +; CHECK: add.f64 rd0, rd1, rd2 ; CHECK-NEXT: ret; %z = fadd double %x, %y ret double %z @@ -57,14 +57,14 @@ define ptx_device i64 @t2_u64(i64 %x) { } define ptx_device float @t2_f32(float %x) { -; CHECK: add.f32 f0, f1, 0F3F800000; +; CHECK: add.f32 r0, r1, 0F3F800000; ; CHECK-NEXT: ret; %z = fadd float %x, 1.0 ret float %z } define ptx_device double @t2_f64(double %x) { -; CHECK: add.f64 fd0, fd1, 0D3FF0000000000000; +; CHECK: add.f64 rd0, rd1, 0D3FF0000000000000; ; CHECK-NEXT: ret; %z = fadd double %x, 1.0 ret double %z diff --git a/llvm/test/CodeGen/PTX/cvt.ll b/llvm/test/CodeGen/PTX/cvt.ll index 2f793dede621..984cb4d8d5ac 100644 --- a/llvm/test/CodeGen/PTX/cvt.ll +++ b/llvm/test/CodeGen/PTX/cvt.ll @@ -31,7 +31,7 @@ define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) { } define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) { -; CHECK: cvt.rni.pred.f32 p0, f1; +; CHECK: cvt.rni.pred.f32 p0, r1; ; CHECK: ret; %a = fptoui float %x to i1 %b = and i1 %a, %y @@ -40,7 +40,7 @@ define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) { } define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) { -; CHECK: cvt.rni.pred.f64 p0, fd1; +; CHECK: cvt.rni.pred.f64 p0, rd1; ; CHECK: ret; %a = fptoui double %x to i1 %b = and i1 %a, %y @@ -72,14 +72,14 @@ define ptx_device i16 @cvt_i16_i64(i64 %x) { } define ptx_device i16 @cvt_i16_f32(float %x) { -; CHECK: cvt.rni.u16.f32 rh0, f1; +; CHECK: cvt.rni.u16.f32 rh0, r1; ; CHECK: ret; %a = fptoui float %x to i16 ret i16 %a } define ptx_device i16 @cvt_i16_f64(double %x) { -; CHECK: cvt.rni.u16.f64 rh0, fd1; +; CHECK: cvt.rni.u16.f64 rh0, rd1; ; CHECK: ret; %a = fptoui double %x to i16 ret i16 %a @@ -109,14 +109,14 @@ define ptx_device i32 @cvt_i32_i64(i64 %x) { } define ptx_device i32 @cvt_i32_f32(float %x) { -; CHECK: cvt.rni.u32.f32 r0, f1; +; CHECK: cvt.rni.u32.f32 r0, r1; ; CHECK: ret; %a = fptoui float %x to i32 ret i32 %a } define ptx_device i32 @cvt_i32_f64(double %x) { -; CHECK: cvt.rni.u32.f64 r0, fd1; +; CHECK: cvt.rni.u32.f64 r0, rd1; ; CHECK: ret; %a = fptoui double %x to i32 ret i32 %a @@ -146,14 +146,14 @@ define ptx_device i64 @cvt_i64_i32(i32 %x) { } define ptx_device i64 @cvt_i64_f32(float %x) { -; CHECK: cvt.rni.u64.f32 rd0, f1; +; CHECK: cvt.rni.u64.f32 rd0, r1; ; CHECK: ret; %a = fptoui float %x to i64 ret i64 %a } define ptx_device i64 @cvt_i64_f64(double %x) { -; CHECK: cvt.rni.u64.f64 rd0, fd1; +; CHECK: cvt.rni.u64.f64 rd0, rd1; ; CHECK: ret; %a = fptoui double %x to i64 ret i64 %a @@ -162,35 +162,35 @@ define ptx_device i64 @cvt_i64_f64(double %x) { ; f32 define ptx_device float @cvt_f32_preds(i1 %x) { -; CHECK: cvt.rn.f32.pred f0, p1; +; CHECK: cvt.rn.f32.pred r0, p1; ; CHECK: ret; %a = uitofp i1 %x to float ret float %a } define ptx_device float @cvt_f32_i16(i16 %x) { -; CHECK: cvt.rn.f32.u16 f0, rh1; +; CHECK: cvt.rn.f32.u16 r0, rh1; ; CHECK: ret; %a = uitofp i16 %x to float ret float %a } define ptx_device float @cvt_f32_i32(i32 %x) { -; CHECK: cvt.rn.f32.u32 f0, r1; +; CHECK: cvt.rn.f32.u32 r0, r1; ; CHECK: ret; %a = uitofp i32 %x to float ret float %a } define ptx_device float @cvt_f32_i64(i64 %x) { -; CHECK: cvt.rn.f32.u64 f0, rd1; +; CHECK: cvt.rn.f32.u64 r0, rd1; ; CHECK: ret; %a = uitofp i64 %x to float ret float %a } define ptx_device float @cvt_f32_f64(double %x) { -; CHECK: cvt.rn.f32.f64 f0, fd1; +; CHECK: cvt.rn.f32.f64 r0, rd1; ; CHECK: ret; %a = fptrunc double %x to float ret float %a @@ -199,35 +199,35 @@ define ptx_device float @cvt_f32_f64(double %x) { ; f64 define ptx_device double @cvt_f64_preds(i1 %x) { -; CHECK: cvt.rn.f64.pred fd0, p1; +; CHECK: cvt.rn.f64.pred rd0, p1; ; CHECK: ret; %a = uitofp i1 %x to double ret double %a } define ptx_device double @cvt_f64_i16(i16 %x) { -; CHECK: cvt.rn.f64.u16 fd0, rh1; +; CHECK: cvt.rn.f64.u16 rd0, rh1; ; CHECK: ret; %a = uitofp i16 %x to double ret double %a } define ptx_device double @cvt_f64_i32(i32 %x) { -; CHECK: cvt.rn.f64.u32 fd0, r1; +; CHECK: cvt.rn.f64.u32 rd0, r1; ; CHECK: ret; %a = uitofp i32 %x to double ret double %a } define ptx_device double @cvt_f64_i64(i64 %x) { -; CHECK: cvt.rn.f64.u64 fd0, rd1; +; CHECK: cvt.rn.f64.u64 rd0, rd1; ; CHECK: ret; %a = uitofp i64 %x to double ret double %a } define ptx_device double @cvt_f64_f32(float %x) { -; CHECK: cvt.f64.f32 fd0, f1; +; CHECK: cvt.f64.f32 rd0, r1; ; CHECK: ret; %a = fpext float %x to double ret double %a diff --git a/llvm/test/CodeGen/PTX/fdiv-sm10.ll b/llvm/test/CodeGen/PTX/fdiv-sm10.ll index 121360ce9be3..9aff25111b03 100644 --- a/llvm/test/CodeGen/PTX/fdiv-sm10.ll +++ b/llvm/test/CodeGen/PTX/fdiv-sm10.ll @@ -1,14 +1,14 @@ ; RUN: llc < %s -march=ptx32 -mattr=+sm10 | FileCheck %s define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: div.approx.f32 f0, f1, f2; +; CHECK: div.approx.f32 r0, r1, r2; ; CHECK-NEXT: ret; %a = fdiv float %x, %y ret float %a } define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: div.f64 fd0, fd1, fd2; +; CHECK: div.f64 rd0, rd1, rd2; ; CHECK-NEXT: ret; %a = fdiv double %x, %y ret double %a diff --git a/llvm/test/CodeGen/PTX/fdiv-sm13.ll b/llvm/test/CodeGen/PTX/fdiv-sm13.ll index 0ec7bae8030e..84e0adab7e19 100644 --- a/llvm/test/CodeGen/PTX/fdiv-sm13.ll +++ b/llvm/test/CodeGen/PTX/fdiv-sm13.ll @@ -1,14 +1,14 @@ ; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: div.approx.f32 f0, f1, f2; +; CHECK: div.approx.f32 r0, r1, r2; ; CHECK-NEXT: ret; %a = fdiv float %x, %y ret float %a } define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: div.rn.f64 fd0, fd1, fd2; +; CHECK: div.rn.f64 rd0, rd1, rd2; ; CHECK-NEXT: ret; %a = fdiv double %x, %y ret double %a diff --git a/llvm/test/CodeGen/PTX/fneg.ll b/llvm/test/CodeGen/PTX/fneg.ll index 22eeda3f0cbf..185c37c879bb 100644 --- a/llvm/test/CodeGen/PTX/fneg.ll +++ b/llvm/test/CodeGen/PTX/fneg.ll @@ -1,14 +1,14 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device float @t1_f32(float %x) { -; CHECK: neg.f32 f0, f1; +; CHECK: neg.f32 r0, r1; ; CHECK-NEXT: ret; %y = fsub float -0.000000e+00, %x ret float %y } define ptx_device double @t1_f64(double %x) { -; CHECK: neg.f64 fd0, fd1; +; CHECK: neg.f64 rd0, rd1; ; CHECK-NEXT: ret; %y = fsub double -0.000000e+00, %x ret double %y diff --git a/llvm/test/CodeGen/PTX/ld.ll b/llvm/test/CodeGen/PTX/ld.ll index 377a95abe3db..9b759987f830 100644 --- a/llvm/test/CodeGen/PTX/ld.ll +++ b/llvm/test/CodeGen/PTX/ld.ll @@ -87,7 +87,7 @@ entry: define ptx_device float @t1_f32(float* %p) { entry: -;CHECK: ld.global.f32 f0, [r1]; +;CHECK: ld.global.f32 r0, [r1]; ;CHECK-NEXT: ret; %x = load float* %p ret float %x @@ -95,7 +95,7 @@ entry: define ptx_device double @t1_f64(double* %p) { entry: -;CHECK: ld.global.f64 fd0, [r1]; +;CHECK: ld.global.f64 rd0, [r1]; ;CHECK-NEXT: ret; %x = load double* %p ret double %x @@ -130,7 +130,7 @@ entry: define ptx_device float @t2_f32(float* %p) { entry: -;CHECK: ld.global.f32 f0, [r1+4]; +;CHECK: ld.global.f32 r0, [r1+4]; ;CHECK-NEXT: ret; %i = getelementptr float* %p, i32 1 %x = load float* %i @@ -139,7 +139,7 @@ entry: define ptx_device double @t2_f64(double* %p) { entry: -;CHECK: ld.global.f64 fd0, [r1+8]; +;CHECK: ld.global.f64 rd0, [r1+8]; ;CHECK-NEXT: ret; %i = getelementptr double* %p, i32 1 %x = load double* %i @@ -180,7 +180,7 @@ define ptx_device float @t3_f32(float* %p, i32 %q) { entry: ;CHECK: shl.b32 r0, r2, 2; ;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: ld.global.f32 f0, [r0]; +;CHECK-NEXT: ld.global.f32 r0, [r0]; %i = getelementptr float* %p, i32 %q %x = load float* %i ret float %x @@ -190,7 +190,7 @@ define ptx_device double @t3_f64(double* %p, i32 %q) { entry: ;CHECK: shl.b32 r0, r2, 3; ;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: ld.global.f64 fd0, [r0]; +;CHECK-NEXT: ld.global.f64 rd0, [r0]; %i = getelementptr double* %p, i32 %q %x = load double* %i ret double %x @@ -229,7 +229,7 @@ entry: define ptx_device float @t4_global_f32() { entry: ;CHECK: mov.u32 r0, array_float; -;CHECK-NEXT: ld.global.f32 f0, [r0]; +;CHECK-NEXT: ld.global.f32 r0, [r0]; ;CHECK-NEXT: ret; %i = getelementptr [10 x float]* @array_float, i32 0, i32 0 %x = load float* %i @@ -239,7 +239,7 @@ entry: define ptx_device double @t4_global_f64() { entry: ;CHECK: mov.u32 r0, array_double; -;CHECK-NEXT: ld.global.f64 fd0, [r0]; +;CHECK-NEXT: ld.global.f64 rd0, [r0]; ;CHECK-NEXT: ret; %i = getelementptr [10 x double]* @array_double, i32 0, i32 0 %x = load double* %i @@ -279,7 +279,7 @@ entry: define ptx_device float @t4_const_f32() { entry: ;CHECK: mov.u32 r0, array_constant_float; -;CHECK-NEXT: ld.const.f32 f0, [r0]; +;CHECK-NEXT: ld.const.f32 r0, [r0]; ;CHECK-NEXT: ret; %i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0 %x = load float addrspace(1)* %i @@ -289,7 +289,7 @@ entry: define ptx_device double @t4_const_f64() { entry: ;CHECK: mov.u32 r0, array_constant_double; -;CHECK-NEXT: ld.const.f64 fd0, [r0]; +;CHECK-NEXT: ld.const.f64 rd0, [r0]; ;CHECK-NEXT: ret; %i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0 %x = load double addrspace(1)* %i @@ -329,7 +329,7 @@ entry: define ptx_device float @t4_local_f32() { entry: ;CHECK: mov.u32 r0, array_local_float; -;CHECK-NEXT: ld.local.f32 f0, [r0]; +;CHECK-NEXT: ld.local.f32 r0, [r0]; ;CHECK-NEXT: ret; %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0 %x = load float addrspace(2)* %i @@ -339,7 +339,7 @@ entry: define ptx_device double @t4_local_f64() { entry: ;CHECK: mov.u32 r0, array_local_double; -;CHECK-NEXT: ld.local.f64 fd0, [r0]; +;CHECK-NEXT: ld.local.f64 rd0, [r0]; ;CHECK-NEXT: ret; %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0 %x = load double addrspace(2)* %i @@ -379,7 +379,7 @@ entry: define ptx_device float @t4_shared_f32() { entry: ;CHECK: mov.u32 r0, array_shared_float; -;CHECK-NEXT: ld.shared.f32 f0, [r0]; +;CHECK-NEXT: ld.shared.f32 r0, [r0]; ;CHECK-NEXT: ret; %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0 %x = load float addrspace(4)* %i @@ -389,7 +389,7 @@ entry: define ptx_device double @t4_shared_f64() { entry: ;CHECK: mov.u32 r0, array_shared_double; -;CHECK-NEXT: ld.shared.f64 fd0, [r0]; +;CHECK-NEXT: ld.shared.f64 rd0, [r0]; ;CHECK-NEXT: ret; %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0 %x = load double addrspace(4)* %i @@ -429,7 +429,7 @@ entry: define ptx_device float @t5_f32() { entry: ;CHECK: mov.u32 r0, array_float; -;CHECK-NEXT: ld.global.f32 f0, [r0+4]; +;CHECK-NEXT: ld.global.f32 r0, [r0+4]; ;CHECK-NEXT: ret; %i = getelementptr [10 x float]* @array_float, i32 0, i32 1 %x = load float* %i @@ -439,7 +439,7 @@ entry: define ptx_device double @t5_f64() { entry: ;CHECK: mov.u32 r0, array_double; -;CHECK-NEXT: ld.global.f64 fd0, [r0+8]; +;CHECK-NEXT: ld.global.f64 rd0, [r0+8]; ;CHECK-NEXT: ret; %i = getelementptr [10 x double]* @array_double, i32 0, i32 1 %x = load double* %i diff --git a/llvm/test/CodeGen/PTX/llvm-intrinsic.ll b/llvm/test/CodeGen/PTX/llvm-intrinsic.ll index 1e265f5b7b3a..a3176451ffd5 100644 --- a/llvm/test/CodeGen/PTX/llvm-intrinsic.ll +++ b/llvm/test/CodeGen/PTX/llvm-intrinsic.ll @@ -2,7 +2,7 @@ define ptx_device float @test_sqrt_f32(float %x) { entry: -; CHECK: sqrt.rn.f32 f0, f1; +; CHECK: sqrt.rn.f32 r0, r1; ; CHECK-NEXT: ret; %y = call float @llvm.sqrt.f32(float %x) ret float %y @@ -10,7 +10,7 @@ entry: define ptx_device double @test_sqrt_f64(double %x) { entry: -; CHECK: sqrt.rn.f64 fd0, fd1; +; CHECK: sqrt.rn.f64 rd0, rd1; ; CHECK-NEXT: ret; %y = call double @llvm.sqrt.f64(double %x) ret double %y @@ -18,7 +18,7 @@ entry: define ptx_device float @test_sin_f32(float %x) { entry: -; CHECK: sin.approx.f32 f0, f1; +; CHECK: sin.approx.f32 r0, r1; ; CHECK-NEXT: ret; %y = call float @llvm.sin.f32(float %x) ret float %y @@ -26,7 +26,7 @@ entry: define ptx_device double @test_sin_f64(double %x) { entry: -; CHECK: sin.approx.f64 fd0, fd1; +; CHECK: sin.approx.f64 rd0, rd1; ; CHECK-NEXT: ret; %y = call double @llvm.sin.f64(double %x) ret double %y @@ -34,7 +34,7 @@ entry: define ptx_device float @test_cos_f32(float %x) { entry: -; CHECK: cos.approx.f32 f0, f1; +; CHECK: cos.approx.f32 r0, r1; ; CHECK-NEXT: ret; %y = call float @llvm.cos.f32(float %x) ret float %y @@ -42,7 +42,7 @@ entry: define ptx_device double @test_cos_f64(double %x) { entry: -; CHECK: cos.approx.f64 fd0, fd1; +; CHECK: cos.approx.f64 rd0, rd1; ; CHECK-NEXT: ret; %y = call double @llvm.cos.f64(double %x) ret double %y diff --git a/llvm/test/CodeGen/PTX/mad.ll b/llvm/test/CodeGen/PTX/mad.ll index 0c25f2c0030a..56d3811aa7cd 100644 --- a/llvm/test/CodeGen/PTX/mad.ll +++ b/llvm/test/CodeGen/PTX/mad.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s define ptx_device float @t1_f32(float %x, float %y, float %z) { -; CHECK: mad.rn.f32 f0, f1, f2, f3; +; CHECK: mad.rn.f32 r0, r1, r2, r3; ; CHECK-NEXT: ret; %a = fmul float %x, %y %b = fadd float %a, %z @@ -9,7 +9,7 @@ define ptx_device float @t1_f32(float %x, float %y, float %z) { } define ptx_device double @t1_f64(double %x, double %y, double %z) { -; CHECK: mad.rn.f64 fd0, fd1, fd2, fd3; +; CHECK: mad.rn.f64 rd0, rd1, rd2, rd3; ; CHECK-NEXT: ret; %a = fmul double %x, %y %b = fadd double %a, %z diff --git a/llvm/test/CodeGen/PTX/mov.ll b/llvm/test/CodeGen/PTX/mov.ll index 120572a0e868..05ce4c0a8876 100644 --- a/llvm/test/CodeGen/PTX/mov.ll +++ b/llvm/test/CodeGen/PTX/mov.ll @@ -19,13 +19,13 @@ define ptx_device i64 @t1_u64() { } define ptx_device float @t1_f32() { -; CHECK: mov.f32 f0, 0F00000000; +; CHECK: mov.f32 r0, 0F00000000; ; CHECK: ret; ret float 0.0 } define ptx_device double @t1_f64() { -; CHECK: mov.f64 fd0, 0D0000000000000000; +; CHECK: mov.f64 rd0, 0D0000000000000000; ; CHECK: ret; ret double 0.0 } @@ -49,13 +49,13 @@ define ptx_device i64 @t2_u64(i64 %x) { } define ptx_device float @t3_f32(float %x) { -; CHECK: mov.f32 f0, f1; +; CHECK: mov.u32 r0, r1; ; CHECK-NEXT: ret; ret float %x } define ptx_device double @t3_f64(double %x) { -; CHECK: mov.f64 fd0, fd1; +; CHECK: mov.u64 rd0, rd1; ; CHECK-NEXT: ret; ret double %x } diff --git a/llvm/test/CodeGen/PTX/mul.ll b/llvm/test/CodeGen/PTX/mul.ll index 5ce042675dc8..93f94e350963 100644 --- a/llvm/test/CodeGen/PTX/mul.ll +++ b/llvm/test/CodeGen/PTX/mul.ll @@ -11,28 +11,28 @@ ;} define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: mul.f32 f0, f1, f2 +; CHECK: mul.f32 r0, r1, r2 ; CHECK-NEXT: ret; %z = fmul float %x, %y ret float %z } define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: mul.f64 fd0, fd1, fd2 +; CHECK: mul.f64 rd0, rd1, rd2 ; CHECK-NEXT: ret; %z = fmul double %x, %y ret double %z } define ptx_device float @t2_f32(float %x) { -; CHECK: mul.f32 f0, f1, 0F40A00000; +; CHECK: mul.f32 r0, r1, 0F40A00000; ; CHECK-NEXT: ret; %z = fmul float %x, 5.0 ret float %z } define ptx_device double @t2_f64(double %x) { -; CHECK: mul.f64 fd0, fd1, 0D4014000000000000; +; CHECK: mul.f64 rd0, rd1, 0D4014000000000000; ; CHECK-NEXT: ret; %z = fmul double %x, 5.0 ret double %z diff --git a/llvm/test/CodeGen/PTX/parameter-order.ll b/llvm/test/CodeGen/PTX/parameter-order.ll index 95d4a328149c..54864720992a 100644 --- a/llvm/test/CodeGen/PTX/parameter-order.ll +++ b/llvm/test/CodeGen/PTX/parameter-order.ll @@ -1,8 +1,8 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s -; CHECK: .func (.reg .u32 r0) test_parameter_order (.reg .f32 f1, .reg .u32 r1, .reg .u32 r2, .reg .f32 f2) +; CHECK: .func (.reg .b32 r0) test_parameter_order (.reg .b32 r1, .reg .b32 r2, .reg .b32 r3, .reg .b32 r4) define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) { -; CHECK: sub.u32 r0, r1, r2 +; CHECK: sub.u32 r0, r2, r3 %result = sub i32 %b, %c ret i32 %result } diff --git a/llvm/test/CodeGen/PTX/selp.ll b/llvm/test/CodeGen/PTX/selp.ll index 6f1b03e5997f..19cfa5399d07 100644 --- a/llvm/test/CodeGen/PTX/selp.ll +++ b/llvm/test/CodeGen/PTX/selp.ll @@ -13,13 +13,13 @@ define ptx_device i64 @test_selp_i64(i1 %x, i64 %y, i64 %z) { } define ptx_device float @test_selp_f32(i1 %x, float %y, float %z) { -; CHECK: selp.f32 f0, f1, f2, p1; +; CHECK: selp.f32 r0, r1, r2, p1; %a = select i1 %x, float %y, float %z ret float %a } define ptx_device double @test_selp_f64(i1 %x, double %y, double %z) { -; CHECK: selp.f64 fd0, fd1, fd2, p1; +; CHECK: selp.f64 rd0, rd1, rd2, p1; %a = select i1 %x, double %y, double %z ret double %a } diff --git a/llvm/test/CodeGen/PTX/st.ll b/llvm/test/CodeGen/PTX/st.ll index dee5c61abe66..612967ac8303 100644 --- a/llvm/test/CodeGen/PTX/st.ll +++ b/llvm/test/CodeGen/PTX/st.ll @@ -87,7 +87,7 @@ entry: define ptx_device void @t1_f32(float* %p, float %x) { entry: -;CHECK: st.global.f32 [r1], f1; +;CHECK: st.global.f32 [r1], r2; ;CHECK-NEXT: ret; store float %x, float* %p ret void @@ -95,7 +95,7 @@ entry: define ptx_device void @t1_f64(double* %p, double %x) { entry: -;CHECK: st.global.f64 [r1], fd1; +;CHECK: st.global.f64 [r1], rd1; ;CHECK-NEXT: ret; store double %x, double* %p ret void @@ -130,7 +130,7 @@ entry: define ptx_device void @t2_f32(float* %p, float %x) { entry: -;CHECK: st.global.f32 [r1+4], f1; +;CHECK: st.global.f32 [r1+4], r2; ;CHECK-NEXT: ret; %i = getelementptr float* %p, i32 1 store float %x, float* %i @@ -139,7 +139,7 @@ entry: define ptx_device void @t2_f64(double* %p, double %x) { entry: -;CHECK: st.global.f64 [r1+8], fd1; +;CHECK: st.global.f64 [r1+8], rd1; ;CHECK-NEXT: ret; %i = getelementptr double* %p, i32 1 store double %x, double* %i @@ -183,7 +183,7 @@ define ptx_device void @t3_f32(float* %p, i32 %q, float %x) { entry: ;CHECK: shl.b32 r0, r2, 2; ;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: st.global.f32 [r0], f1; +;CHECK-NEXT: st.global.f32 [r0], r3; ;CHECK-NEXT: ret; %i = getelementptr float* %p, i32 %q store float %x, float* %i @@ -194,7 +194,7 @@ define ptx_device void @t3_f64(double* %p, i32 %q, double %x) { entry: ;CHECK: shl.b32 r0, r2, 3; ;CHECK-NEXT: add.u32 r0, r1, r0; -;CHECK-NEXT: st.global.f64 [r0], fd1; +;CHECK-NEXT: st.global.f64 [r0], rd1; ;CHECK-NEXT: ret; %i = getelementptr double* %p, i32 %q store double %x, double* %i @@ -234,7 +234,7 @@ entry: define ptx_device void @t4_global_f32(float %x) { entry: ;CHECK: mov.u32 r0, array_float; -;CHECK-NEXT: st.global.f32 [r0], f1; +;CHECK-NEXT: st.global.f32 [r0], r1; ;CHECK-NEXT: ret; %i = getelementptr [10 x float]* @array_float, i32 0, i32 0 store float %x, float* %i @@ -244,7 +244,7 @@ entry: define ptx_device void @t4_global_f64(double %x) { entry: ;CHECK: mov.u32 r0, array_double; -;CHECK-NEXT: st.global.f64 [r0], fd1; +;CHECK-NEXT: st.global.f64 [r0], rd1; ;CHECK-NEXT: ret; %i = getelementptr [10 x double]* @array_double, i32 0, i32 0 store double %x, double* %i @@ -284,7 +284,7 @@ entry: define ptx_device void @t4_local_f32(float %x) { entry: ;CHECK: mov.u32 r0, array_local_float; -;CHECK-NEXT: st.local.f32 [r0], f1; +;CHECK-NEXT: st.local.f32 [r0], r1; ;CHECK-NEXT: ret; %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0 store float %x, float addrspace(2)* %i @@ -294,7 +294,7 @@ entry: define ptx_device void @t4_local_f64(double %x) { entry: ;CHECK: mov.u32 r0, array_local_double; -;CHECK-NEXT: st.local.f64 [r0], fd1; +;CHECK-NEXT: st.local.f64 [r0], rd1; ;CHECK-NEXT: ret; %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0 store double %x, double addrspace(2)* %i @@ -334,7 +334,7 @@ entry: define ptx_device void @t4_shared_f32(float %x) { entry: ;CHECK: mov.u32 r0, array_shared_float; -;CHECK-NEXT: st.shared.f32 [r0], f1; +;CHECK-NEXT: st.shared.f32 [r0], r1; ;CHECK-NEXT: ret; %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0 store float %x, float addrspace(4)* %i @@ -344,7 +344,7 @@ entry: define ptx_device void @t4_shared_f64(double %x) { entry: ;CHECK: mov.u32 r0, array_shared_double; -;CHECK-NEXT: st.shared.f64 [r0], fd1; +;CHECK-NEXT: st.shared.f64 [r0], rd1; ;CHECK-NEXT: ret; %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0 store double %x, double addrspace(4)* %i @@ -384,7 +384,7 @@ entry: define ptx_device void @t5_f32(float %x) { entry: ;CHECK: mov.u32 r0, array_float; -;CHECK-NEXT: st.global.f32 [r0+4], f1; +;CHECK-NEXT: st.global.f32 [r0+4], r1; ;CHECK-NEXT: ret; %i = getelementptr [10 x float]* @array_float, i32 0, i32 1 store float %x, float* %i @@ -394,7 +394,7 @@ entry: define ptx_device void @t5_f64(double %x) { entry: ;CHECK: mov.u32 r0, array_double; -;CHECK-NEXT: st.global.f64 [r0+8], fd1; +;CHECK-NEXT: st.global.f64 [r0+8], rd1; ;CHECK-NEXT: ret; %i = getelementptr [10 x double]* @array_double, i32 0, i32 1 store double %x, double* %i diff --git a/llvm/test/CodeGen/PTX/sub.ll b/llvm/test/CodeGen/PTX/sub.ll index 7dd2c6f6ac79..9efeaace0e7e 100644 --- a/llvm/test/CodeGen/PTX/sub.ll +++ b/llvm/test/CodeGen/PTX/sub.ll @@ -22,14 +22,14 @@ define ptx_device i64 @t1_u64(i64 %x, i64 %y) { } define ptx_device float @t1_f32(float %x, float %y) { -; CHECK: sub.f32 f0, f1, f2 +; CHECK: sub.f32 r0, r1, r2 ; CHECK-NEXT: ret; %z = fsub float %x, %y ret float %z } define ptx_device double @t1_f64(double %x, double %y) { -; CHECK: sub.f64 fd0, fd1, fd2 +; CHECK: sub.f64 rd0, rd1, rd2 ; CHECK-NEXT: ret; %z = fsub double %x, %y ret double %z @@ -57,14 +57,14 @@ define ptx_device i64 @t2_u64(i64 %x) { } define ptx_device float @t2_f32(float %x) { -; CHECK: add.f32 f0, f1, 0FBF800000; +; CHECK: add.f32 r0, r1, 0FBF800000; ; CHECK-NEXT: ret; %z = fsub float %x, 1.0 ret float %z } define ptx_device double @t2_f64(double %x) { -; CHECK: add.f64 fd0, fd1, 0DBFF0000000000000; +; CHECK: add.f64 rd0, rd1, 0DBFF0000000000000; ; CHECK-NEXT: ret; %z = fsub double %x, 1.0 ret double %z