From 7e2c83256bb55fcd634b055d72755f7724e89c54 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Wed, 4 Sep 2013 19:53:46 +0000 Subject: [PATCH] R600: Non vector only instruction can be scheduled on trans unit llvm-svn: 189980 --- llvm/lib/Target/R600/R600InstrInfo.cpp | 3 + llvm/lib/Target/R600/R600MachineScheduler.cpp | 33 ++++--- llvm/lib/Target/R600/R600MachineScheduler.h | 5 +- llvm/lib/Target/R600/R600Packetizer.cpp | 38 ++++++-- llvm/test/CodeGen/R600/and.ll | 8 +- llvm/test/CodeGen/R600/fadd.ll | 14 +-- llvm/test/CodeGen/R600/fcmp-cnd.ll | 2 +- llvm/test/CodeGen/R600/fcmp.ll | 2 +- llvm/test/CodeGen/R600/fdiv.ll | 12 +-- llvm/test/CodeGen/R600/fmul.ll | 14 +-- llvm/test/CodeGen/R600/fneg.ll | 2 +- llvm/test/CodeGen/R600/fp_to_sint.ll | 12 +-- llvm/test/CodeGen/R600/fp_to_uint.ll | 12 +-- llvm/test/CodeGen/R600/fsub.ll | 14 +-- .../R600/icmp-select-sete-reverse-args.ll | 2 +- llvm/test/CodeGen/R600/literals.ll | 14 +-- llvm/test/CodeGen/R600/llvm.AMDGPU.trunc.ll | 2 +- llvm/test/CodeGen/R600/rotr.ll | 3 +- llvm/test/CodeGen/R600/selectcc-cnd.ll | 4 +- llvm/test/CodeGen/R600/selectcc-cnde-int.ll | 2 +- llvm/test/CodeGen/R600/set-dx10.ll | 36 +++++--- llvm/test/CodeGen/R600/store.ll | 30 +++---- llvm/test/CodeGen/R600/sub.ll | 12 +-- llvm/test/CodeGen/R600/unsupported-cc.ll | 24 +++-- llvm/test/CodeGen/R600/vselect.ll | 24 ++--- .../test/CodeGen/R600/work-item-intrinsics.ll | 18 ++-- .../CodeGen/R600/wrong-transalu-pos-fix.ll | 89 +++++++++++++++++++ llvm/test/CodeGen/R600/xor.ll | 14 +-- 28 files changed, 299 insertions(+), 146 deletions(-) create mode 100644 llvm/test/CodeGen/R600/wrong-transalu-pos-fix.ll diff --git a/llvm/lib/Target/R600/R600InstrInfo.cpp b/llvm/lib/Target/R600/R600InstrInfo.cpp index 212463f2c98e..e3cb71b05c33 100644 --- a/llvm/lib/Target/R600/R600InstrInfo.cpp +++ b/llvm/lib/Target/R600/R600InstrInfo.cpp @@ -477,6 +477,9 @@ static bool isConstCompatible(R600InstrInfo::BankSwizzle TransSwz, const std::vector > &TransOps, unsigned 
ConstCount) { + // TransALU can't read 3 constants + if (ConstCount > 2) + return false; for (unsigned i = 0, e = TransOps.size(); i < e; ++i) { const std::pair &Src = TransOps[i]; unsigned Cycle = getTransSwizzle(TransSwz, i); diff --git a/llvm/lib/Target/R600/R600MachineScheduler.cpp b/llvm/lib/Target/R600/R600MachineScheduler.cpp index 0dc0365926ec..0499dd52d923 100644 --- a/llvm/lib/Target/R600/R600MachineScheduler.cpp +++ b/llvm/lib/Target/R600/R600MachineScheduler.cpp @@ -9,7 +9,6 @@ // /// \file /// \brief R600 Machine Scheduler interface -// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot // //===----------------------------------------------------------------------===// @@ -29,6 +28,7 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { DAG = dag; TII = static_cast(DAG->TII); TRI = static_cast(DAG->TRI); + VLIW5 = !DAG->MF.getTarget().getSubtarget().hasCaymanISA(); MRI = &DAG->MRI; CurInstKind = IDOther; CurEmitted = 0; @@ -342,14 +342,16 @@ int R600SchedStrategy::getInstKind(SUnit* SU) { } } -SUnit *R600SchedStrategy::PopInst(std::vector &Q) { +SUnit *R600SchedStrategy::PopInst(std::vector &Q, bool AnyALU) { if (Q.empty()) return NULL; for (std::vector::reverse_iterator It = Q.rbegin(), E = Q.rend(); It != E; ++It) { SUnit *SU = *It; InstructionsGroupCandidate.push_back(SU->getInstr()); - if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)) { + if (TII->fitsConstReadLimitations(InstructionsGroupCandidate) + && (!AnyALU || !TII->isVectorOnly(SU->getInstr())) + ) { InstructionsGroupCandidate.pop_back(); Q.erase((It + 1).base()); return SU; @@ -373,6 +375,8 @@ void R600SchedStrategy::PrepareNextSlot() { DEBUG(dbgs() << "New Slot\n"); assert (OccupedSlotsMask && "Slot wasn't filled"); OccupedSlotsMask = 0; +// if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS) +// OccupedSlotsMask |= 16; InstructionsGroupCandidate.clear(); LoadAlu(); } @@ -409,12 +413,12 @@ void R600SchedStrategy::AssignSlot(MachineInstr* MI, 
unsigned Slot) { } } -SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) { +SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) { static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W}; - SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]); + SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu); if (SlotedSU) return SlotedSU; - SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]); + SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu); if (UnslotedSU) AssignSlot(UnslotedSU->getInstr(), Slot); return UnslotedSU; @@ -434,30 +438,35 @@ SUnit* R600SchedStrategy::pickAlu() { // Bottom up scheduling : predX must comes first if (!AvailableAlus[AluPredX].empty()) { OccupedSlotsMask |= 31; - return PopInst(AvailableAlus[AluPredX]); + return PopInst(AvailableAlus[AluPredX], false); } // Flush physical reg copies (RA will discard them) if (!AvailableAlus[AluDiscarded].empty()) { OccupedSlotsMask |= 31; - return PopInst(AvailableAlus[AluDiscarded]); + return PopInst(AvailableAlus[AluDiscarded], false); } // If there is a T_XYZW alu available, use it if (!AvailableAlus[AluT_XYZW].empty()) { OccupedSlotsMask |= 15; - return PopInst(AvailableAlus[AluT_XYZW]); + return PopInst(AvailableAlus[AluT_XYZW], false); } } bool TransSlotOccuped = OccupedSlotsMask & 16; - if (!TransSlotOccuped) { + if (!TransSlotOccuped && VLIW5) { if (!AvailableAlus[AluTrans].empty()) { OccupedSlotsMask |= 16; - return PopInst(AvailableAlus[AluTrans]); + return PopInst(AvailableAlus[AluTrans], false); + } + SUnit *SU = AttemptFillSlot(3, true); + if (SU) { + OccupedSlotsMask |= 16; + return SU; } } for (int Chan = 3; Chan > -1; --Chan) { bool isOccupied = OccupedSlotsMask & (1 << Chan); if (!isOccupied) { - SUnit *SU = AttemptFillSlot(Chan); + SUnit *SU = AttemptFillSlot(Chan, false); if (SU) { OccupedSlotsMask |= (1 << Chan); InstructionsGroupCandidate.push_back(SU->getInstr()); diff --git a/llvm/lib/Target/R600/R600MachineScheduler.h 
b/llvm/lib/Target/R600/R600MachineScheduler.h index f8965d8998a4..0a6f1204a4d9 100644 --- a/llvm/lib/Target/R600/R600MachineScheduler.h +++ b/llvm/lib/Target/R600/R600MachineScheduler.h @@ -84,15 +84,16 @@ public: private: std::vector InstructionsGroupCandidate; + bool VLIW5; int getInstKind(SUnit *SU); bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const; AluKind getAluKind(SUnit *SU) const; void LoadAlu(); unsigned AvailablesAluCount() const; - SUnit *AttemptFillSlot (unsigned Slot); + SUnit *AttemptFillSlot (unsigned Slot, bool AnyAlu); void PrepareNextSlot(); - SUnit *PopInst(std::vector &Q); + SUnit *PopInst(std::vector &Q, bool AnyALU); void AssignSlot(MachineInstr *MI, unsigned Slot); SUnit* pickAlu(); diff --git a/llvm/lib/Target/R600/R600Packetizer.cpp b/llvm/lib/Target/R600/R600Packetizer.cpp index 5cf1fd3b665e..6c70052b2038 100644 --- a/llvm/lib/Target/R600/R600Packetizer.cpp +++ b/llvm/lib/Target/R600/R600Packetizer.cpp @@ -58,6 +58,8 @@ class R600PacketizerList : public VLIWPacketizerList { private: const R600InstrInfo *TII; const R600RegisterInfo &TRI; + bool VLIW5; + bool ConsideredInstUsesAlreadyWrittenVectorElement; unsigned getSlot(const MachineInstr *MI) const { return TRI.getHWRegChan(MI->getOperand(0).getReg()); @@ -74,7 +76,13 @@ private: MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); if (I->isBundle()) BI++; + int LastDstChan = -1; do { + bool isTrans = false; + int BISlot = getSlot(BI); + if (LastDstChan >= BISlot) + isTrans = true; + LastDstChan = BISlot; if (TII->isPredicated(BI)) continue; int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write); @@ -85,7 +93,7 @@ private: continue; } unsigned Dst = BI->getOperand(DstIdx).getReg(); - if (TII->isTransOnly(BI)) { + if (isTrans || TII->isTransOnly(BI)) { Result[Dst] = AMDGPU::PS; continue; } @@ -142,10 +150,14 @@ public: MachineDominatorTree &MDT) : VLIWPacketizerList(MF, MLI, MDT, true), TII (static_cast(MF.getTarget().getInstrInfo())), 
- TRI(TII->getRegisterInfo()) { } + TRI(TII->getRegisterInfo()) { + VLIW5 = !MF.getTarget().getSubtarget().hasCaymanISA(); + } // initPacketizerState - initialize some internal flags. - void initPacketizerState() { } + void initPacketizerState() { + ConsideredInstUsesAlreadyWrittenVectorElement = false; + } // ignorePseudoInstruction - Ignore bundling of pseudo instructions. bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) { @@ -172,8 +184,8 @@ public: // together. bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr(); - if (getSlot(MII) <= getSlot(MIJ) && !TII->isTransOnly(MII)) - return false; + if (getSlot(MII) == getSlot(MIJ)) + ConsideredInstUsesAlreadyWrittenVectorElement = true; // Does MII and MIJ share the same pred_sel ? int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel), OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel); @@ -211,6 +223,20 @@ public: std::vector &BS, bool &isTransSlot) { isTransSlot = TII->isTransOnly(MI); + assert (!isTransSlot || VLIW5); + + // Is the dst reg sequence legal ? + if (!isTransSlot && !CurrentPacketMIs.empty()) { + if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) { + if (ConsideredInstUsesAlreadyWrittenVectorElement && + !TII->isVectorOnly(MI) && VLIW5) { + isTransSlot = true; + DEBUG(dbgs() << "Considering as Trans Inst :"; MI->dump();); + } + else + return false; + } + } // Are the Constants limitations met ? 
CurrentPacketMIs.push_back(MI); @@ -278,6 +304,8 @@ public: return It; } endPacket(MI->getParent(), MI); + if (TII->isTransOnly(MI)) + return MI; return VLIWPacketizerList::addToPacket(MI); } }; diff --git a/llvm/test/CodeGen/R600/and.ll b/llvm/test/CodeGen/R600/and.ll index 5fbc843a4e41..dbb6eef4fb02 100644 --- a/llvm/test/CodeGen/R600/and.ll +++ b/llvm/test/CodeGen/R600/and.ll @@ -19,10 +19,10 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { } ;EG-CHECK: @test4 -;EG-CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @test4 ;SI-CHECK: V_AND_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} diff --git a/llvm/test/CodeGen/R600/fadd.ll b/llvm/test/CodeGen/R600/fadd.ll index 6d459679d438..91bfdbc8b7a1 100644 --- a/llvm/test/CodeGen/R600/fadd.ll +++ b/llvm/test/CodeGen/R600/fadd.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK ; R600-CHECK: @fadd_f32 -; R600-CHECK: ADD * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W +; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W ; SI-CHECK: @fadd_f32 ; SI-CHECK: V_ADD_F32 define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) { @@ -13,8 +13,8 @@ entry: } ; R600-CHECK: @fadd_v2f32 -; R600-CHECK-DAG: ADD * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z -; R600-CHECK-DAG: ADD * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y +; R600-CHECK-DAG: ADD {{\** 
*}}T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z +; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y ; SI-CHECK: @fadd_v2f32 ; SI-CHECK: V_ADD_F32 ; SI-CHECK: V_ADD_F32 @@ -26,10 +26,10 @@ entry: } ; R600-CHECK: @fadd_v4f32 -; R600-CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; R600-CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; R600-CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; R600-CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; SI-CHECK: @fadd_v4f32 ; SI-CHECK: V_ADD_F32 ; SI-CHECK: V_ADD_F32 diff --git a/llvm/test/CodeGen/R600/fcmp-cnd.ll b/llvm/test/CodeGen/R600/fcmp-cnd.ll index 7373a214790e..1d4e323d3abf 100644 --- a/llvm/test/CodeGen/R600/fcmp-cnd.ll +++ b/llvm/test/CodeGen/R600/fcmp-cnd.ll @@ -2,7 +2,7 @@ ;Not checking arguments 2 and 3 to CNDE, because they may change between ;registers and literal.x depending on what the optimizer does. 
-;CHECK: CNDE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { entry: diff --git a/llvm/test/CodeGen/R600/fcmp.ll b/llvm/test/CodeGen/R600/fcmp.ll index dc3a779dd609..c76a75876565 100644 --- a/llvm/test/CodeGen/R600/fcmp.ll +++ b/llvm/test/CodeGen/R600/fcmp.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ; CHECK: @fcmp_sext -; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) { entry: diff --git a/llvm/test/CodeGen/R600/fdiv.ll b/llvm/test/CodeGen/R600/fdiv.ll index 84e9f6773688..090cedff3f95 100644 --- a/llvm/test/CodeGen/R600/fdiv.ll +++ b/llvm/test/CodeGen/R600/fdiv.ll @@ -8,8 +8,8 @@ ; R600-CHECK: @fdiv_v2f32 ; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z ; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y -; R600-CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS -; R600-CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS +; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS +; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS ; SI-CHECK: @fdiv_v2f32 ; SI-CHECK-DAG: V_RCP_F32 ; SI-CHECK-DAG: V_MUL_F32 @@ -27,10 +27,10 @@ entry: ; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; R600-CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS -; R600-CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS -; R600-CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS -; R600-CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS +; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS +; R600-CHECK-DAG: 
MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS +; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS +; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS ; SI-CHECK: @fdiv_v4f32 ; SI-CHECK-DAG: V_RCP_F32 ; SI-CHECK-DAG: V_MUL_F32 diff --git a/llvm/test/CodeGen/R600/fmul.ll b/llvm/test/CodeGen/R600/fmul.ll index f2b3e2c17dba..4a7d6cde7edf 100644 --- a/llvm/test/CodeGen/R600/fmul.ll +++ b/llvm/test/CodeGen/R600/fmul.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK ; R600-CHECK: @fmul_f32 -; R600-CHECK: MUL_IEEE * {{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W +; R600-CHECK: MUL_IEEE {{\** *}}{{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W ; SI-CHECK: @fmul_f32 ; SI-CHECK: V_MUL_F32 define void @fmul_f32(float addrspace(1)* %out, float %a, float %b) { @@ -17,8 +17,8 @@ declare float @llvm.R600.load.input(i32) readnone declare void @llvm.AMDGPU.store.output(float, i32) ; R600-CHECK: @fmul_v2f32 -; R600-CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW]}} -; R600-CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW]}} +; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}} +; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}} ; SI-CHECK: @fmul_v2f32 ; SI-CHECK: V_MUL_F32 ; SI-CHECK: V_MUL_F32 @@ -30,10 +30,10 @@ entry: } ; R600-CHECK: @fmul_v4f32 -; R600-CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; R600-CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; R600-CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; R600-CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; SI-CHECK: @fmul_v4f32 ; SI-CHECK: V_MUL_F32 ; SI-CHECK: V_MUL_F32 diff --git a/llvm/test/CodeGen/R600/fneg.ll b/llvm/test/CodeGen/R600/fneg.ll index 
799db0c74c1d..f7083cd6ca05 100644 --- a/llvm/test/CodeGen/R600/fneg.ll +++ b/llvm/test/CodeGen/R600/fneg.ll @@ -12,7 +12,7 @@ entry: ; CHECK: @fneg_v4 ; CHECK: -PV -; CHECK: -PV +; CHECK: -T ; CHECK: -PV ; CHECK: -PV define void @fneg_v4(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) { diff --git a/llvm/test/CodeGen/R600/fp_to_sint.ll b/llvm/test/CodeGen/R600/fp_to_sint.ll index 6471270e3a35..cecf0f93ef2d 100644 --- a/llvm/test/CodeGen/R600/fp_to_sint.ll +++ b/llvm/test/CodeGen/R600/fp_to_sint.ll @@ -2,8 +2,8 @@ ; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK ; R600-CHECK: @fp_to_sint_v2i32 -; R600-CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; R600-CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; SI-CHECK: @fp_to_sint_v2i32 ; SI-CHECK: V_CVT_I32_F32_e32 ; SI-CHECK: V_CVT_I32_F32_e32 @@ -14,10 +14,10 @@ define void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { } ; R600-CHECK: @fp_to_sint_v4i32 -; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW]}} +; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; SI-CHECK: @fp_to_sint_v4i32 ; SI-CHECK: V_CVT_I32_F32_e32 ; SI-CHECK: V_CVT_I32_F32_e32 diff --git a/llvm/test/CodeGen/R600/fp_to_uint.ll b/llvm/test/CodeGen/R600/fp_to_uint.ll index 0d07a616aa12..8d3f79abb4fd 100644 --- a/llvm/test/CodeGen/R600/fp_to_uint.ll +++ b/llvm/test/CodeGen/R600/fp_to_uint.ll @@ -2,8 +2,8 @@ ; RUN: llc < %s -march=r600 -mcpu=SI | 
FileCheck %s --check-prefix=SI-CHECK ; R600-CHECK: @fp_to_uint_v2i32 -; R600-CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; R600-CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; SI-CHECK: @fp_to_uint_v2i32 ; SI-CHECK: V_CVT_U32_F32_e32 ; SI-CHECK: V_CVT_U32_F32_e32 @@ -15,10 +15,10 @@ define void @fp_to_uint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { } ; R600-CHECK: @fp_to_uint_v4i32 -; R600-CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; R600-CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; R600-CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; R600-CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; SI-CHECK: @fp_to_uint_v4i32 ; SI-CHECK: V_CVT_U32_F32_e32 ; SI-CHECK: V_CVT_U32_F32_e32 diff --git a/llvm/test/CodeGen/R600/fsub.ll b/llvm/test/CodeGen/R600/fsub.ll index 1608c3aced5b..850d3ee4e98f 100644 --- a/llvm/test/CodeGen/R600/fsub.ll +++ b/llvm/test/CodeGen/R600/fsub.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK ; R600-CHECK: @fsub_f32 -; R600-CHECK: ADD * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, -KC0[2].W +; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, -KC0[2].W ; SI-CHECK: @fsub_f32 ; SI-CHECK: V_SUB_F32 define void @fsub_f32(float addrspace(1)* %out, float %a, float %b) { @@ -17,8 +17,8 @@ declare float @llvm.R600.load.input(i32) readnone declare void @llvm.AMDGPU.store.output(float, i32) ; R600-CHECK: @fsub_v2f32 -; R600-CHECK-DAG: ADD * T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z -; R600-CHECK-DAG: ADD * T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y +; 
R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z +; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y ; SI-CHECK: @fsub_v2f32 ; SI-CHECK: V_SUB_F32 ; SI-CHECK: V_SUB_F32 @@ -30,10 +30,10 @@ entry: } ; R600-CHECK: @fsub_v4f32 -; R600-CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} -; R600-CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} -; R600-CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} -; R600-CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} +; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} +; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} +; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} +; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} ; SI-CHECK: @fsub_v4f32 ; SI-CHECK: V_SUB_F32 ; SI-CHECK: V_SUB_F32 diff --git a/llvm/test/CodeGen/R600/icmp-select-sete-reverse-args.ll b/llvm/test/CodeGen/R600/icmp-select-sete-reverse-args.ll index e3005fe82da1..71705a64f50e 100644 --- a/llvm/test/CodeGen/R600/icmp-select-sete-reverse-args.ll +++ b/llvm/test/CodeGen/R600/icmp-select-sete-reverse-args.ll @@ -3,7 +3,7 @@ ;Test that a select with reversed True/False values is correctly lowered ;to a SETNE_INT. There should only be one SETNE_INT instruction. 
-;CHECK: SETNE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: SETNE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;CHECK-NOT: SETNE_INT define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { diff --git a/llvm/test/CodeGen/R600/literals.ll b/llvm/test/CodeGen/R600/literals.ll index 7a113f1a4c58..e5bdbc43c24c 100644 --- a/llvm/test/CodeGen/R600/literals.ll +++ b/llvm/test/CodeGen/R600/literals.ll @@ -7,7 +7,8 @@ ; ADD_INT literal.x KC0[2].Z, 5 ; CHECK: @i32_literal -; CHECK: ADD_INT * T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x +; CHECK: ADD_INT {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x +; CHECK-NEXT: LSHR ; CHECK-NEXT: 5 define void @i32_literal(i32 addrspace(1)* %out, i32 %in) { entry: @@ -23,7 +24,8 @@ entry: ; ADD literal.x KC0[2].Z, 5.0 ; CHECK: @float_literal -; CHECK: ADD * T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x +; CHECK: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.0 define void @float_literal(float addrspace(1)* %out, float %in) { entry: @@ -34,10 +36,10 @@ entry: ; Make sure inline literals are folded into REG_SEQUENCE instructions. 
; CHECK: @inline_literal_reg_sequence -; CHECK: MOV T[[GPR:[0-9]]].X, 0.0 -; CHECK-NEXT: MOV T[[GPR]].Y, 0.0 -; CHECK-NEXT: MOV T[[GPR]].Z, 0.0 -; CHECK-NEXT: MOV * T[[GPR]].W, 0.0 +; CHECK: MOV {{\** *}}T[[GPR:[0-9]]].X, 0.0 +; CHECK-NEXT: MOV {{\** *}}T[[GPR]].Y, 0.0 +; CHECK-NEXT: MOV {{\** *}}T[[GPR]].Z, 0.0 +; CHECK-NEXT: MOV {{\** *}}T[[GPR]].W, 0.0 define void @inline_literal_reg_sequence(<4 x i32> addrspace(1)* %out) { entry: diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.trunc.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.trunc.ll index 7627783ce368..b9be9c68309f 100644 --- a/llvm/test/CodeGen/R600/llvm.AMDGPU.trunc.ll +++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.trunc.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s ; R600-CHECK: @amdgpu_trunc -; R600-CHECK: TRUNC * T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; R600-CHECK: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z ; SI-CHECK: @amdgpu_trunc ; SI-CHECK: V_TRUNC_F32 diff --git a/llvm/test/CodeGen/R600/rotr.ll b/llvm/test/CodeGen/R600/rotr.ll index 5c4c4e93c218..c3995b64eb50 100644 --- a/llvm/test/CodeGen/R600/rotr.ll +++ b/llvm/test/CodeGen/R600/rotr.ll @@ -19,7 +19,8 @@ entry: ; R600-CHECK: @rotl ; R600-CHECK: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x ; R600-CHECK-NEXT: 32 -; R600-CHECK: BIT_ALIGN_INT {{\** T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}} +; R600-CHECK: BIT_ALIGN_INT {{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}} + ; SI-CHECK: @rotl ; SI-CHECK: V_SUB_I32_e64 [[DST:VGPR[0-9]+]], 32, {{[SV]GPR[0-9]+}} diff --git a/llvm/test/CodeGen/R600/selectcc-cnd.ll b/llvm/test/CodeGen/R600/selectcc-cnd.ll index d7287b487896..0bfca6937488 100644 --- a/llvm/test/CodeGen/R600/selectcc-cnd.ll +++ b/llvm/test/CodeGen/R600/selectcc-cnd.ll @@ -1,8 +1,8 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;CHECK-NOT: SETE -;CHECK: CNDE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x, -;CHECK-NEXT: {{[-0-9]+\(2.0}} +;CHECK: CNDE {{\*?}}T{{[0-9]+\.[XYZW], 
T[0-9]+\.[XYZW]}}, 1.0, literal.x, +;CHECK: 1073741824 define void @test(float addrspace(1)* %out, float addrspace(1)* %in) { %1 = load float addrspace(1)* %in %2 = fcmp oeq float %1, 0.0 diff --git a/llvm/test/CodeGen/R600/selectcc-cnde-int.ll b/llvm/test/CodeGen/R600/selectcc-cnde-int.ll index 768dc7dbf418..d568888f7cb2 100644 --- a/llvm/test/CodeGen/R600/selectcc-cnde-int.ll +++ b/llvm/test/CodeGen/R600/selectcc-cnde-int.ll @@ -1,7 +1,7 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;CHECK-NOT: SETE_INT -;CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x, +;CHECK: CNDE_INT {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x, ;CHECK-NEXT: 2 define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %1 = load i32 addrspace(1)* %in diff --git a/llvm/test/CodeGen/R600/set-dx10.ll b/llvm/test/CodeGen/R600/set-dx10.ll index 291a7bd85ac8..bdc2ff40b76b 100644 --- a/llvm/test/CodeGen/R600/set-dx10.ll +++ b/llvm/test/CodeGen/R600/set-dx10.ll @@ -5,7 +5,8 @@ ; SET*DX10 instructions. 
; CHECK: @fcmp_une_select_fptosi -; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -18,7 +19,8 @@ entry: } ; CHECK: @fcmp_une_select_i32 -; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -29,7 +31,8 @@ entry: } ; CHECK: @fcmp_ueq_select_fptosi -; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -42,7 +45,8 @@ entry: } ; CHECK: @fcmp_ueq_select_i32 -; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -53,7 +57,8 @@ entry: } ; CHECK: @fcmp_ugt_select_fptosi -; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -66,7 +71,8 @@ entry: } ; CHECK: @fcmp_ugt_select_i32 -; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -77,7 +83,8 @@ entry: } ; 
CHECK: @fcmp_uge_select_fptosi -; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -90,7 +97,8 @@ entry: } ; CHECK: @fcmp_uge_select_i32 -; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -101,7 +109,8 @@ entry: } ; CHECK: @fcmp_ule_select_fptosi -; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -114,7 +123,8 @@ entry: } ; CHECK: @fcmp_ule_select_i32 -; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -125,7 +135,8 @@ entry: } ; CHECK: @fcmp_ult_select_fptosi -; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -138,7 +149,8 @@ entry: } ; CHECK: @fcmp_ult_select_i32 -; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) { entry: diff --git 
a/llvm/test/CodeGen/R600/store.ll b/llvm/test/CodeGen/R600/store.ll index 2609dab422f3..341c42791a1c 100644 --- a/llvm/test/CodeGen/R600/store.ll +++ b/llvm/test/CodeGen/R600/store.ll @@ -10,15 +10,15 @@ ; EG-CHECK: @store_i8 ; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X ; EG-CHECK: VTX_READ_8 [[VAL:T[0-9]\.X]], [[VAL]] -; IG 0: Get the byte index -; EG-CHECK: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x +; IG 0: Get the byte index and truncate the value +; EG-CHECK: AND_INT T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x +; EG-CHECK-NEXT: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.y +; EG-CHECK-NEXT: 3(4.203895e-45), 255(3.573311e-43) +; IG 1: Calculate the shift amount for the mask +; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x ; EG-CHECK-NEXT: 3 -; IG 1: Truncate the value and calculated the shift amount for the mask -; EG-CHECK: AND_INT T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.x -; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.y -; EG-CHECK: 255(3.573311e-43), 3 ; IG 2: Shift the value and the mask -; EG-CHECK: LSHL T[[RW_GPR]].X, PV.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]] +; EG-CHECK: LSHL T[[RW_GPR]].X, T{{[0-9]}}.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]] ; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]] ; EG-CHECK-NEXT: 255 ; IG 3: Initialize the Y and Z channels to zero @@ -39,15 +39,15 @@ entry: ; EG-CHECK: @store_i16 ; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X ; EG-CHECK: VTX_READ_16 [[VAL:T[0-9]\.X]], [[VAL]] -; IG 0: Get the byte index -; EG-CHECK: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x -; EG-CHECK-NEXT: 3 -; IG 1: Truncate the value and calculated the shift amount for the mask -; EG-CHECK: AND_INT T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.x -; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.y -; EG-CHECK: 65535(9.183409e-41), 3 +; IG 0: Get the 
byte index and truncate the value +; EG-CHECK: AND_INT T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x +; EG-CHECK: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.y +; EG-CHECK-NEXT: 3(4.203895e-45), 65535(9.183409e-41) +; IG 1: Calculate the shift amount for the mask +; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x +; EG-CHECK: 3 ; IG 2: Shift the value and the mask -; EG-CHECK: LSHL T[[RW_GPR]].X, PV.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]] +; EG-CHECK: LSHL T[[RW_GPR]].X, T{{[0-9]}}.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]] ; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]] ; EG-CHECK-NEXT: 65535 ; IG 3: Initialize the Y and Z channels to zero diff --git a/llvm/test/CodeGen/R600/sub.ll b/llvm/test/CodeGen/R600/sub.ll index 596400e50966..c7fed03d9492 100644 --- a/llvm/test/CodeGen/R600/sub.ll +++ b/llvm/test/CodeGen/R600/sub.ll @@ -2,8 +2,8 @@ ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s ;EG-CHECK: @test2 -;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @test2 ;SI-CHECK: V_SUB_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} @@ -19,10 +19,10 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { } ;EG-CHECK: @test4 -;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW],
T[0-9]+\.[XYZW]}} +;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @test4 ;SI-CHECK: V_SUB_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} diff --git a/llvm/test/CodeGen/R600/unsupported-cc.ll b/llvm/test/CodeGen/R600/unsupported-cc.ll index cf29833b1913..d3aa060adc0e 100644 --- a/llvm/test/CodeGen/R600/unsupported-cc.ll +++ b/llvm/test/CodeGen/R600/unsupported-cc.ll @@ -3,7 +3,8 @@ ; These tests are for condition codes that are not supported by the hardware ; CHECK: @slt -; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR ; CHECK-NEXT: 5(7.006492e-45) define void @slt(i32 addrspace(1)* %out, i32 %in) { entry: @@ -14,7 +15,8 @@ entry: } ; CHECK: @ult_i32 -; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR ; CHECK-NEXT: 5(7.006492e-45) define void @ult_i32(i32 addrspace(1)* %out, i32 %in) { entry: @@ -25,7 +27,8 @@ entry: } ; CHECK: @ult_float -; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @ult_float(float addrspace(1)* %out, float %in) { entry: @@ -36,7 +39,8 @@ entry: } ; CHECK: @olt -; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR ;CHECK-NEXT: 1084227584(5.000000e+00) define void @olt(float addrspace(1)* %out, float %in) { entry: @@ -47,7 +51,8 @@ entry: } ; CHECK: @sle -; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR ; CHECK-NEXT: 6(8.407791e-45) define void @sle(i32 addrspace(1)* %out, i32 %in) 
{ entry: @@ -58,7 +63,8 @@ entry: } ; CHECK: @ule_i32 -; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR ; CHECK-NEXT: 6(8.407791e-45) define void @ule_i32(i32 addrspace(1)* %out, i32 %in) { entry: @@ -69,7 +75,8 @@ entry: } ; CHECK: @ule_float -; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @ule_float(float addrspace(1)* %out, float %in) { entry: @@ -80,7 +87,8 @@ entry: } ; CHECK: @ole -; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR ; CHECK-NEXT:1084227584(5.000000e+00) define void @ole(float addrspace(1)* %out, float %in) { entry: diff --git a/llvm/test/CodeGen/R600/vselect.ll b/llvm/test/CodeGen/R600/vselect.ll index 72a90849a78d..8e9c5b59cd1b 100644 --- a/llvm/test/CodeGen/R600/vselect.ll +++ b/llvm/test/CodeGen/R600/vselect.ll @@ -2,8 +2,8 @@ ;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI-CHECK %s ;EG-CHECK: @test_select_v2i32 -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @test_select_v2i32 ;SI-CHECK: V_CNDMASK_B32_e64 @@ -20,8 +20,8 @@ entry: } ;EG-CHECK: @test_select_v2f32 -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\*? 
*}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @test_select_v2f32 ;SI-CHECK: V_CNDMASK_B32_e64 @@ -38,10 +38,10 @@ entry: } ;EG-CHECK: @test_select_v4i32 -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @test_select_v4i32 ;SI-CHECK: V_CNDMASK_B32_e64 @@ -60,10 +60,10 @@ entry: } ;EG-CHECK: @test_select_v4f32 -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: CNDE_INT {{\*? 
*}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in0, <4 x float> addrspace(1)* %in1) { entry: diff --git a/llvm/test/CodeGen/R600/work-item-intrinsics.ll b/llvm/test/CodeGen/R600/work-item-intrinsics.ll index 26ef304d1f52..d5fe79fbdd3d 100644 --- a/llvm/test/CodeGen/R600/work-item-intrinsics.ll +++ b/llvm/test/CodeGen/R600/work-item-intrinsics.ll @@ -3,7 +3,7 @@ ; R600-CHECK: @ngroups_x ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; R600-CHECK: MOV * [[VAL]], KC0[0].X +; R600-CHECK: MOV [[VAL]], KC0[0].X ; SI-CHECK: @ngroups_x ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 0 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] @@ -17,7 +17,7 @@ entry: ; R600-CHECK: @ngroups_y ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; R600-CHECK: MOV * [[VAL]], KC0[0].Y +; R600-CHECK: MOV [[VAL]], KC0[0].Y ; SI-CHECK: @ngroups_y ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 1 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] @@ -31,7 +31,7 @@ entry: ; R600-CHECK: @ngroups_z ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; R600-CHECK: MOV * [[VAL]], KC0[0].Z +; R600-CHECK: MOV [[VAL]], KC0[0].Z ; SI-CHECK: @ngroups_z ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 2 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] @@ -45,7 +45,7 @@ entry: ; R600-CHECK: @global_size_x ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; R600-CHECK: MOV * [[VAL]], KC0[0].W +; R600-CHECK: MOV [[VAL]], 
KC0[0].W ; SI-CHECK: @global_size_x ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 3 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] @@ -59,7 +59,7 @@ entry: ; R600-CHECK: @global_size_y ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; R600-CHECK: MOV * [[VAL]], KC0[1].X +; R600-CHECK: MOV [[VAL]], KC0[1].X ; SI-CHECK: @global_size_y ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 4 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] @@ -73,7 +73,7 @@ entry: ; R600-CHECK: @global_size_z ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; R600-CHECK: MOV * [[VAL]], KC0[1].Y +; R600-CHECK: MOV [[VAL]], KC0[1].Y ; SI-CHECK: @global_size_z ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 5 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] @@ -87,7 +87,7 @@ entry: ; R600-CHECK: @local_size_x ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; R600-CHECK: MOV * [[VAL]], KC0[1].Z +; R600-CHECK: MOV [[VAL]], KC0[1].Z ; SI-CHECK: @local_size_x ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 6 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] @@ -101,7 +101,7 @@ entry: ; R600-CHECK: @local_size_y ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; R600-CHECK: MOV * [[VAL]], KC0[1].W +; R600-CHECK: MOV [[VAL]], KC0[1].W ; SI-CHECK: @local_size_y ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 7 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] @@ -115,7 +115,7 @@ entry: ; R600-CHECK: @local_size_z ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; R600-CHECK: MOV * [[VAL]], KC0[2].X +; R600-CHECK: MOV [[VAL]], KC0[2].X ; SI-CHECK: @local_size_z ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 8 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] diff --git a/llvm/test/CodeGen/R600/wrong-transalu-pos-fix.ll b/llvm/test/CodeGen/R600/wrong-transalu-pos-fix.ll new file mode 100644 index 000000000000..c158076f8c95 --- /dev/null +++ 
b/llvm/test/CodeGen/R600/wrong-transalu-pos-fix.ll @@ -0,0 +1,89 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; We want all MULLO_INT inst to be last in their instruction group +;CHECK: @fill3d +;CHECK-NOT: MULLO_INT T[0-9]+ + +; ModuleID = 'radeon' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64" +target triple = "r600--" + +; Function Attrs: nounwind +define void @fill3d(i32 addrspace(1)* nocapture %out) #0 { +entry: + %x.i = tail call i32 @llvm.r600.read.global.size.x() #1 + %y.i18 = tail call i32 @llvm.r600.read.global.size.y() #1 + %mul = mul i32 %y.i18, %x.i + %z.i17 = tail call i32 @llvm.r600.read.global.size.z() #1 + %mul3 = mul i32 %mul, %z.i17 + %x.i.i = tail call i32 @llvm.r600.read.tgid.x() #1 + %x.i12.i = tail call i32 @llvm.r600.read.local.size.x() #1 + %mul26.i = mul i32 %x.i12.i, %x.i.i + %x.i4.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %add.i16 = add i32 %x.i4.i, %mul26.i + %mul7 = mul i32 %add.i16, %y.i18 + %y.i.i = tail call i32 @llvm.r600.read.tgid.y() #1 + %y.i14.i = tail call i32 @llvm.r600.read.local.size.y() #1 + %mul30.i = mul i32 %y.i14.i, %y.i.i + %y.i6.i = tail call i32 @llvm.r600.read.tidig.y() #1 + %add.i14 = add i32 %mul30.i, %mul7 + %mul819 = add i32 %add.i14, %y.i6.i + %add = mul i32 %mul819, %z.i17 + %z.i.i = tail call i32 @llvm.r600.read.tgid.z() #1 + %z.i16.i = tail call i32 @llvm.r600.read.local.size.z() #1 + %mul33.i = mul i32 %z.i16.i, %z.i.i + %z.i8.i = tail call i32 @llvm.r600.read.tidig.z() #1 + %add.i = add i32 %z.i8.i, %mul33.i + %add13 = add i32 %add.i, %add + %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %add13 + store i32 %mul3, i32 addrspace(1)* %arrayidx, align 4, !tbaa !3 + ret void +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.tgid.x() #1 + +; Function Attrs: 
nounwind readnone +declare i32 @llvm.r600.read.tgid.y() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.tgid.z() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.local.size.x() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.local.size.y() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.local.size.z() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.tidig.x() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.tidig.y() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.tidig.z() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.global.size.x() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.global.size.y() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.global.size.z() #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!opencl.kernels = !{!0, !1, !2} + +!0 = metadata !{null} +!1 = metadata !{null} +!2 = metadata !{void (i32 addrspace(1)*)* @fill3d} +!3 = metadata !{metadata !"int", metadata !4} +!4 = metadata !{metadata !"omnipotent char", metadata !5} +!5 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/llvm/test/CodeGen/R600/xor.ll b/llvm/test/CodeGen/R600/xor.ll index 84d4cd49e931..fa5cb209cd0f 100644 --- a/llvm/test/CodeGen/R600/xor.ll +++ b/llvm/test/CodeGen/R600/xor.ll @@ -2,8 +2,8 @@ ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s ;EG-CHECK: @xor_v2i32 -;EG-CHECK: XOR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: XOR_INT {{\*? 
*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @xor_v2i32 ;SI-CHECK: V_XOR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} @@ -19,10 +19,10 @@ define void @xor_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in } ;EG-CHECK: @xor_v4i32 -;EG-CHECK: XOR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: XOR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: XOR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: XOR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @xor_v4i32 ;SI-CHECK: V_XOR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} @@ -39,7 +39,7 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in } ;EG-CHECK: @xor_i1 -;EG-CHECK: XOR_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PV\.[XYZW]}} +;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}} ;SI-CHECK: @xor_i1 ;SI-CHECK: S_XOR_B64 {{SGPR[0-9]+_SGPR[0-9]+, SGPR[0-9]+_SGPR[0-9]+, SGPR[0-9]+_SGPR[0-9]+}}