From 3ecab8e4555aee0b4aa10c413696a67f55948c39 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 19 Sep 2019 16:26:14 +0000 Subject: [PATCH] Reapply r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics" This reverts r372314, reapplying r372285 and the commits which depend on it (r372286-r372293, and r372296-r372297) This was missing one switch to getTargetConstant in an untested case. llvm-svn: 372338 --- .../CodeGen/GlobalISel/InstructionSelector.h | 5 + .../GlobalISel/InstructionSelectorImpl.h | 14 +- llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h | 3 + .../include/llvm/Target/TargetSelectionDAG.td | 5 + llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 27 +- llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 10 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 18 +- .../lib/Target/AArch64/AArch64InstrFormats.td | 4 +- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 +- .../AMDGPU/AMDGPUInstructionSelector.cpp | 279 +++++- .../Target/AMDGPU/AMDGPUInstructionSelector.h | 7 + .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 60 ++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 5 + .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 399 ++++++++- .../Target/AMDGPU/AMDGPURegisterBankInfo.h | 40 +- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 4 + llvm/lib/Target/AMDGPU/BUFInstructions.td | 110 +-- llvm/lib/Target/AMDGPU/DSInstructions.td | 2 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 104 +-- llvm/lib/Target/AMDGPU/SIInstructions.td | 12 +- llvm/lib/Target/AMDGPU/SOPInstructions.td | 14 +- llvm/lib/Target/AMDGPU/VOP1Instructions.td | 18 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 44 +- llvm/lib/Target/ARM/ARMISelLowering.cpp | 24 +- llvm/lib/Target/ARM/ARMInstrInfo.td | 52 +- llvm/lib/Target/ARM/ARMInstrThumb2.td | 48 +- .../Target/Hexagon/HexagonDepMapAsm2Intrin.td | 696 +++++++-------- llvm/lib/Target/Hexagon/HexagonDepOperands.td | 83 +- llvm/lib/Target/Hexagon/HexagonIntrinsics.td | 46 +- llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td | 4 +- llvm/lib/Target/Mips/Mips64InstrInfo.td | 1 + llvm/lib/Target/Mips/MipsDSPInstrInfo.td | 19 +- llvm/lib/Target/Mips/MipsInstrInfo.td | 2 + llvm/lib/Target/Mips/MipsMSAInstrInfo.td | 55 +- llvm/lib/Target/Mips/MipsSEISelLowering.cpp | 3 +- llvm/lib/Target/PowerPC/PPCInstrAltivec.td | 6 +- llvm/lib/Target/PowerPC/PPCInstrVSX.td | 4 +- llvm/lib/Target/RISCV/RISCVInstrInfoA.td | 8 +- .../Target/SystemZ/SystemZISelDAGToDAG.cpp | 6 +- .../Target/SystemZ/SystemZISelLowering.cpp | 49 +- .../lib/Target/SystemZ/SystemZInstrFormats.td | 166 ++-- llvm/lib/Target/SystemZ/SystemZInstrVector.td | 18 +- llvm/lib/Target/SystemZ/SystemZOperands.td | 121 +-- llvm/lib/Target/SystemZ/SystemZOperators.td | 6 +- llvm/lib/Target/SystemZ/SystemZPatterns.td | 4 +- .../SystemZ/SystemZSelectionDAGInfo.cpp | 8 +- .../WebAssembly/WebAssemblyInstrBulkMemory.td | 4 +- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 12 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 270 +++--- llvm/lib/Target/X86/X86InstrAVX512.td | 224 ++--- llvm/lib/Target/X86/X86InstrMMX.td | 8 +- llvm/lib/Target/X86/X86InstrSSE.td | 204 ++--- llvm/lib/Target/X86/X86InstrSystem.td | 2 +- llvm/lib/Target/X86/X86InstrTSX.td | 2 +- llvm/lib/Target/X86/X86InstrXOP.td | 16 +- .../AArch64/GlobalISel/arm64-irtranslator.ll | 3 +- .../GlobalISel/inst-select-amdgcn.exp.mir | 12 +- .../inst-select-amdgcn.s.sendmsg.mir | 3 +- .../GlobalISel/irtranslator-amdgcn-sendmsg.ll | 15 + .../GlobalISel/irtranslator-amdgpu_ps.ll | 12 +- .../GlobalISel/irtranslator-amdgpu_vs.ll | 9 +- 
.../irtranslator-struct-return-intrinsics.ll | 5 +- ...llvm.amdgcn.raw.buffer.store.format.f16.ll | 519 ++++++++++++ ...llvm.amdgcn.raw.buffer.store.format.f32.ll | 314 +++++++ .../llvm.amdgcn.raw.buffer.store.ll | 791 ++++++++++++++++++ .../AMDGPU/GlobalISel/llvm.amdgcn.s.sleep.ll | 45 + .../GlobalISel/regbankselect-amdgcn-exp.mir | 40 +- .../regbankselect-amdgcn.ds.swizzle.mir | 21 + .../regbankselect-amdgcn.image.load.1d.ll | 181 ++++ .../regbankselect-amdgcn.image.sample.1d.ll | 268 ++++++ .../regbankselect-amdgcn.raw.buffer.load.ll | 173 ++++ ...regbankselect-amdgcn.struct.buffer.load.ll | 179 ++++ ...egbankselect-amdgcn.struct.buffer.store.ll | 174 ++++ .../AMDGPU/GlobalISel/regbankselect-smulh.mir | 62 +- .../AMDGPU/GlobalISel/regbankselect-umulh.mir | 62 +- ...ssert-dead-def-subreg-use-other-subreg.mir | 70 ++ llvm/test/TableGen/immarg.td | 31 + llvm/utils/TableGen/GlobalISelEmitter.cpp | 27 +- 79 files changed, 5010 insertions(+), 1365 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.sleep.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll create mode 100644 llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir create mode 100644 llvm/test/TableGen/immarg.td diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index e4877b7ad639..fd3dc743000b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -220,6 +220,11 @@ enum { /// - OpIdx - Operand index GIM_CheckIsMBB, + /// Check the specified operand is an Imm + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + GIM_CheckIsImm, + /// Check if the specified operand is safe to fold into the current /// instruction. 
/// - InsnID - Instruction ID

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
index bcb1df64a465..08f2f54bcf90 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
@@ -690,7 +690,19 @@ bool InstructionSelector::executeMatchTable(
       }
       break;
     }
-
+    case GIM_CheckIsImm: {
+      int64_t InsnID = MatchTable[CurrentIdx++];
+      int64_t OpIdx = MatchTable[CurrentIdx++];
+      DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+                      dbgs() << CurrentIdx << ": GIM_CheckIsImm(MIs[" << InsnID
+                             << "]->getOperand(" << OpIdx << "))\n");
+      assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
+      if (!State.MIs[InsnID]->getOperand(OpIdx).isImm()) {
+        if (handleReject() == RejectAndGiveUp)
+          return false;
+      }
+      break;
+    }
     case GIM_CheckIsSafeToFold: {
       int64_t InsnID = MatchTable[CurrentIdx++];
       DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),

diff --git a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 0ddcca0932ec..eaa55c911b81 100644
--- a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -374,6 +374,9 @@ namespace llvm {
     /// Returns a mask for which lanes get read/written by the given (register)
     /// machine operand.
     LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
+
+    /// Returns true if the def register in \p MO has no uses.
+    bool deadDefHasNoUse(const MachineOperand &MO);
   };

   /// Creates a new SUnit and return a ptr to it.

diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index f42faad73602..8856cf003af0 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -848,6 +848,11 @@ class ImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm,
+// Convenience wrapper for ImmLeaf to use timm instead of imm.
+class TImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm,
+  SDNode ImmNode = timm> : ImmLeaf<vt, pred, xform, ImmNode>;
+
 // An ImmLeaf except that Imm is an APInt. This is useful when you need to
 // zero-extend the immediate instead of sign-extend it.
 //

diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 66082df0108b..c44532e240f8 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1617,14 +1617,29 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   if (isa<FPMathOperator>(CI))
     MIB->copyIRFlags(CI);

-  for (auto &Arg : CI.arg_operands()) {
+  for (auto &Arg : enumerate(CI.arg_operands())) {
     // Some intrinsics take metadata parameters. Reject them.
-    if (isa<MetadataAsValue>(Arg))
+    if (isa<MetadataAsValue>(Arg.value()))
       return false;
-    ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg);
-    if (VRegs.size() > 1)
-      return false;
-    MIB.addUse(VRegs[0]);
+
+    // If this is required to be an immediate, don't materialize it in a
+    // register.
+    if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) {
+        // imm arguments are more convenient than cimm (and realistically
+        // probably sufficient), so use them.
+        assert(CI->getBitWidth() <= 64 &&
+               "large intrinsic immediates not handled");
+        MIB.addImm(CI->getSExtValue());
+      } else {
+        MIB.addFPImm(cast<ConstantFP>(Arg.value()));
+      }
+    } else {
+      ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value());
+      if (VRegs.size() > 1)
+        return false;
+      MIB.addUse(VRegs[0]);
+    }
   }

   // Add a MachineMemOperand if it is a target mem intrinsic.
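As an illustrative sketch of the effect of the IRTranslator hunk above (reconstructed, not copied from the patch's tests): for a call such as llvm.amdgcn.s.sleep(i32 1), the translator previously materialized the immarg operand into a virtual register:

  %0:_(s32) = G_CONSTANT i32 1
  G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sleep), %0(s32)

With this change the immediate stays directly on the instruction, which is what the new GIM_CheckIsImm match-table check and the timm/TImmLeaf TableGen leaves match against:

  G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sleep), 1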
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index af07f8f948f9..92420e697cea 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -373,6 +373,13 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
   return TRI->getSubRegIndexLaneMask(SubReg);
 }

+bool ScheduleDAGInstrs::deadDefHasNoUse(const MachineOperand &MO) {
+  auto RegUse = CurrentVRegUses.find(MO.getReg());
+  if (RegUse == CurrentVRegUses.end())
+    return true;
+  return (RegUse->LaneMask & getLaneMaskForMO(MO)).none();
+}
+
 /// Adds register output and data dependencies from this SUnit to instructions
 /// that occur later in the same scheduling region if they read from or write to
 /// the virtual register defined at OperIdx.
@@ -402,8 +409,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
   }

   if (MO.isDead()) {
-    assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&
-           "Dead defs should have no uses");
+    assert(deadDefHasNoUse(MO) && "Dead defs should have no uses");
   } else {
     // Add data dependence to all uses we found so far.
     const TargetSubtargetInfo &ST = MF.getSubtarget();

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ce3255c081be..98f7d877c05e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4768,8 +4768,22 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,

   // Add all operands of the call to the operand list.
   for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
-    SDValue Op = getValue(I.getArgOperand(i));
-    Ops.push_back(Op);
+    const Value *Arg = I.getArgOperand(i);
+    if (!I.paramHasAttr(i, Attribute::ImmArg)) {
+      Ops.push_back(getValue(Arg));
+      continue;
+    }
+
+    // Use TargetConstant instead of a regular constant for immarg.
+    EVT VT = TLI.getValueType(*DL, Arg->getType(), true);
+    if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) {
+      assert(CI->getBitWidth() <= 64 &&
+             "large intrinsic immediates not handled");
+      Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT));
+    } else {
+      Ops.push_back(
+          DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT));
+    }
   }

   SmallVector<EVT, 4> ValueVTs;

diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 19b10fc0806f..af61521d4b48 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -685,11 +685,11 @@ def logical_imm64_not : Operand<i64> {

 // iXX_imm0_65535 predicates - True if the immediate is in the range [0,65535].
 let ParserMatchClass = AsmImmRange<0, 65535>, PrintMethod = "printImmHex" in {
-def i32_imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
+def i32_imm0_65535 : Operand<i32>, TImmLeaf<i32, [{
   return ((uint32_t)Imm) < 65536;
 }]>;
-def i64_imm0_65535 : Operand<i64>, ImmLeaf<i64, [{
+def i64_imm0_65535 : Operand<i64>, TImmLeaf<i64, [{
   return ((uint64_t)Imm) < 65536;
 }]>;
 }

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index df234223be6b..d89229865e4f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -798,7 +798,7 @@ def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
 let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in {
 def HWASAN_CHECK_MEMACCESS : Pseudo<
   (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
-  [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 imm:$accessinfo))]>,
+  [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
   Sched<[]>;
 }

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 217b39969961..73486b969f45 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -22,6 +22,7 @@
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -245,10 +246,6 @@ AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
   }
 }

-static int64_t getConstant(const MachineInstr *MI) {
-  return MI->getOperand(1).getCImm()->getSExtValue();
-}
-
 static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
   switch (Opc) {
   case AMDGPU::G_AND:
@@ -737,6 +734,260 @@ buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
     .addImm(Enabled);
 }

+static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
+  int64_t C;
+  if (mi_match(Reg, MRI, m_ICst(C)) && C == 0)
+    return true;
+
+  // FIXME: matcher should ignore copies
+  return mi_match(Reg, MRI, m_Copy(m_ICst(C))) && C == 0;
+}
+
+static unsigned extractGLC(unsigned CachePolicy) {
+  return CachePolicy & 1;
+}
+
+static unsigned extractSLC(unsigned CachePolicy) {
+  return (CachePolicy >> 1) & 1;
+}
+
+static unsigned extractDLC(unsigned CachePolicy) {
+  return (CachePolicy >> 2) & 1;
+}
+
+// Returns Base register, constant offset, and offset def point.
+static std::tuple<Register, unsigned, MachineInstr *>
+getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
+  MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
+  if (!Def)
+    return {Reg, 0, nullptr};
+
+  if (Def->getOpcode() == AMDGPU::G_CONSTANT) {
+    unsigned Offset;
+    const MachineOperand &Op = Def->getOperand(1);
+    if (Op.isImm())
+      Offset = Op.getImm();
+    else
+      Offset = Op.getCImm()->getZExtValue();
+
+    return {Register(), Offset, Def};
+  }
+
+  int64_t Offset;
+  if (Def->getOpcode() == AMDGPU::G_ADD) {
+    // TODO: Handle G_OR used for add case
+    if (mi_match(Def->getOperand(1).getReg(), MRI, m_ICst(Offset)))
+      return {Def->getOperand(0).getReg(), Offset, Def};
+
+    // FIXME: matcher should ignore copies
+    if (mi_match(Def->getOperand(1).getReg(), MRI, m_Copy(m_ICst(Offset))))
+      return {Def->getOperand(0).getReg(), Offset, Def};
+  }
+
+  return {Reg, 0, Def};
+}
+
+static unsigned getBufferStoreOpcode(LLT Ty,
+                                     const unsigned MemSize,
+                                     const bool Offen) {
+  const int Size = Ty.getSizeInBits();
+  switch (8 * MemSize) {
+  case 8:
+    return Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact :
+                   AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
+  case 16:
+    return Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact :
+                   AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
+  default:
+    unsigned Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact :
+                           AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
+    if (Size > 32)
+      Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32);
+    return Opc;
+  }
+}
+
+static unsigned getBufferStoreFormatOpcode(LLT Ty,
+                                           const unsigned MemSize,
+                                           const bool Offen) {
+  bool IsD16Packed = Ty.getScalarSizeInBits() == 16;
+  bool IsD16Unpacked = 8 * MemSize < Ty.getSizeInBits();
+  int NumElts = Ty.isVector() ? Ty.getNumElements() : 1;
+
+  if (IsD16Packed) {
+    switch (NumElts) {
+    case 1:
+      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
+                     AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
+    case 2:
+      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact :
+                     AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFSET_exact;
+    case 3:
+      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFEN_exact :
+                     AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFSET_exact;
+    case 4:
+      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact :
+                     AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFSET_exact;
+    default:
+      return -1;
+    }
+  }
+
+  if (IsD16Unpacked) {
+    switch (NumElts) {
+    case 1:
+      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
+                     AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
+    case 2:
+      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact :
+                     AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFSET_exact;
+    case 3:
+      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFEN_exact :
+                     AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFSET_exact;
+    case 4:
+      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact :
+                     AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFSET_exact;
+    default:
+      return -1;
+    }
+  }
+
+  switch (NumElts) {
+  case 1:
+    return Offen ? AMDGPU::BUFFER_STORE_FORMAT_X_OFFEN_exact :
+                   AMDGPU::BUFFER_STORE_FORMAT_X_OFFSET_exact;
+  case 2:
+    return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XY_OFFEN_exact :
+                   AMDGPU::BUFFER_STORE_FORMAT_XY_OFFSET_exact;
+  case 3:
+    return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFEN_exact :
+                   AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFSET_exact;
+  case 4:
+    return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFEN_exact :
+                   AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFSET_exact;
+  default:
+    return -1;
+  }
+
+  llvm_unreachable("unhandled buffer store");
+}
+
+// TODO: Move this to combiner
+// Returns base register, imm offset, total constant offset.
+std::tuple<Register, unsigned, unsigned>
+AMDGPUInstructionSelector::splitBufferOffsets(MachineIRBuilder &B,
+                                              Register OrigOffset) const {
+  const unsigned MaxImm = 4095;
+  Register BaseReg;
+  unsigned TotalConstOffset;
+  MachineInstr *OffsetDef;
+  MachineRegisterInfo &MRI = *B.getMRI();
+
+  std::tie(BaseReg, TotalConstOffset, OffsetDef)
+    = getBaseWithConstantOffset(MRI, OrigOffset);
+
+  unsigned ImmOffset = TotalConstOffset;
+
+  // If the immediate value is too big for the immoffset field, put the value
+  // and -4096 into the immoffset field so that the value that is copied/added
+  // for the voffset field is a multiple of 4096, and it stands more chance
+  // of being CSEd with the copy/add for another similar load/store.
+  // However, do not do that rounding down to a multiple of 4096 if that is a
+  // negative number, as it appears to be illegal to have a negative offset
+  // in the vgpr, even if adding the immediate offset makes it positive.
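+  //
+  // Illustrative worked example (an editorial addition, not in the original
+  // patch): with MaxImm = 4095, a total constant offset of 4100 splits into
+  // Overflow = 4100 & ~4095 = 4096, which is moved into the base register,
+  // and ImmOffset = 4100 - 4096 = 4. If the rounded-down value would be
+  // negative, the whole constant is instead moved into the register and
+  // ImmOffset becomes 0, as the code below implements.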
+ unsigned Overflow = ImmOffset & ~MaxImm; + ImmOffset -= Overflow; + if ((int32_t)Overflow < 0) { + Overflow += ImmOffset; + ImmOffset = 0; + } + + if (Overflow != 0) { + // In case this is in a waterfall loop, insert offset code at the def point + // of the offset, not inside the loop. + MachineBasicBlock::iterator OldInsPt = B.getInsertPt(); + MachineBasicBlock &OldMBB = B.getMBB(); + B.setInstr(*OffsetDef); + + if (!BaseReg) { + BaseReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + B.buildInstr(AMDGPU::V_MOV_B32_e32) + .addDef(BaseReg) + .addImm(Overflow); + } else { + Register OverflowVal = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + B.buildInstr(AMDGPU::V_MOV_B32_e32) + .addDef(OverflowVal) + .addImm(Overflow); + + Register NewBaseReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + TII.getAddNoCarry(B.getMBB(), B.getInsertPt(), B.getDebugLoc(), NewBaseReg) + .addReg(BaseReg) + .addReg(OverflowVal, RegState::Kill) + .addImm(0); + BaseReg = NewBaseReg; + } + + B.setInsertPt(OldMBB, OldInsPt); + } + + return {BaseReg, ImmOffset, TotalConstOffset}; +} + +bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI, + bool IsFormat) const { + MachineIRBuilder B(MI); + MachineRegisterInfo &MRI = *B.getMRI(); + MachineFunction &MF = B.getMF(); + Register VData = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(VData); + + int Size = Ty.getSizeInBits(); + if (Size % 32 != 0) + return false; + + // FIXME: Verifier should enforce 1 MMO for these intrinsics. + MachineMemOperand *MMO = *MI.memoperands_begin(); + const int MemSize = MMO->getSize(); + + Register RSrc = MI.getOperand(2).getReg(); + Register VOffset = MI.getOperand(3).getReg(); + Register SOffset = MI.getOperand(4).getReg(); + unsigned CachePolicy = MI.getOperand(5).getImm(); + unsigned ImmOffset; + unsigned TotalOffset; + + std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset); + if (TotalOffset != 0) + MMO = MF.getMachineMemOperand(MMO, TotalOffset, MemSize); + + const bool Offen = !isZero(VOffset, MRI); + + int Opc = IsFormat ? 
getBufferStoreFormatOpcode(Ty, MemSize, Offen) : + getBufferStoreOpcode(Ty, MemSize, Offen); + if (Opc == -1) + return false; + + MachineInstrBuilder MIB = B.buildInstr(Opc) + .addUse(VData); + + if (Offen) + MIB.addUse(VOffset); + + MIB.addUse(RSrc) + .addUse(SOffset) + .addImm(ImmOffset) + .addImm(extractGLC(CachePolicy)) + .addImm(extractSLC(CachePolicy)) + .addImm(0) // tfe: FIXME: Remove from inst + .addImm(extractDLC(CachePolicy)) + .addMemOperand(MMO); + + MI.eraseFromParent(); + + return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); +} + bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); @@ -746,10 +997,10 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( unsigned IntrinsicID = I.getIntrinsicID(); switch (IntrinsicID) { case Intrinsic::amdgcn_exp: { - int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg())); - int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg())); - int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg())); - int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg())); + int64_t Tgt = I.getOperand(1).getImm(); + int64_t Enabled = I.getOperand(2).getImm(); + int64_t Done = I.getOperand(7).getImm(); + int64_t VM = I.getOperand(8).getImm(); MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(), I.getOperand(4).getReg(), @@ -762,13 +1013,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( } case Intrinsic::amdgcn_exp_compr: { const DebugLoc &DL = I.getDebugLoc(); - int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg())); - int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg())); + int64_t Tgt = I.getOperand(1).getImm(); + int64_t Enabled = I.getOperand(2).getImm(); Register Reg0 = I.getOperand(3).getReg(); Register Reg1 = I.getOperand(4).getReg(); Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg())); - int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg())); + int64_t Done = I.getOperand(5).getImm(); + int64_t VM = I.getOperand(6).getImm(); BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef); MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM, @@ -791,6 +1042,10 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( MRI.setRegClass(Reg, TRI.getWaveMaskRegClass()); return true; } + case Intrinsic::amdgcn_raw_buffer_store: + return selectStoreIntrinsic(I, false); + case Intrinsic::amdgcn_raw_buffer_store_format: + return selectStoreIntrinsic(I, true); default: return selectImpl(I, *CoverageInfo); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 67a5062ca395..8c6e89761945 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -35,6 +35,7 @@ class AMDGPUInstrInfo; class AMDGPURegisterBankInfo; class GCNSubtarget; class MachineInstr; +class MachineIRBuilder; class MachineOperand; class MachineRegisterInfo; class SIInstrInfo; @@ -82,6 +83,12 @@ private: bool selectG_IMPLICIT_DEF(MachineInstr &I) const; bool selectG_INSERT(MachineInstr &I) const; bool selectG_INTRINSIC(MachineInstr &I) const; + + std::tuple + splitBufferOffsets(MachineIRBuilder &B, Register OrigOffset) const; + + bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const; + bool 
selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const; int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const; bool selectG_ICMP(MachineInstr &I) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index abfb45259359..02b3b4210267 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1751,6 +1751,62 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI, return true; } +/// Handle register layout difference for f16 images for some subtargets. +Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B, + MachineRegisterInfo &MRI, + Register Reg) const { + if (!ST.hasUnpackedD16VMem()) + return Reg; + + const LLT S16 = LLT::scalar(16); + const LLT S32 = LLT::scalar(32); + LLT StoreVT = MRI.getType(Reg); + assert(StoreVT.isVector() && StoreVT.getElementType() == S16); + + auto Unmerge = B.buildUnmerge(S16, Reg); + + SmallVector WideRegs; + for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I) + WideRegs.push_back(B.buildAnyExt(S32, Unmerge.getReg(I)).getReg(0)); + + int NumElts = StoreVT.getNumElements(); + + return B.buildBuildVector(LLT::vector(NumElts, S32), WideRegs).getReg(0); +} + +bool AMDGPULegalizerInfo::legalizeRawBufferStore(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B, + bool IsFormat) const { + // TODO: Reject f16 format on targets where unsupported. + Register VData = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(VData); + + B.setInstr(MI); + + const LLT S32 = LLT::scalar(32); + const LLT S16 = LLT::scalar(16); + + // Fixup illegal register types for i8 stores. + if (Ty == LLT::scalar(8) || Ty == S16) { + Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0); + MI.getOperand(1).setReg(AnyExt); + return true; + } + + if (Ty.isVector()) { + if (Ty.getElementType() == S16 && Ty.getNumElements() <= 4) { + if (IsFormat) + MI.getOperand(1).setReg(handleD16VData(B, MRI, VData)); + return true; + } + + return Ty.getElementType() == S32 && Ty.getNumElements() <= 4; + } + + return Ty == S32; +} + bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { @@ -1843,6 +1899,10 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, MI.eraseFromParent(); return true; } + case Intrinsic::amdgcn_raw_buffer_store: + return legalizeRawBufferStore(MI, MRI, B, false); + case Intrinsic::amdgcn_raw_buffer_store_format: + return legalizeRawBufferStore(MI, MRI, B, true); default: return true; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 99564a04dbb2..855444fa2762 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -83,6 +83,11 @@ public: MachineIRBuilder &B) const; bool legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, unsigned AddrSpace) const; + + Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, + Register Reg) const; + bool legalizeRawBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, bool IsFormat) const; bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 92d5a5d07c7d..0032d0468627 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ 
b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -33,6 +34,7 @@ #include "AMDGPUGenRegisterBankInfo.def" using namespace llvm; +using namespace MIPatternMatch; namespace { @@ -84,9 +86,11 @@ public: }; } -AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI) +AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST) : AMDGPUGenRegisterBankInfo(), - TRI(static_cast(&TRI)) { + Subtarget(ST), + TRI(Subtarget.getRegisterInfo()), + TII(Subtarget.getInstrInfo()) { // HACK: Until this is fully tablegen'd. static bool AlreadyInit = false; @@ -638,8 +642,10 @@ static LLT getHalfSizedType(LLT Ty) { /// /// There is additional complexity to try for compare values to identify the /// unique values used. -void AMDGPURegisterBankInfo::executeInWaterfallLoop( - MachineInstr &MI, MachineRegisterInfo &MRI, +bool AMDGPURegisterBankInfo::executeInWaterfallLoop( + MachineIRBuilder &B, + MachineInstr &MI, + MachineRegisterInfo &MRI, ArrayRef OpIndices) const { MachineFunction *MF = MI.getParent()->getParent(); const GCNSubtarget &ST = MF->getSubtarget(); @@ -662,9 +668,8 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop( // No operands need to be replaced, so no need to loop. if (SGPROperandRegs.empty()) - return; + return false; - MachineIRBuilder B(MI); SmallVector ResultRegs; SmallVector InitResultRegs; SmallVector PhiRegs; @@ -922,6 +927,18 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop( B.buildInstr(AMDGPU::S_MOV_B64_term) .addDef(AMDGPU::EXEC) .addReg(SaveExecReg); + + // Restore the insert point before the original instruction. + B.setInsertPt(MBB, MBB.end()); + + return true; +} + +bool AMDGPURegisterBankInfo::executeInWaterfallLoop( + MachineInstr &MI, MachineRegisterInfo &MRI, + ArrayRef OpIndices) const { + MachineIRBuilder B(MI); + return executeInWaterfallLoop(B, MI, MRI, OpIndices); } // Legalize an operand that must be an SGPR by inserting a readfirstlane. @@ -1031,6 +1048,33 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI, return true; } +bool AMDGPURegisterBankInfo::applyMappingImage( + MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, + MachineRegisterInfo &MRI, int RsrcIdx) const { + const int NumDefs = MI.getNumExplicitDefs(); + + // The reported argument index is relative to the IR intrinsic call arguments, + // so we need to shift by the number of defs and the intrinsic ID. + RsrcIdx += NumDefs + 1; + + // Insert copies to VGPR arguments. + applyDefaultMapping(OpdMapper); + + // Fixup any SGPR arguments. + SmallVector SGPRIndexes; + for (int I = NumDefs, NumOps = MI.getNumOperands(); I != NumOps; ++I) { + if (!MI.getOperand(I).isReg()) + continue; + + // If this intrinsic has a sampler, it immediately follows rsrc. + if (I == RsrcIdx || I == RsrcIdx + 1) + SGPRIndexes.push_back(I); + } + + executeInWaterfallLoop(MI, MRI, SGPRIndexes); + return true; +} + // For cases where only a single copy is inserted for matching register banks. 
// Replace the register in the instruction operand static void substituteSimpleCopyRegs( @@ -1042,6 +1086,184 @@ static void substituteSimpleCopyRegs( } } +/// Handle register layout difference for f16 images for some subtargets. +Register AMDGPURegisterBankInfo::handleD16VData(MachineIRBuilder &B, + MachineRegisterInfo &MRI, + Register Reg) const { + if (!Subtarget.hasUnpackedD16VMem()) + return Reg; + + const LLT S16 = LLT::scalar(16); + LLT StoreVT = MRI.getType(Reg); + if (!StoreVT.isVector() || StoreVT.getElementType() != S16) + return Reg; + + auto Unmerge = B.buildUnmerge(S16, Reg); + + + SmallVector WideRegs; + for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I) + WideRegs.push_back(Unmerge.getReg(I)); + + const LLT S32 = LLT::scalar(32); + int NumElts = StoreVT.getNumElements(); + + return B.buildMerge(LLT::vector(NumElts, S32), WideRegs).getReg(0); +} + +static std::pair +getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) { + int64_t Const; + if (mi_match(Reg, MRI, m_ICst(Const))) + return std::make_pair(Register(), Const); + + Register Base; + if (mi_match(Reg, MRI, m_GAdd(m_Reg(Base), m_ICst(Const)))) + return std::make_pair(Base, Const); + + // TODO: Handle G_OR used for add case + return std::make_pair(Reg, 0); +} + +std::pair +AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B, + Register OrigOffset) const { + const unsigned MaxImm = 4095; + Register BaseReg; + unsigned ImmOffset; + const LLT S32 = LLT::scalar(32); + + std::tie(BaseReg, ImmOffset) = getBaseWithConstantOffset(*B.getMRI(), + OrigOffset); + + unsigned C1 = 0; + if (ImmOffset != 0) { + // If the immediate value is too big for the immoffset field, put the value + // and -4096 into the immoffset field so that the value that is copied/added + // for the voffset field is a multiple of 4096, and it stands more chance + // of being CSEd with the copy/add for another similar load/store. + // However, do not do that rounding down to a multiple of 4096 if that is a + // negative number, as it appears to be illegal to have a negative offset + // in the vgpr, even if adding the immediate offset makes it positive. + unsigned Overflow = ImmOffset & ~MaxImm; + ImmOffset -= Overflow; + if ((int32_t)Overflow < 0) { + Overflow += ImmOffset; + ImmOffset = 0; + } + + C1 = ImmOffset; + if (Overflow != 0) { + if (!BaseReg) + BaseReg = B.buildConstant(S32, Overflow).getReg(0); + else { + auto OverflowVal = B.buildConstant(S32, Overflow); + BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0); + } + } + } + + if (!BaseReg) + BaseReg = B.buildConstant(S32, 0).getReg(0); + + return {BaseReg, C1}; +} + +static bool isZero(Register Reg, MachineRegisterInfo &MRI) { + int64_t C; + return mi_match(Reg, MRI, m_ICst(C)) && C == 0; +} + +static unsigned extractGLC(unsigned CachePolicy) { + return CachePolicy & 1; +} + +static unsigned extractSLC(unsigned CachePolicy) { + return (CachePolicy >> 1) & 1; +} + +static unsigned extractDLC(unsigned CachePolicy) { + return (CachePolicy >> 2) & 1; +} + +MachineInstr * +AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B, + MachineInstr &MI) const { + MachineRegisterInfo &MRI = *B.getMRI(); + executeInWaterfallLoop(B, MI, MRI, {2, 4}); + + // FIXME: DAG lowering brokenly changes opcode based on FP vs. integer. + + Register VData = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(VData); + + int EltSize = Ty.getScalarSizeInBits(); + int Size = Ty.getSizeInBits(); + + // FIXME: Broken integer truncstore. 
+ if (EltSize != 32) + report_fatal_error("unhandled intrinsic store"); + + // FIXME: Verifier should enforce 1 MMO for these intrinsics. + const int MemSize = (*MI.memoperands_begin())->getSize(); + + + Register RSrc = MI.getOperand(2).getReg(); + Register VOffset = MI.getOperand(3).getReg(); + Register SOffset = MI.getOperand(4).getReg(); + unsigned CachePolicy = MI.getOperand(5).getImm(); + + unsigned ImmOffset; + std::tie(VOffset, ImmOffset) = splitBufferOffsets(B, VOffset); + + const bool Offen = !isZero(VOffset, MRI); + + unsigned Opc = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact; + switch (8 * MemSize) { + case 8: + Opc = Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact : + AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact; + break; + case 16: + Opc = Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact : + AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact; + break; + default: + Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact : + AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact; + if (Size > 32) + Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32); + break; + } + + + // Set the insertion point back to the instruction in case it was moved into a + // loop. + B.setInstr(MI); + + MachineInstrBuilder MIB = B.buildInstr(Opc) + .addUse(VData); + + if (Offen) + MIB.addUse(VOffset); + + MIB.addUse(RSrc) + .addUse(SOffset) + .addImm(ImmOffset) + .addImm(extractGLC(CachePolicy)) + .addImm(extractSLC(CachePolicy)) + .addImm(0) // tfe: FIXME: Remove from inst + .addImm(extractDLC(CachePolicy)) + .cloneMemRefs(MI); + + // FIXME: We need a way to report failure from applyMappingImpl. + // Insert constrain copies before inserting the loop. + if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this)) + report_fatal_error("failed to constrain selected store intrinsic"); + + return MIB; +} + void AMDGPURegisterBankInfo::applyMappingImpl( const OperandsMapper &OpdMapper) const { MachineInstr &MI = OpdMapper.getMI(); @@ -1405,7 +1627,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl( break; } case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + auto IntrID = MI.getIntrinsicID(); + switch (IntrID) { case Intrinsic::amdgcn_buffer_load: { executeInWaterfallLoop(MI, MRI, { 2 }); return; @@ -1424,9 +1647,39 @@ void AMDGPURegisterBankInfo::applyMappingImpl( constrainOpWithReadfirstlane(MI, MRI, 2); // M0 return; } - default: + case Intrinsic::amdgcn_raw_buffer_load: + case Intrinsic::amdgcn_raw_buffer_load_format: + case Intrinsic::amdgcn_raw_tbuffer_load: + case Intrinsic::amdgcn_raw_buffer_store: + case Intrinsic::amdgcn_raw_buffer_store_format: + case Intrinsic::amdgcn_raw_tbuffer_store: { + applyDefaultMapping(OpdMapper); + executeInWaterfallLoop(MI, MRI, {2, 4}); + return; + } + case Intrinsic::amdgcn_struct_buffer_load: + case Intrinsic::amdgcn_struct_buffer_store: + case Intrinsic::amdgcn_struct_tbuffer_load: + case Intrinsic::amdgcn_struct_tbuffer_store: { + applyDefaultMapping(OpdMapper); + executeInWaterfallLoop(MI, MRI, {2, 5}); + return; + } + default: { + if (const AMDGPU::RsrcIntrinsic *RSrcIntrin = + AMDGPU::lookupRsrcIntrinsic(IntrID)) { + // Non-images can have complications from operands that allow both SGPR + // and VGPR. For now it's too complicated to figure out the final opcode + // to derive the register bank from the MCInstrDesc. 
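+          // (Editorial note, not in the original patch: for image intrinsics
+          // the rsrc descriptor, and the sampler that immediately follows it,
+          // must be uniform; applyMappingImage handles VGPR inputs in those
+          // positions by rewriting the operation into a waterfall loop over
+          // the unique descriptor values, via executeInWaterfallLoop above.)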
+ if (RSrcIntrin->IsImage) { + applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg); + return; + } + } + break; } + } break; } case AMDGPU::G_LOAD: @@ -1531,6 +1784,45 @@ AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const { MI.getNumOperands()); } +const RegisterBankInfo::InstructionMapping & +AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI, + const MachineInstr &MI, + int RsrcIdx) const { + // The reported argument index is relative to the IR intrinsic call arguments, + // so we need to shift by the number of defs and the intrinsic ID. + RsrcIdx += MI.getNumExplicitDefs() + 1; + + const int NumOps = MI.getNumOperands(); + SmallVector OpdsMapping(NumOps); + + // TODO: Should packed/unpacked D16 difference be reported here as part of + // the value mapping? + for (int I = 0; I != NumOps; ++I) { + if (!MI.getOperand(I).isReg()) + continue; + + Register OpReg = MI.getOperand(I).getReg(); + unsigned Size = getSizeInBits(OpReg, MRI, *TRI); + + // FIXME: Probably need a new intrinsic register bank searchable table to + // handle arbitrary intrinsics easily. + // + // If this has a sampler, it immediately follows rsrc. + const bool MustBeSGPR = I == RsrcIdx || I == RsrcIdx + 1; + + if (MustBeSGPR) { + // If this must be an SGPR, so we must report whatever it is as legal. + unsigned NewBank = getRegBankID(OpReg, MRI, *TRI, AMDGPU::SGPRRegBankID); + OpdsMapping[I] = AMDGPU::getValueMapping(NewBank, Size); + } else { + // Some operands must be VGPR, and these are easy to copy to. + OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); + } + } + + return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps); +} + const RegisterBankInfo::InstructionMapping & AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { @@ -1577,11 +1869,31 @@ AMDGPURegisterBankInfo::getRegBankID(Register Reg, return Bank ? Bank->getID() : Default; } + static unsigned regBankUnion(unsigned RB0, unsigned RB1) { return (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID) ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; } +const RegisterBankInfo::ValueMapping * +AMDGPURegisterBankInfo::getSGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + // Lie and claim anything is legal, even though this needs to be an SGPR + // applyMapping will have to deal with it as a waterfall loop. 
+ unsigned Bank = getRegBankID(Reg, MRI, TRI, AMDGPU::SGPRRegBankID); + unsigned Size = getSizeInBits(Reg, MRI, TRI); + return AMDGPU::getValueMapping(Bank, Size); +} + +const RegisterBankInfo::ValueMapping * +AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + unsigned Size = getSizeInBits(Reg, MRI, TRI); + return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); +} + /// /// This function must return a legal mapping, because /// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called @@ -1748,7 +2060,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { LLVM_FALLTHROUGH; } - case AMDGPU::G_GEP: case AMDGPU::G_ADD: case AMDGPU::G_SUB: @@ -1764,8 +2075,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_SADDE: case AMDGPU::G_USUBE: case AMDGPU::G_SSUBE: - case AMDGPU::G_UMULH: - case AMDGPU::G_SMULH: case AMDGPU::G_SMIN: case AMDGPU::G_SMAX: case AMDGPU::G_UMIN: @@ -1799,6 +2108,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_INTRINSIC_TRUNC: case AMDGPU::G_INTRINSIC_ROUND: return getDefaultMappingVOP(MI); + case AMDGPU::G_UMULH: + case AMDGPU::G_SMULH: { + if (MF.getSubtarget().hasScalarMulHiInsts() && + isSALUMapping(MI)) + return getDefaultMappingSOP(MI); + return getDefaultMappingVOP(MI); + } case AMDGPU::G_IMPLICIT_DEF: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); @@ -2072,6 +2388,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_wwm: case Intrinsic::amdgcn_wqm: return getDefaultMappingVOP(MI); + case Intrinsic::amdgcn_ds_swizzle: case Intrinsic::amdgcn_ds_permute: case Intrinsic::amdgcn_ds_bpermute: case Intrinsic::amdgcn_update_dpp: @@ -2193,9 +2510,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { - default: - return getInvalidInstructionMapping(); + auto IntrID = MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID(); + switch (IntrID) { case Intrinsic::amdgcn_s_getreg: case Intrinsic::amdgcn_s_memtime: case Intrinsic::amdgcn_s_memrealtime: @@ -2235,18 +2551,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); break; case Intrinsic::amdgcn_exp: - OpdsMapping[0] = nullptr; // IntrinsicID - // FIXME: These are immediate values which can't be read from registers. - OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); - OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); // FIXME: Could we support packed types here? OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); - // FIXME: These are immediate values which can't be read from registers. 
- OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); - OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); break; case Intrinsic::amdgcn_buffer_load: { Register RSrc = MI.getOperand(2).getReg(); // SGPR @@ -2298,6 +2607,54 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1); break; } + case Intrinsic::amdgcn_raw_buffer_load: + case Intrinsic::amdgcn_raw_tbuffer_load: { + // FIXME: Should make intrinsic ID the last operand of the instruction, + // then this would be the same as store + OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + break; + } + case Intrinsic::amdgcn_raw_buffer_store: + case Intrinsic::amdgcn_raw_buffer_store_format: + case Intrinsic::amdgcn_raw_tbuffer_store: { + OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + break; + } + case Intrinsic::amdgcn_struct_buffer_load: + case Intrinsic::amdgcn_struct_tbuffer_load: { + OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); + break; + } + case Intrinsic::amdgcn_struct_buffer_store: + case Intrinsic::amdgcn_struct_tbuffer_store: { + OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); + break; + } + default: + if (const AMDGPU::RsrcIntrinsic *RSrcIntrin = + AMDGPU::lookupRsrcIntrinsic(IntrID)) { + // Non-images can have complications from operands that allow both SGPR + // and VGPR. For now it's too complicated to figure out the final opcode + // to derive the register bank from the MCInstrDesc. 
+ if (RSrcIntrin->IsImage) + return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg); + } + + return getInvalidInstructionMapping(); } break; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index f3a96e2a6128..584b23c0c220 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -23,7 +23,9 @@ namespace llvm { class LLT; +class GCNSubtarget; class MachineIRBuilder; +class SIInstrInfo; class SIRegisterInfo; class TargetRegisterInfo; @@ -36,9 +38,15 @@ protected: #include "AMDGPUGenRegisterBank.inc" }; class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { + const GCNSubtarget &Subtarget; const SIRegisterInfo *TRI; + const SIInstrInfo *TII; - void executeInWaterfallLoop(MachineInstr &MI, + bool executeInWaterfallLoop(MachineIRBuilder &B, + MachineInstr &MI, + MachineRegisterInfo &MRI, + ArrayRef OpIndices) const; + bool executeInWaterfallLoop(MachineInstr &MI, MachineRegisterInfo &MRI, ArrayRef OpIndices) const; @@ -47,6 +55,19 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { bool applyMappingWideLoad(MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, MachineRegisterInfo &MRI) const; + bool + applyMappingImage(MachineInstr &MI, + const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, + MachineRegisterInfo &MRI, int RSrcIdx) const; + + Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, + Register Reg) const; + + std::pair + splitBufferOffsets(MachineIRBuilder &B, Register Offset) const; + + MachineInstr *selectStoreIntrinsic(MachineIRBuilder &B, + MachineInstr &MI) const; /// See RegisterBankInfo::applyMapping. void applyMappingImpl(const OperandsMapper &OpdMapper) const override; @@ -58,6 +79,16 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const TargetRegisterInfo &TRI, unsigned Default = AMDGPU::VGPRRegBankID) const; + // Return a value mapping for an operand that is required to be an SGPR. + const ValueMapping *getSGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; + + // Return a value mapping for an operand that is required to be a VGPR. + const ValueMapping *getVGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; + /// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p /// Regs. This appropriately sets the regbank of the new registers. 
void split64BitValueForMapping(MachineIRBuilder &B, @@ -90,8 +121,13 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const; const InstructionMapping &getDefaultMappingAllVGPR( const MachineInstr &MI) const; + + const InstructionMapping &getImageMapping(const MachineRegisterInfo &MRI, + const MachineInstr &MI, + int RsrcIdx) const; + public: - AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI); + AMDGPURegisterBankInfo(const GCNSubtarget &STI); unsigned copyCost(const RegisterBank &A, const RegisterBank &B, unsigned Size) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 3e556b46fa52..b1069d4a3198 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -283,7 +283,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this); CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering())); Legalizer.reset(new AMDGPULegalizerInfo(*this, TM)); - RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo())); + RegBankInfo.reset(new AMDGPURegisterBankInfo(*this)); InstSelector.reset(new AMDGPUInstructionSelector( *this, *static_cast(RegBankInfo.get()), TM)); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index c9d305a5bba8..bf7cf86bc42d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -555,6 +555,10 @@ public: return GFX9Insts; } + bool hasScalarMulHiInsts() const { + return GFX9Insts; + } + TrapHandlerAbi getTrapHandlerAbi() const { return isAmdHsaOS() ? 
TrapHandlerAbiHsa : TrapHandlerAbiNone; } diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 1af12721b64c..40887a3c56eb 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1135,29 +1135,29 @@ def extract_dlc : SDNodeXForm { def : GCNPat< - (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0)), + (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0)), (!cast(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0)), + (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0)), (!cast(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm)), + (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm)), (!cast(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm)), + (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm)), (!cast(opcode # _BOTHEN) (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), $rsrc, $soffset, (as_i16imm $offset), @@ -1210,31 +1210,31 @@ defm : MUBUF_LoadIntrinsicPat; multiclass MUBUF_StoreIntrinsicPat { def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0), (!cast(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0), (!cast(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), (!cast(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), (!cast(opcode # _BOTHEN_exact) $vdata, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), @@ -1291,32 +1291,32 @@ multiclass BufferAtomicPatterns { def : GCNPat< (vt (name vt:$vdata_in, v4i32:$rsrc, 0, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0)), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0)), 
     (!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN)
       $vdata_in, $rsrc, $soffset, (as_i16imm $offset),
       (extract_slc $cachepolicy))
   >;

   def : GCNPat<
     (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
-          0, i32:$soffset, imm:$offset,
-          imm:$cachepolicy, imm)),
+          0, i32:$soffset, timm:$offset,
+          timm:$cachepolicy, timm)),
     (!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) $vdata_in, $vindex, $rsrc, $soffset,
       (as_i16imm $offset), (extract_slc $cachepolicy))
   >;

   def : GCNPat<
     (vt (name vt:$vdata_in, v4i32:$rsrc, 0,
-          i32:$voffset, i32:$soffset, imm:$offset,
-          imm:$cachepolicy, 0)),
+          i32:$voffset, i32:$soffset, timm:$offset,
+          timm:$cachepolicy, 0)),
     (!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) $vdata_in, $voffset, $rsrc, $soffset,
       (as_i16imm $offset), (extract_slc $cachepolicy))
   >;

   def : GCNPat<
     (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
-          i32:$voffset, i32:$soffset, imm:$offset,
-          imm:$cachepolicy, imm)),
+          i32:$voffset, i32:$soffset, timm:$offset,
+          timm:$cachepolicy, timm)),
     (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN)
       $vdata_in,
       (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
@@ -1353,32 +1353,32 @@ multiclass BufferAtomicPatterns_NO_RTN {
   def : GCNPat<
     (name vt:$vdata_in, v4i32:$rsrc, 0,
-          0, i32:$soffset, imm:$offset,
-          imm:$cachepolicy, 0),
+          0, i32:$soffset, timm:$offset,
+          timm:$cachepolicy, 0),
     (!cast<MUBUF_Pseudo>(opcode # _OFFSET) $vdata_in, $rsrc, $soffset,
       (as_i16imm $offset), (extract_slc $cachepolicy))
   >;

   def : GCNPat<
     (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
-          0, i32:$soffset, imm:$offset,
-          imm:$cachepolicy, imm),
+          0, i32:$soffset, timm:$offset,
+          timm:$cachepolicy, timm),
     (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vdata_in, $vindex, $rsrc, $soffset,
       (as_i16imm $offset), (extract_slc $cachepolicy))
   >;

   def : GCNPat<
     (name vt:$vdata_in, v4i32:$rsrc, 0,
-          i32:$voffset, i32:$soffset, imm:$offset,
-          imm:$cachepolicy, 0),
+          i32:$voffset, i32:$soffset, timm:$offset,
+          timm:$cachepolicy, 0),
     (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $vdata_in, $voffset, $rsrc, $soffset,
       (as_i16imm $offset), (extract_slc $cachepolicy))
   >;

   def : GCNPat<
     (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
-          i32:$voffset, i32:$soffset, imm:$offset,
-          imm:$cachepolicy, imm),
+          i32:$voffset, i32:$soffset, timm:$offset,
+          timm:$cachepolicy, timm),
     (!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
       $vdata_in,
       (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
@@ -1392,8 +1392,8 @@ defm : BufferAtomicPatterns_NO_RTN
 multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
                                   string opcode> {
   def : GCNPat<
-    (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
-              imm:$format, imm:$cachepolicy, 0)),
+    (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+              timm:$format, timm:$cachepolicy, 0)),
     (!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
       (as_i8imm $format), (extract_glc $cachepolicy),
       (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;

   def : GCNPat<
-    (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
-              imm:$format, imm:$cachepolicy, imm)),
+    (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
+              timm:$format, timm:$cachepolicy, timm)),
     (!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset,
       (as_i16imm $offset), (as_i8imm $format), (extract_glc $cachepolicy),
       (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;

   def : GCNPat<
-    (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
-              imm:$format, imm:$cachepolicy, 0)),
+    (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+              timm:$format, timm:$cachepolicy, 0)),
     (!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset,
       (as_i16imm $offset), (as_i8imm $format), (extract_glc $cachepolicy),
       (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;

   def : GCNPat<
-    (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
-              imm:$format, imm:$cachepolicy, imm)),
+    (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
+              timm:$format, timm:$cachepolicy, timm)),
     (!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
       (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
       $rsrc, $soffset, (as_i16imm $offset),
@@ -1700,24 +1700,24 @@ let SubtargetPredicate = HasPackedD16VMem in {
 multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
                                    string opcode> {
   def : GCNPat<
-    (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
-          imm:$format, imm:$cachepolicy, 0),
+    (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+          timm:$format, timm:$cachepolicy, 0),
     (!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
       (as_i16imm $offset), (as_i8imm $format), (extract_glc $cachepolicy),
       (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;

   def : GCNPat<
-    (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
-          imm:$format, imm:$cachepolicy, imm),
+    (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
+          timm:$format, timm:$cachepolicy, timm),
     (!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
       (as_i16imm $offset), (as_i8imm $format), (extract_glc $cachepolicy),
       (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;

   def : GCNPat<
-    (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
-          imm:$format, imm:$cachepolicy, 0),
+    (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+          timm:$format, timm:$cachepolicy, 0),
     (!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
       (as_i16imm $offset), (as_i8imm $format), (extract_glc $cachepolicy),
       (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
@@ -1725,7 +1725,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
   def : GCNPat<
-    (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
-          imm:$format, imm:$cachepolicy, imm),
+    (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
+          timm:$format, timm:$cachepolicy, timm),
     (!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
      $vdata,
      (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index e1d53ae71a80..86c2db92acb4 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -603,7 +603,7 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
 //===----------------------------------------------------------------------===//

 def : GCNPat <
-  (int_amdgcn_ds_swizzle i32:$src, imm:$offset16),
+  (int_amdgcn_ds_swizzle i32:$src, timm:$offset16),
   (DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0))
 >;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e459a3a4e0d0..690114297d6b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5666,14 +5666,14 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
   SDVTList VTList = DAG.getVTList({LoadVT, MVT::Glue});
   unsigned CachePolicy = cast<ConstantSDNode>(GLC)->getZExtValue();
   SDValue Ops[] = {
-      DAG.getEntryNode(),                         // Chain
-      Rsrc,                                       // rsrc
-      DAG.getConstant(0, DL, MVT::i32),           // vindex
-      {},                                         // voffset
-      {},                                         // soffset
-      {},                                         // offset
-      DAG.getConstant(CachePolicy, DL, MVT::i32), // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1),            // idxen
+      DAG.getEntryNode(),                               // Chain
+      Rsrc,                                             // rsrc
+      DAG.getConstant(0, DL, MVT::i32),                 // vindex
+      {},                                               // voffset
+      {},                                               // soffset
+      {},                                               // offset
+      DAG.getTargetConstant(CachePolicy, DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(0, DL, MVT::i1),            // idxen
   };

   // Use the alignment to ensure that the required offsets will fit into the
@@ -5682,7 +5682,7 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
   uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue();
   for (unsigned i = 0; i < NumLoads; ++i) {
-    Ops[5] = DAG.getConstant(InstOffset + 16 * i, DL, MVT::i32);
+    Ops[5] = DAG.getTargetConstant(InstOffset + 16 * i, DL, MVT::i32);
     Loads.push_back(DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList,
                                             Ops, LoadVT, MMO));
   }
@@ -5894,12 +5894,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
         Op.getOperand(1), // Src0
         Op.getOperand(2), // Attrchan
         Op.getOperand(3), // Attr
-        DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+        DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
         S, // Src2 - holds two f16 values selected by high
-        DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
+        DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
         Op.getOperand(4), // high
-        DAG.getConstant(0, DL, MVT::i1), // $clamp
-        DAG.getConstant(0, DL, MVT::i32) // $omod
+        DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
+        DAG.getTargetConstant(0, DL, MVT::i32) // $omod
       };
       return DAG.getNode(AMDGPUISD::INTERP_P1LV_F16, DL, MVT::f32, Ops);
     } else {
@@ -5908,10 +5908,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
         Op.getOperand(1), // Src0
         Op.getOperand(2), // Attrchan
         Op.getOperand(3), // Attr
-        DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+        DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
         Op.getOperand(4), // high
-        DAG.getConstant(0, DL, MVT::i1), // $clamp
-        DAG.getConstant(0, DL, MVT::i32), // $omod
+        DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
+        DAG.getTargetConstant(0, DL, MVT::i32), // $omod
         Glue
       };
       return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops);
@@ -5924,11 +5924,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       Op.getOperand(2), // Src0
       Op.getOperand(3), // Attrchan
       Op.getOperand(4), // Attr
-      DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+      DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
       Op.getOperand(1), // Src2
-      DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
+      DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
       Op.getOperand(5), // high
-      DAG.getConstant(0, DL, MVT::i1), // $clamp
+      DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
       Glue
     };
     return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops);
@@ -6234,8 +6234,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       SDValue(), // voffset -- will be set by setBufferOffsets
       SDValue(), // soffset -- will be set by setBufferOffsets
       SDValue(), // offset -- will be set by setBufferOffsets
-      DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
     };

     setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]);
@@ -6272,7 +6272,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(4), // soffset
      Offsets.second,   // offset
       Op.getOperand(5), // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(0, DL, MVT::i1), // idxen
     };

     return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
@@ -6290,7 +6290,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(5), // soffset
       Offsets.second,   // offset
       Op.getOperand(6), // cachepolicy
-      DAG.getConstant(1, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(1, DL, MVT::i1), // idxen
     };

     return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
@@ -6313,9 +6313,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(4),  // voffset
       Op.getOperand(5),  // soffset
       Op.getOperand(6),  // offset
-      DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
-      DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+      DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1) // idxen
     };

     if (LoadVT.getScalarType() == MVT::f16)
@@ -6339,7 +6339,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Offsets.second,    // offset
       Op.getOperand(5),  // format
       Op.getOperand(6),  // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(0, DL, MVT::i1), // idxen
     };

     if (LoadVT.getScalarType() == MVT::f16)
@@ -6363,7 +6363,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Offsets.second,    // offset
       Op.getOperand(6),  // format
       Op.getOperand(7),  // cachepolicy
-      DAG.getConstant(1, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(1, DL, MVT::i1), // idxen
     };

     if (LoadVT.getScalarType() == MVT::f16)
@@ -6395,8 +6395,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       SDValue(),        // voffset -- will be set by setBufferOffsets
       SDValue(),        // soffset -- will be set by setBufferOffsets
       SDValue(),        // offset -- will be set by setBufferOffsets
-      DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
     };
     setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
     EVT VT = Op.getValueType();
@@ -6464,7 +6464,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(5), // soffset
       Offsets.second,   // offset
       Op.getOperand(6), // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(0, DL, MVT::i1), // idxen
     };

     EVT VT = Op.getValueType();
@@ -6537,7 +6537,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(6), // soffset
       Offsets.second,   // offset
       Op.getOperand(7), // cachepolicy
-      DAG.getConstant(1, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(1, DL, MVT::i1), // idxen
     };

     EVT VT = Op.getValueType();
@@ -6602,8 +6602,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       SDValue(),        // voffset -- will be set by setBufferOffsets
       SDValue(),        // soffset -- will be set by setBufferOffsets
       SDValue(),        // offset -- will be set by setBufferOffsets
-      DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
     };
     setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]);
     EVT VT = Op.getValueType();
@@ -6624,7 +6624,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(6), // soffset
       Offsets.second,   // offset
       Op.getOperand(7), // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(0, DL, MVT::i1), // idxen
     };
     EVT VT = Op.getValueType();
     auto *M = cast<MemSDNode>(Op);
@@ -6644,7 +6644,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(7), // soffset
       Offsets.second,   // offset
       Op.getOperand(8), // cachepolicy
-      DAG.getConstant(1, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(1, DL, MVT::i1), // idxen
     };
     EVT VT = Op.getValueType();
     auto *M = cast<MemSDNode>(Op);
@@ -6806,9 +6806,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       Op.getOperand(5),  // voffset
       Op.getOperand(6),  // soffset
       Op.getOperand(7),  // offset
-      DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
-      DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idexen
+      DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+      DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idexen
     };
     unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
                            AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -6833,7 +6833,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       Offsets.second,    // offset
       Op.getOperand(7),  // format
       Op.getOperand(8),  // cachepolicy
-      DAG.getConstant(1, DL, MVT::i1), // idexen
+      DAG.getTargetConstant(1, DL, MVT::i1), // idexen
     };
     unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
                            AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -6858,7 +6858,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       Offsets.second,    // offset
       Op.getOperand(6),  // format
       Op.getOperand(7),  // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1), // idexen
+      DAG.getTargetConstant(0, DL, MVT::i1), // idexen
     };
     unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
                            AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -6886,8 +6886,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       SDValue(), // voffset -- will be set by setBufferOffsets
       SDValue(), // soffset -- will be set by setBufferOffsets
       SDValue(), // offset -- will be set by setBufferOffsets
-      DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
     };
     setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
     unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
@@ -6932,7 +6932,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       Op.getOperand(5), // soffset
       Offsets.second,   // offset
       Op.getOperand(6), // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(0, DL, MVT::i1), // idxen
     };
     unsigned Opc =
         IsFormat ? AMDGPUISD::BUFFER_STORE_FORMAT : AMDGPUISD::BUFFER_STORE;
@@ -6976,7 +6976,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       Op.getOperand(6), // soffset
       Offsets.second,   // offset
       Op.getOperand(7), // cachepolicy
-      DAG.getConstant(1, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(1, DL, MVT::i1), // idxen
     };
     unsigned Opc = IntrinsicID == Intrinsic::amdgcn_struct_buffer_store ?
                    AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
@@ -7005,8 +7005,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       SDValue(), // voffset -- will be set by setBufferOffsets
       SDValue(), // soffset -- will be set by setBufferOffsets
       SDValue(), // offset -- will be set by setBufferOffsets
-      DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
     };
     setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
     EVT VT = Op.getOperand(2).getValueType();
@@ -7084,7 +7084,7 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
       Overflow += ImmOffset;
       ImmOffset = 0;
     }
-    C1 = cast<ConstantSDNode>(DAG.getConstant(ImmOffset, DL, MVT::i32));
+    C1 = cast<ConstantSDNode>(DAG.getTargetConstant(ImmOffset, DL, MVT::i32));
     if (Overflow) {
       auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32);
       if (!N0)
@@ -7098,7 +7098,7 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
   if (!N0)
     N0 = DAG.getConstant(0, DL, MVT::i32);
   if (!C1)
-    C1 = cast<ConstantSDNode>(DAG.getConstant(0, DL, MVT::i32));
+    C1 = cast<ConstantSDNode>(DAG.getTargetConstant(0, DL, MVT::i32));
   return {N0, SDValue(C1, 0)};
 }

@@ -7115,7 +7115,7 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
     if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget, Align)) {
       Offsets[0] = DAG.getConstant(0, DL, MVT::i32);
       Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
-      Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
+      Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
       return;
     }
   }
@@ -7128,13 +7128,13 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
                                 Subtarget, Align)) {
       Offsets[0] = N0;
       Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
-      Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
+      Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
       return;
     }
   }
   Offsets[0] = CombinedOffset;
   Offsets[1] = DAG.getConstant(0, DL, MVT::i32);
-  Offsets[2] = DAG.getConstant(0, DL, MVT::i32);
+  Offsets[2] = DAG.getTargetConstant(0, DL, MVT::i32);
 }

 // Handle 8 bit and 16 bit buffer loads
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 3ae0da3545c6..3c0cc0051c68 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -43,8 +43,8 @@ multiclass V_INTERP_P1_F32_m : VINTRP_m <
   (outs VINTRPDst:$vdst),
   (ins VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
   "v_interp_p1_f32$vdst, $vsrc, $attr$attrchan",
-  [(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 imm:$attrchan),
-                                    (i32 imm:$attr)))]
+  [(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 timm:$attrchan),
+                                    (i32 timm:$attr)))]
 >;

 let OtherPredicates = [has32BankLDS] in {
@@ -66,8 +66,8 @@ defm V_INTERP_P2_F32 : VINTRP_m <
   (outs VINTRPDst:$vdst),
   (ins VGPR_32:$src0, VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
   "v_interp_p2_f32$vdst, $vsrc, $attr$attrchan",
-  [(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 imm:$attrchan),
-                                    (i32 imm:$attr)))]>;
+  [(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 timm:$attrchan),
+                                    (i32 timm:$attr)))]>;

 } // End DisableEncoding = "$src0", Constraints = "$src0 = $vdst"

@@ -76,8 +76,8 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
   (outs VINTRPDst:$vdst),
   (ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan),
   "v_interp_mov_f32$vdst, $vsrc, $attr$attrchan",
-  [(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 imm:$attrchan),
-                                     (i32 imm:$attr)))]>;
+  [(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 timm:$attrchan),
+                                     (i32 timm:$attr)))]>;

 } // End Uses = [M0, EXEC]
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 0eb01434deb2..2cd4e1cbc077 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1090,7 +1090,7 @@ def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> {
 let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
 def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16",
-    [(int_amdgcn_s_waitcnt UIMM16bit:$simm16)]>;
+    [(int_amdgcn_s_waitcnt timm:$simm16)]>;
 def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
 def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
@@ -1099,7 +1099,7 @@ def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
 // maximum reported is 960 cycles, so 960 / 64 = 15 max, so is the
 // maximum really 15 on VI?
 def S_SLEEP : SOPP <0x0000000e, (ins i32imm:$simm16),
-  "s_sleep $simm16", [(int_amdgcn_s_sleep SIMM16bit:$simm16)]> {
+  "s_sleep $simm16", [(int_amdgcn_s_sleep timm:$simm16)]> {
   let hasSideEffects = 1;
   let mayLoad = 1;
   let mayStore = 1;
@@ -1110,10 +1110,10 @@ def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
 let Uses = [EXEC, M0] in {
 // FIXME: Should this be mayLoad+mayStore?
 def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
-  [(int_amdgcn_s_sendmsg (i32 imm:$simm16), M0)]>;
+  [(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]>;

 def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16",
-  [(int_amdgcn_s_sendmsghalt (i32 imm:$simm16), M0)]>;
+  [(int_amdgcn_s_sendmsghalt (i32 timm:$simm16), M0)]>;

 } // End Uses = [EXEC, M0]
@@ -1125,13 +1125,13 @@ def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
   let simm16 = 0;
 }
 def S_INCPERFLEVEL : SOPP <0x00000014, (ins i32imm:$simm16), "s_incperflevel $simm16",
-  [(int_amdgcn_s_incperflevel SIMM16bit:$simm16)]> {
+  [(int_amdgcn_s_incperflevel timm:$simm16)]> {
   let hasSideEffects = 1;
   let mayLoad = 1;
   let mayStore = 1;
 }
 def S_DECPERFLEVEL : SOPP <0x00000015, (ins i32imm:$simm16), "s_decperflevel $simm16",
-  [(int_amdgcn_s_decperflevel SIMM16bit:$simm16)]> {
+  [(int_amdgcn_s_decperflevel timm:$simm16)]> {
   let hasSideEffects = 1;
   let mayLoad = 1;
   let mayStore = 1;
@@ -1180,7 +1180,7 @@ let SubtargetPredicate = isGFX10Plus in {
 // S_GETREG_B32 Intrinsic Pattern.
 //===----------------------------------------------------------------------===//
 def : GCNPat <
-  (int_amdgcn_s_getreg imm:$simm16),
+  (int_amdgcn_s_getreg timm:$simm16),
   (S_GETREG_B32 (as_i16imm $simm16))
 >;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 76abda9218f9..bea0c7bd080d 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -841,16 +841,16 @@ def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;
 let OtherPredicates = [isGFX8GFX9] in {

 def : GCNPat <
-  (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
-                           imm:$bound_ctrl)),
+  (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask,
+                           timm:$bound_ctrl)),
   (V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl),
                        (as_i32imm $row_mask), (as_i32imm $bank_mask),
                        (as_i1imm $bound_ctrl))
 >;

 def : GCNPat <
-  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
-                              imm:$bank_mask, imm:$bound_ctrl)),
+  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask,
+                              timm:$bank_mask, timm:$bound_ctrl)),
   (V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl),
                        (as_i32imm $row_mask), (as_i32imm $bank_mask),
                        (as_i1imm $bound_ctrl))
@@ -911,21 +911,21 @@ defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
 let OtherPredicates = [isGFX10Plus] in {

 def : GCNPat <
-  (i32 (int_amdgcn_mov_dpp8 i32:$src, imm:$dpp8)),
+  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
   (V_MOV_B32_dpp8_gfx10 $src, $src, (as_i32imm $dpp8), (i32 DPP8Mode.FI_0))
 >;

 def : GCNPat <
-  (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
-                           imm:$bound_ctrl)),
+  (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask,
+                           timm:$bound_ctrl)),
   (V_MOV_B32_dpp_gfx10 $src, $src, (as_i32imm $dpp_ctrl),
                        (as_i32imm $row_mask), (as_i32imm $bank_mask),
                        (as_i1imm $bound_ctrl), (i32 0))
 >;

 def : GCNPat <
-  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
-                              imm:$bank_mask, imm:$bound_ctrl)),
+  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask,
+                              timm:$bank_mask, timm:$bound_ctrl)),
   (V_MOV_B32_dpp_gfx10 $old, $src, (as_i32imm $dpp_ctrl),
                        (as_i32imm $row_mask), (as_i32imm $bank_mask),
                        (as_i1imm $bound_ctrl), (i32 0))
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 753f63d3a74d..605425972b1c 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -112,7 +112,7 @@ class getVOP3ClampPat<VOPProfile P, SDPatternOperator node> {
 class getVOP3MAIPat<VOPProfile P, SDPatternOperator node> {
   list<dag> ret = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2,
-                    imm:$cbsz, imm:$abid, imm:$blgp))];
+                    timm:$cbsz, timm:$abid, timm:$blgp))];
 }

 class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag,
                bit VOP3Only = 0> :
@@ -453,13 +453,13 @@ let FPDPRounding = 1 in {
 def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
 let Uses = [M0, EXEC] in {
 def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>,
-       [(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 imm:$attrchan),
-                                             (i32 imm:$attr),
-                                             (i32 imm:$src0_modifiers),
+       [(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 timm:$attrchan),
+                                             (i32 timm:$attr),
+                                             (i32 timm:$src0_modifiers),
                                              (f32 VRegSrc_32:$src2),
-                                             (i32 imm:$src2_modifiers),
-                                             (i1 imm:$high),
-                                             (i1 imm:$clamp)))]>;
+                                             (i32 timm:$src2_modifiers),
+                                             (i1 timm:$high),
+                                             (i1 timm:$clamp)))]>;
 } // End Uses = [M0, EXEC]
 } // End FPDPRounding = 1
 } // End renamedInGFX9 = 1
@@ -478,21 +478,21 @@ def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f1
 let Uses = [M0, EXEC], FPDPRounding = 1 in {
 def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>,
-       [(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 imm:$attrchan),
-                                               (i32 imm:$attr),
-                                               (i32 imm:$src0_modifiers),
-                                               (i1 imm:$high),
-                                               (i1 imm:$clamp),
-                                               (i32 imm:$omod)))]>;
+       [(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 timm:$attrchan),
+                                               (i32 timm:$attr),
+                                               (i32 timm:$src0_modifiers),
+                                               (i1 timm:$high),
+                                               (i1 timm:$clamp),
+                                               (i32 timm:$omod)))]>;

 def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>,
-       [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 imm:$attrchan),
-                                               (i32 imm:$attr),
-                                               (i32 imm:$src0_modifiers),
+       [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 timm:$attrchan),
+                                               (i32 timm:$attr),
+                                               (i32 timm:$src0_modifiers),
                                                (f32 VRegSrc_32:$src2),
-                                               (i32 imm:$src2_modifiers),
-                                               (i1 imm:$high),
-                                               (i1 imm:$clamp),
-                                               (i32 imm:$omod)))]>;
+                                               (i32 timm:$src2_modifiers),
+                                               (i1 timm:$high),
+                                               (i1 timm:$clamp),
+                                               (i32 timm:$omod)))]>;
 } // End Uses = [M0, EXEC], FPDPRounding = 1
 } // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
@@ -642,11 +642,11 @@ let SubtargetPredicate = isGFX10Plus in {
   } // End $vdst = $vdst_in, DisableEncoding $vdst_in

   def : GCNPat<
-    (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
+    (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
     (V_PERMLANE16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
   >;
   def : GCNPat<
-    (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
+    (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
     (V_PERMLANEX16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
   >;
 } // End SubtargetPredicate = isGFX10Plus
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index c7de50e89cfa..e30b878d84e4 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -3116,12 +3116,12 @@ ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
   // Load the current TEB (thread environment block)
   SDValue Ops[] = {Chain,
-                   DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
-                   DAG.getConstant(15, DL, MVT::i32),
-                   DAG.getConstant(0, DL, MVT::i32),
-                   DAG.getConstant(13, DL, MVT::i32),
-                   DAG.getConstant(0, DL, MVT::i32),
-                   DAG.getConstant(2, DL, MVT::i32)};
+                   DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
+                   DAG.getTargetConstant(15, DL, MVT::i32),
+                   DAG.getTargetConstant(0, DL, MVT::i32),
+                   DAG.getTargetConstant(13, DL, MVT::i32),
+                   DAG.getTargetConstant(0, DL, MVT::i32),
+                   DAG.getTargetConstant(2, DL, MVT::i32)};
   SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
                                    DAG.getVTList(MVT::i32, MVT::Other), Ops);
@@ -8910,12 +8910,12 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
   // Under Power Management extensions, the cycle-count is:
   //   mrc p15, #0, <Rt>, c9, c13, #0
   SDValue Ops[] = { N->getOperand(0), // Chain
-                    DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
-                    DAG.getConstant(15, DL, MVT::i32),
-                    DAG.getConstant(0, DL, MVT::i32),
-                    DAG.getConstant(9, DL, MVT::i32),
-                    DAG.getConstant(13, DL, MVT::i32),
-                    DAG.getConstant(0, DL, MVT::i32)
+                    DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
+                    DAG.getTargetConstant(15, DL, MVT::i32),
+                    DAG.getTargetConstant(0, DL, MVT::i32),
+                    DAG.getTargetConstant(9, DL, MVT::i32),
+                    DAG.getTargetConstant(13, DL, MVT::i32),
+                    DAG.getTargetConstant(0, DL, MVT::i32)
   };

   SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 5396ffb7af5c..766c87ed8c54 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -5110,8 +5110,8 @@ def SWPB: AIswp<1, (outs GPRnopc:$Rt),
 def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
             c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
             NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
-            [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
-                          imm:$CRm, imm:$opc2)]>,
+            [(int_arm_cdp timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn,
+                          timm:$CRm, timm:$opc2)]>,
             Requires<[IsARM,PreV8]> {
   bits<4> opc1;
   bits<4> CRn;
@@ -5134,8 +5134,8 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
 def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
                c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
                NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
-               [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
-                              imm:$CRm, imm:$opc2)]>,
+               [(int_arm_cdp2 timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn,
+                              timm:$CRm, timm:$opc2)]>,
                Requires<[IsARM,PreV8]> {
   let Inst{31-28} = 0b1111;
   bits<4> opc1;
@@ -5314,15 +5314,15 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm, list<dag> pattern> {
   }
 }

-defm LDC   : LdStCop <1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm LDCL  : LdStCop <1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm LDC2  : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
-defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
+defm LDC   : LdStCop <1, 0, "ldc", [(int_arm_ldc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
+defm LDCL  : LdStCop <1, 1, "ldcl", [(int_arm_ldcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
+defm LDC2  : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
+defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;

-defm STC   : LdStCop <0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm STCL  : LdStCop <0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm STC2  : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
-defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
+defm STC   : LdStCop <0, 0, "stc", [(int_arm_stc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
+defm STCL  : LdStCop <0, 1, "stcl", [(int_arm_stcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
+defm STC2  : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
+defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;

 } // DecoderNamespace = "CoProc"
@@ -5358,8 +5358,8 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
                     (outs),
                     (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
                          c_imm:$CRm, imm0_7:$opc2),
-                    [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
-                                  imm:$CRm, imm:$opc2)]>,
+                    [(int_arm_mcr timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn,
+                                  timm:$CRm, timm:$opc2)]>,
                     ComplexDeprecationPredicate<"MCR">;
 def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
                    (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
@@ -5372,8 +5372,8 @@ def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
                    (MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
                         c_imm:$CRm, 0, pred:$p)>;

-def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
-             (MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+def : ARMPat<(int_arm_mrc timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2),
+             (MRC p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>;

 class MovRCopro2<string opc, bit direction, dag oops, dag iops,
                  list<dag> pattern>
@@ -5404,8 +5404,8 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
                       (outs),
                       (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
                            c_imm:$CRm, imm0_7:$opc2),
-                      [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
-                                     imm:$CRm, imm:$opc2)]>,
+                      [(int_arm_mcr2 timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn,
+                                     timm:$CRm, timm:$opc2)]>,
                       Requires<[IsARM,PreV8]>;
 def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
                    (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
@@ -5419,9 +5419,9 @@ def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
                    (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
                          c_imm:$CRm, 0)>;

-def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
-                              imm:$CRm, imm:$opc2),
-                (MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+def : ARMV5TPat<(int_arm_mrc2 timm:$cop, timm:$opc1, timm:$CRn,
+                              timm:$CRm, timm:$opc2),
+                (MRC2 p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>;

 class MovRRCopro<string opc, bit direction, dag oops, dag iops,
                  list<dag> pattern = []>
@@ -5447,8 +5447,8 @@ class MovRRCopro
 def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */,
                       (outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt,
                       GPRnopc:$Rt2, c_imm:$CRm),
-                      [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt,
-                                     GPRnopc:$Rt2, imm:$CRm)]>;
+                      [(int_arm_mcrr timm:$cop, timm:$opc1, GPRnopc:$Rt,
+                                     GPRnopc:$Rt2, timm:$CRm)]>;
 def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */,
                       (outs GPRnopc:$Rt, GPRnopc:$Rt2),
                       (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>;
@@ -5480,8 +5480,8 @@ class MovRRCopro2<string opc, bit direction, dag oops, dag iops,
 def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */,
                         (outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt,
                         GPRnopc:$Rt2, c_imm:$CRm),
-                        [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt,
-                                        GPRnopc:$Rt2, imm:$CRm)]>;
+                        [(int_arm_mcrr2 timm:$cop, timm:$opc1, GPRnopc:$Rt,
+                                        GPRnopc:$Rt2, timm:$CRm)]>;

 def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */,
                         (outs GPRnopc:$Rt, GPRnopc:$Rt2),
@@ -6159,7 +6159,7 @@ def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>,
 let mayLoad = 1, mayStore =1, hasSideEffects = 1 in
 def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
                        NoItinerary,
-                       [(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>;
+                       [(set GPR:$Rd, (int_arm_space timm:$size, GPR:$Rn))]>;

 //===----------------------------------
 // Atomic cmpxchg for -O0
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index f8cea50408cf..98de7382ad9e 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -4175,15 +4175,15 @@ multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm, list<dag> pattern>
 }

 let DecoderNamespace = "Thumb2CoProc" in {
-defm t2LDC   : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm t2LDCL  : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm t2LDC2  : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
-defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
+defm t2LDC   : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
+defm t2LDCL  : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
+defm t2LDC2  : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
+defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l", [(int_arm_ldc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;

-defm t2STC   : t2LdStCop<0b1110, 0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm t2STCL  : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm t2STC2  : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
-defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
+defm t2STC   : t2LdStCop<0b1110, 0, 0, "stc", [(int_arm_stc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
+defm t2STCL  : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
+defm t2STC2  : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
+defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
 }
@@ -4368,8 +4368,8 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
                         (outs),
                         (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
                              c_imm:$CRm, imm0_7:$opc2),
-                        [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
-                                      imm:$CRm, imm:$opc2)]>,
+                        [(int_arm_mcr timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn,
+                                      timm:$CRm, timm:$opc2)]>,
                         ComplexDeprecationPredicate<"MCR">;
 def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
                   (t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
@@ -4377,8 +4377,8 @@ def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
 def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
                          (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
                                       c_imm:$CRm, imm0_7:$opc2),
-                         [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
-                                        imm:$CRm, imm:$opc2)]> {
+                         [(int_arm_mcr2 timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn,
+                                        timm:$CRm, timm:$opc2)]> {
   let Predicates = [IsThumb2, PreV8];
 }
 def : t2InstAlias<"mcr2${p} $cop, $opc1, $Rt, $CRn, $CRm",
@@ -4402,24 +4402,24 @@ def : t2InstAlias<"mrc2${p} $cop, $opc1, $Rt, $CRn, $CRm",
                   (t2MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
                           c_imm:$CRm, 0, pred:$p)>;

-def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
-              (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+def : T2v6Pat<(int_arm_mrc timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2),
+              (t2MRC p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>;

-def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
-              (t2MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+def : T2v6Pat<(int_arm_mrc2 timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2),
+              (t2MRC2 p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>;

 /* from ARM core register to coprocessor */
 def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0, (outs),
                           (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2,
                                c_imm:$CRm),
-                          [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
-                                         imm:$CRm)]>;
+                          [(int_arm_mcrr timm:$cop, timm:$opc1, GPR:$Rt, GPR:$Rt2,
+                                         timm:$CRm)]>;
 def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0, (outs),
                            (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2,
                                 c_imm:$CRm),
-                           [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
-                                           GPR:$Rt2, imm:$CRm)]> {
+                           [(int_arm_mcrr2 timm:$cop, timm:$opc1, GPR:$Rt,
+                                           GPR:$Rt2, timm:$CRm)]> {
   let Predicates = [IsThumb2, PreV8];
 }
@@ -4439,8 +4439,8 @@ def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1, (outs GPR:$Rt, GPR:$Rt2),
 def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
                   c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
                   "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
-                  [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
-                                imm:$CRm, imm:$opc2)]> {
+                  [(int_arm_cdp timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn,
+                                timm:$CRm, timm:$opc2)]> {
   let Inst{27-24} = 0b1110;

   bits<4> opc1;
@@ -4465,8 +4465,8 @@ def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
 def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
                    c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
                    "cdp2", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
-                   [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
-                                  imm:$CRm, imm:$opc2)]> {
+                   [(int_arm_cdp2 timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn,
+                                  timm:$CRm, timm:$opc2)]> {
   let Inst{27-24} = 0b1110;

   bits<4> opc1;
diff --git a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
index 2ce1419e4790..e4a2ba0ec29c 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
@@ -37,12 +37,12 @@ def: Pat<(int_hexagon_F2_sfmax IntRegs:$src1, IntRegs:$src2),
          (F2_sfmax IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A2_vabswsat DoubleRegs:$src1),
          (A2_vabswsat DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$src1, u5_0ImmPred:$src2),
-         (S2_asr_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2),
-         (S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_combineri IntRegs:$src1, s32_0ImmPred:$src2),
-         (A4_combineri IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2),
+         (S2_asr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2),
+         (S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_combineri IntRegs:$src1, s32_0ImmPred_timm:$src2),
+         (A4_combineri IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_mpy_nac_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
          (M2_mpy_nac_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M4_vpmpyh_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
@@ -75,8 +75,8 @@ def: Pat<(int_hexagon_A2_vaddws DoubleRegs:$src1, DoubleRegs:$src2),
          (A2_vaddws DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A2_maxup DoubleRegs:$src1, DoubleRegs:$src2),
          (A2_maxup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred:$src2),
-         (A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2),
+         (A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_S2_interleave DoubleRegs:$src1),
          (S2_interleave DoubleRegs:$src1)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_vrcmpyi_s0 DoubleRegs:$src1, DoubleRegs:$src2),
@@ -89,10 +89,10 @@ def: Pat<(int_hexagon_C2_cmpgtu IntRegs:$src1, IntRegs:$src2),
          (C2_cmpgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2),
          (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cmphgtui IntRegs:$src1, u32_0ImmPred:$src2),
-         (A4_cmphgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2),
-         (C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmphgtui IntRegs:$src1, u32_0ImmPred_timm:$src2),
+         (A4_cmphgtui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgti IntRegs:$src1, s32_0ImmPred_timm:$src2),
+         (C2_cmpgti IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_mpyi IntRegs:$src1, IntRegs:$src2),
          (M2_mpyi IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_F2_conv_df2uw_chop DoubleRegs:$src1),
@@ -103,12 +103,12 @@ def: Pat<(int_hexagon_M2_mpy_lh_s1 IntRegs:$src1, IntRegs:$src2),
          (M2_mpy_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_mpy_lh_s0 IntRegs:$src1, IntRegs:$src2),
          (M2_mpy_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
-         (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+         (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_S2_vrcnegh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
          (S2_vrcnegh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_extractup DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3),
-         (S2_extractup DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_extractup DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3),
+         (S2_extractup DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_S4_ntstbit_r IntRegs:$src1, IntRegs:$src2),
          (S4_ntstbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_F2_conv_w2sf IntRegs:$src1),
@@ -125,10 +125,10 @@ def: Pat<(int_hexagon_A4_cmpbgt IntRegs:$src1, IntRegs:$src2),
          (A4_cmpbgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_S2_asr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
          (S2_asr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_rcmpneqi IntRegs:$src1, s32_0ImmPred:$src2),
-         (A4_rcmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
-         (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_rcmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2),
+         (A4_rcmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+         (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_subacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
          (M2_subacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A2_orp DoubleRegs:$src1, DoubleRegs:$src2),
@@ -137,28 +137,28 @@ def: Pat<(int_hexagon_M2_mpyu_up IntRegs:$src1, IntRegs:$src2),
          (M2_mpyu_up IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_mpy_acc_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
          (M2_mpy_acc_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2),
-         (S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2),
-         (S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2),
+         (S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2),
+         (S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A4_cmpbgtu IntRegs:$src1, IntRegs:$src2),
          (A4_cmpbgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A4_vcmpbeq_any DoubleRegs:$src1, DoubleRegs:$src2),
          (A4_vcmpbeq_any DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cmpbgti IntRegs:$src1, s8_0ImmPred:$src2),
-         (A4_cmpbgti IntRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbgti IntRegs:$src1, s8_0ImmPred_timm:$src2),
+         (A4_cmpbgti IntRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_mpyd_lh_s1 IntRegs:$src1, IntRegs:$src2),
          (M2_mpyd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
          (S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
-         (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+         (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A2_addsp IntRegs:$src1, DoubleRegs:$src2),
          (A2_addsp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_S4_vxsubaddw DoubleRegs:$src1, DoubleRegs:$src2),
          (S4_vxsubaddw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred:$src2),
-         (A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2),
+         (A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_S4_vxsubaddh DoubleRegs:$src1, DoubleRegs:$src2),
          (S4_vxsubaddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M4_pmpyw IntRegs:$src1, IntRegs:$src2),
@@ -177,10 +177,10 @@ def: Pat<(int_hexagon_A2_pxorf PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
          (A2_pxorf PredRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2),
          (A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred:$src2),
-         (S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$src1, u5_0ImmPred:$src2),
-         (S2_asl_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2),
+         (S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2),
+         (S2_asl_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A4_vrminuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
          (A4_vrminuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_F2_sffma IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
@@ -199,10 +199,10 @@ def: Pat<(int_hexagon_M4_vrmpyoh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
          (M4_vrmpyoh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_C2_bitsset IntRegs:$src1, IntRegs:$src2),
          (C2_bitsset IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpysip IntRegs:$src1, u32_0ImmPred:$src2),
-         (M2_mpysip IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2),
-         (M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpysip IntRegs:$src1, u32_0ImmPred_timm:$src2),
+         (M2_mpysip IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpysin IntRegs:$src1, u8_0ImmPred_timm:$src2),
+         (M2_mpysin IntRegs:$src1, u8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A4_boundscheck IntRegs:$src1, DoubleRegs:$src2),
          (A4_boundscheck IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M5_vrmpybuu DoubleRegs:$src1, DoubleRegs:$src2),
@@ -225,10 +225,10 @@ def: Pat<(int_hexagon_F2_conv_ud2df DoubleRegs:$src1),
          (F2_conv_ud2df DoubleRegs:$src1)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A2_vnavgw DoubleRegs:$src1, DoubleRegs:$src2),
          (A2_vnavgw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
-         (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_subi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
-         (S4_subi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+         (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_subi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+         (S4_subi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_S2_vzxthw IntRegs:$src1),
          (S2_vzxthw IntRegs:$src1)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_F2_sfadd IntRegs:$src1, IntRegs:$src2),
@@ -241,12 +241,12 @@ def: Pat<(int_hexagon_M2_vmac2su_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$sr
          (M2_vmac2su_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2),
          (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3, u5_0ImmPred:$src4),
-         (S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3, u5_0ImmPred:$src4)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3, u5_0ImmPred_timm:$src4),
+         (S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3, u5_0ImmPred_timm:$src4)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_S2_packhl IntRegs:$src1, IntRegs:$src2),
          (S2_packhl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred:$src2),
-         (A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2),
+         (A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A2_vavguwr DoubleRegs:$src1, DoubleRegs:$src2),
          (A2_vavguwr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_S2_asl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
@@ -259,8 +259,8 @@ def: Pat<(int_hexagon_M4_and_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
          (M4_and_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_F2_conv_d2df DoubleRegs:$src1),
          (F2_conv_d2df DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2),
-         (C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgtui IntRegs:$src1, u32_0ImmPred_timm:$src2),
+         (C2_cmpgtui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A2_vconj DoubleRegs:$src1),
          (A2_vconj DoubleRegs:$src1)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_S2_lsr_r_vw DoubleRegs:$src1, IntRegs:$src2),
@@ -279,8 +279,8 @@ def: Pat<(int_hexagon_C2_any8 PredRegs:$src1),
          (C2_any8 PredRegs:$src1)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_S2_togglebit_r IntRegs:$src1, IntRegs:$src2),
          (S2_togglebit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_togglebit_i IntRegs:$src1, u5_0ImmPred:$src2),
-         (S2_togglebit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_togglebit_i IntRegs:$src1, u5_0ImmPred_timm:$src2),
+         (S2_togglebit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_F2_conv_uw2sf IntRegs:$src1),
          (F2_conv_uw2sf IntRegs:$src1)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_S2_vsathb_nopack DoubleRegs:$src1),
@@ -303,10 +303,10 @@ def: Pat<(int_hexagon_C4_or_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)
          (C4_or_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_S2_asl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
          (S2_asl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
-         (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred:$src2),
-         (A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+         (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2),
+         (A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M4_vrmpyoh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
          (M4_vrmpyoh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M4_vrmpyoh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
@@ -323,34 +323,34 @@ def: Pat<(int_hexagon_M2_vrcmacr_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleR
          (M2_vrcmacr_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A2_vavgwcr DoubleRegs:$src1, DoubleRegs:$src2),
          (A2_vavgwcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
-         (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+         (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A4_vrmaxw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
          (A4_vrmaxw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A2_vnavghr DoubleRegs:$src1, DoubleRegs:$src2),
          (A2_vnavghr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M4_cmpyi_wh DoubleRegs:$src1, IntRegs:$src2),
          (M4_cmpyi_wh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_tfrsi s32_0ImmPred:$src1),
-         (A2_tfrsi s32_0ImmPred:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
-         (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrsi s32_0ImmPred_timm:$src1),
+         (A2_tfrsi s32_0ImmPred_timm:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+         (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A2_svnavgh IntRegs:$src1, IntRegs:$src2),
          (A2_svnavgh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$src1, u5_0ImmPred:$src2),
-         (S2_lsr_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2),
+         (S2_lsr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_vmac2 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
          (M2_vmac2 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred:$src2),
-         (A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2),
+         (A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A2_svavgh IntRegs:$src1, IntRegs:$src2),
          (A2_svavgh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M4_vrmpyeh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
          (M4_vrmpyeh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M4_vrmpyeh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
          (M4_vrmpyeh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2),
-         (S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2),
+         (S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A2_combine_hl IntRegs:$src1, IntRegs:$src2),
          (A2_combine_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_mpy_up IntRegs:$src1, IntRegs:$src2),
@@ -381,10 +381,10 @@ def: Pat<(int_hexagon_M2_cmacr_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3
          (M2_cmacr_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M4_or_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
          (M4_or_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_mpyrr_addi u32_0ImmPred:$src1, IntRegs:$src2, IntRegs:$src3),
-         (M4_mpyrr_addi u32_0ImmPred:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3),
-         (S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyrr_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, IntRegs:$src3),
+         (M4_mpyrr_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
+         (S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_mpy_sat_hl_s0 IntRegs:$src1, IntRegs:$src2),
          (M2_mpy_sat_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_mpy_sat_hl_s1 IntRegs:$src1, IntRegs:$src2),
@@ -453,8 +453,8 @@ def: Pat<(int_hexagon_M2_mpy_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2),
          (M2_mpy_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_F2_sffms_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
          (F2_sffms_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2),
-         (C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2),
+         (C4_cmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M4_and_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
          (M4_and_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A2_sat DoubleRegs:$src1),
@@ -469,8 +469,8 @@ def: Pat<(int_hexagon_A2_svavghs IntRegs:$src1, IntRegs:$src2),
          (A2_svavghs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A2_vrsadub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
          (A2_vrsadub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_bitsclri IntRegs:$src1, u6_0ImmPred:$src2),
-         (C2_bitsclri IntRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_bitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2),
+         (C2_bitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A2_subh_h16_sat_hh IntRegs:$src1, IntRegs:$src2),
          (A2_subh_h16_sat_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_A2_subh_h16_sat_hl IntRegs:$src1, IntRegs:$src2),
@@ -535,10 +535,10 @@ def: Pat<(int_hexagon_C2_vmux PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3
          (C2_vmux PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_S2_parityp DoubleRegs:$src1, DoubleRegs:$src2),
          (S2_parityp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3),
-         (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3),
-         (S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+         (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+         (S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_mpyu_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
          (M2_mpyu_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_mpyu_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
@@ -557,30 +557,30 @@ def: Pat<(int_hexagon_M2_cnacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src
          (M2_cnacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_cnacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
          (M2_cnacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3),
-         (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_subaddi IntRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3),
+         (S4_subaddi IntRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_mpyud_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
          (M2_mpyud_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_mpyud_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
          (M2_mpyud_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_S2_tstbit_r IntRegs:$src1, IntRegs:$src2),
          (S2_tstbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3),
-         (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred_timm:$src3),
+         (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_mmachs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
          (M2_mmachs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_mmachs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
          (M2_mmachs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_tstbit_i IntRegs:$src1, u5_0ImmPred:$src2),
-         (S2_tstbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_tstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2),
+         (S2_tstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_mpy_up_s1 IntRegs:$src1, IntRegs:$src2),
          (M2_mpy_up_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_S2_extractu_rp IntRegs:$src1, DoubleRegs:$src2),
          (S2_extractu_rp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
 def: Pat<(int_hexagon_M2_mmpyuh_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
          (M2_mmpyuh_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2),
-         (S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>,
Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpy_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2), @@ -605,14 +605,14 @@ def: Pat<(int_hexagon_F2_conv_w2df IntRegs:$src1), (F2_conv_w2df IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_subh_l16_sat_hl IntRegs:$src1, IntRegs:$src2), (A2_subh_l16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2), - (C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C2_cmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2), + (C2_cmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vcnegh DoubleRegs:$src1, IntRegs:$src2), (S2_vcnegh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred:$src2), - (A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2), + (A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2), (M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2), @@ -633,8 +633,8 @@ def: Pat<(int_hexagon_S2_asl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3 (S2_asl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_cl0p DoubleRegs:$src1), (S2_cl0p DoubleRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3), - (S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3), + (S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sffixupd IntRegs:$src1, IntRegs:$src2), (F2_sffixupd IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_sat_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2), @@ -653,8 +653,8 @@ def: Pat<(int_hexagon_M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmpyul_rs1 DoubleRegs:$src1, DoubleRegs:$src2), (M2_mmpyul_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_ntstbit_i IntRegs:$src1, u5_0ImmPred:$src2), - (S4_ntstbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_ntstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S4_ntstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sffixupr IntRegs:$src1), (F2_sffixupr IntRegs:$src1)>, Requires<[HasV5]>; def: 
Pat<(int_hexagon_S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), @@ -669,32 +669,32 @@ def: Pat<(int_hexagon_C2_andn PredRegs:$src1, PredRegs:$src2), (C2_andn PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vmpy2s_s0pack IntRegs:$src1, IntRegs:$src2), (M2_vmpy2s_s0pack IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyd_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_acc_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpy_acc_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_rcmpeqi IntRegs:$src1, s32_0ImmPred:$src2), - (A4_rcmpeqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_rcmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A4_rcmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_xor_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M4_xor_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmpyuh_rs1 DoubleRegs:$src1, DoubleRegs:$src2), (M2_mmpyuh_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_asr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_round_ri IntRegs:$src1, u5_0ImmPred:$src2), - (A4_round_ri IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_round_ri IntRegs:$src1, u5_0ImmPred_timm:$src2), + (A4_round_ri IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_max IntRegs:$src1, IntRegs:$src2), (A2_max IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_round_rr IntRegs:$src1, IntRegs:$src2), (A4_round_rr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_combineii s8_0ImmPred:$src1, u32_0ImmPred:$src2), - (A4_combineii s8_0ImmPred:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_combineir s32_0ImmPred:$src1, IntRegs:$src2), - (A4_combineir s32_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_combineii s8_0ImmPred_timm:$src1, u32_0ImmPred_timm:$src2), + (A4_combineii s8_0ImmPred_timm:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_combineir s32_0ImmPred_timm:$src1, IntRegs:$src2), + (A4_combineir s32_0ImmPred_timm:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C4_and_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3), (C4_and_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>; def: 
Pat<(int_hexagon_M5_vmacbuu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -703,8 +703,8 @@ def: Pat<(int_hexagon_A4_rcmpeq IntRegs:$src1, IntRegs:$src2), (A4_rcmpeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_cmpyr_whc DoubleRegs:$src1, IntRegs:$src2), (M4_cmpyr_whc DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vzxtbh IntRegs:$src1), (S2_vzxtbh IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmacuhs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), @@ -721,8 +721,8 @@ def: Pat<(int_hexagon_M2_cmpyi_s0 IntRegs:$src1, IntRegs:$src2), (M2_cmpyi_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_ori_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_ori_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_ori_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_ori_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C4_nbitsset IntRegs:$src1, IntRegs:$src2), (C4_nbitsset IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyu_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -745,10 +745,10 @@ def: Pat<(int_hexagon_M2_mpyd_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs (M2_mpyd_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyd_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_F2_sfimm_p u10_0ImmPred:$src1), - (F2_sfimm_p u10_0ImmPred:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_F2_sfimm_n u10_0ImmPred:$src1), - (F2_sfimm_n u10_0ImmPred:$src1)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_F2_sfimm_p u10_0ImmPred_timm:$src1), + (F2_sfimm_p u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_F2_sfimm_n u10_0ImmPred_timm:$src1), + (F2_sfimm_n u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_cmpyr_wh DoubleRegs:$src1, IntRegs:$src2), (M4_cmpyr_wh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), @@ -759,14 +759,14 @@ def: Pat<(int_hexagon_F2_conv_d2sf DoubleRegs:$src1), (F2_conv_d2sf DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vavguh DoubleRegs:$src1, DoubleRegs:$src2), (A2_vavguh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_cmpbeqi IntRegs:$src1, u8_0ImmPred:$src2), - (A4_cmpbeqi IntRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_cmpbeqi IntRegs:$src1, u8_0ImmPred_timm:$src2), + (A4_cmpbeqi IntRegs:$src1, u8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sfcmpuo IntRegs:$src1, IntRegs:$src2), (F2_sfcmpuo IntRegs:$src1, IntRegs:$src2)>, 
Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vavguw DoubleRegs:$src1, DoubleRegs:$src2), (A2_vavguw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vsatwh_nopack DoubleRegs:$src1), (S2_vsatwh_nopack DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_hh_s0 IntRegs:$src1, IntRegs:$src2), @@ -783,8 +783,8 @@ def: Pat<(int_hexagon_M4_or_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M4_or_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_minp DoubleRegs:$src1, DoubleRegs:$src2), (A2_minp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpy_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2), @@ -817,16 +817,16 @@ def: Pat<(int_hexagon_S4_extract_rp IntRegs:$src1, DoubleRegs:$src2), (S4_extract_rp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_lsl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C4_cmplteui IntRegs:$src1, u32_0ImmPred:$src2), - (C4_cmplteui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_addi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_addi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C4_cmplteui IntRegs:$src1, u32_0ImmPred_timm:$src2), + (C4_cmplteui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_addi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_addi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_tfrcpp CtrRegs64:$src1), (A4_tfrcpp CtrRegs64:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred:$src2), - (S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_cmphgti IntRegs:$src1, s32_0ImmPred:$src2), - (A4_cmphgti IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_cmphgti IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A4_cmphgti IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_vrminh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (A4_vrminh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_vrminw DoubleRegs:$src1, 
DoubleRegs:$src2, IntRegs:$src3), @@ -837,8 +837,8 @@ def: Pat<(int_hexagon_S2_insertp_rp DoubleRegs:$src1, DoubleRegs:$src2, DoubleRe (S2_insertp_rp DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vnavghcr DoubleRegs:$src1, DoubleRegs:$src2), (A2_vnavghcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_subi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_subi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_subi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_subi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_vh DoubleRegs:$src1, IntRegs:$src2), (S2_lsl_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_hh_s0 IntRegs:$src1, IntRegs:$src2), @@ -851,14 +851,14 @@ def: Pat<(int_hexagon_S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs (S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_satb IntRegs:$src1), (A2_satb IntRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3, u6_0ImmPred:$src4), - (S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3, u6_0ImmPred:$src4)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3, u6_0ImmPred_timm:$src4), + (S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3, u6_0ImmPred_timm:$src4)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpyd_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpyd_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_extractup_rp DoubleRegs:$src1, DoubleRegs:$src2), (S2_extractup_rp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S4_vxaddsubw DoubleRegs:$src1, DoubleRegs:$src2), @@ -925,8 +925,8 @@ def: Pat<(int_hexagon_M2_cmpyr_s0 IntRegs:$src1, IntRegs:$src2), (M2_cmpyr_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_dpmpyss_rnd_s0 IntRegs:$src1, IntRegs:$src2), (M2_dpmpyss_rnd_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C2_muxri PredRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3), - (C2_muxri PredRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C2_muxri PredRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3), + (C2_muxri PredRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vmac2es_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (M2_vmac2es_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vmac2es_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), @@ -937,8 +937,8 @@ def: Pat<(int_hexagon_M2_mpyu_lh_s1 
IntRegs:$src1, IntRegs:$src2), (M2_mpyu_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyu_lh_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpyu_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyd_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -947,8 +947,8 @@ def: Pat<(int_hexagon_S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs (S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vaddw DoubleRegs:$src1, DoubleRegs:$src2), (A2_vaddw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vaddh DoubleRegs:$src1, DoubleRegs:$src2), (A2_vaddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_nac_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -957,16 +957,16 @@ def: Pat<(int_hexagon_M2_mpy_nac_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs (M2_mpy_nac_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2), (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M4_mpyri_addi u32_0ImmPred:$src1, IntRegs:$src2, u6_0ImmPred:$src3), - (M4_mpyri_addi u32_0ImmPred:$src1, IntRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_andi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_andi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3), - (M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M4_mpyri_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, u6_0ImmPred_timm:$src3), + (M4_mpyri_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_andi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_andi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3), + (M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_tfrcrr CtrRegs:$src1), (A2_tfrcrr CtrRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3), - (M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M2_macsin IntRegs:$src1, IntRegs:$src2, 
u32_0ImmPred_timm:$src3), + (M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C2_orn PredRegs:$src1, PredRegs:$src2), (C2_orn PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_and_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -1005,8 +1005,8 @@ def: Pat<(int_hexagon_M2_vrcmpys_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, IntR (M2_vrcmpys_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_dfcmpge DoubleRegs:$src1, DoubleRegs:$src2), (F2_dfcmpge DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A5_vaddhubs DoubleRegs:$src1, DoubleRegs:$src2), (A5_vaddhubs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vmaxw DoubleRegs:$src1, DoubleRegs:$src2), @@ -1017,10 +1017,10 @@ def: Pat<(int_hexagon_A2_vmaxh DoubleRegs:$src1, DoubleRegs:$src2), (A2_vmaxh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vsxthw IntRegs:$src1), (S2_vsxthw IntRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_andi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_andi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_andi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_andi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C2_cmpgt IntRegs:$src1, IntRegs:$src2), @@ -1035,22 +1035,22 @@ def: Pat<(int_hexagon_F2_conv_sf2w IntRegs:$src1), (F2_conv_sf2w IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_F2_sfclass IntRegs:$src1, u5_0ImmPred:$src2), - (F2_sfclass IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_F2_sfclass IntRegs:$src1, u5_0ImmPred_timm:$src2), + (F2_sfclass IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyud_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyud_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_xor_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M4_xor_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred:$src3), - (S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred:$src3)>, 
Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred_timm:$src3), + (S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M5_vdmpybsu DoubleRegs:$src1, DoubleRegs:$src2), (M5_vdmpybsu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyu_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyu_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyu_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyu_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_addi IntRegs:$src1, s32_0ImmPred:$src2), - (A2_addi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_addi IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A2_addi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_addp DoubleRegs:$src1, DoubleRegs:$src2), (A2_addp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vmpy2s_s1pack IntRegs:$src1, IntRegs:$src2), @@ -1063,8 +1063,8 @@ def: Pat<(int_hexagon_M2_nacci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_nacci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_shuffeh DoubleRegs:$src1, DoubleRegs:$src2), (S2_shuffeh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_sat_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpy_sat_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_sat_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2), @@ -1131,12 +1131,12 @@ def: Pat<(int_hexagon_C2_and PredRegs:$src1, PredRegs:$src2), (C2_and PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S5_popcountp DoubleRegs:$src1), (S5_popcountp DoubleRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_extractp DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3), - (S4_extractp DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_extractp DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3), + (S4_extractp DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_cl0 IntRegs:$src1), (S2_cl0 IntRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred:$src2), - (A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2), + (A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmacls_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (M2_mmacls_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmacls_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), @@ -1167,8 +1167,8 @@ def: Pat<(int_hexagon_M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: 
Pat<(int_hexagon_A2_vmaxuh DoubleRegs:$src1, DoubleRegs:$src2), (A2_vmaxuh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_bitspliti IntRegs:$src1, u5_0ImmPred:$src2), - (A4_bitspliti IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_bitspliti IntRegs:$src1, u5_0ImmPred_timm:$src2), + (A4_bitspliti IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vmaxub DoubleRegs:$src1, DoubleRegs:$src2), (A2_vmaxub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyud_hh_s0 IntRegs:$src1, IntRegs:$src2), @@ -1185,26 +1185,26 @@ def: Pat<(int_hexagon_F2_conv_sf2d IntRegs:$src1), (F2_conv_sf2d IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_asr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_F2_dfimm_n u10_0ImmPred:$src1), - (F2_dfimm_n u10_0ImmPred:$src1)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_F2_dfimm_n u10_0ImmPred_timm:$src1), + (F2_dfimm_n u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_cmphgt IntRegs:$src1, IntRegs:$src2), (A4_cmphgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_F2_dfimm_p u10_0ImmPred:$src1), - (F2_dfimm_p u10_0ImmPred:$src1)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_F2_dfimm_p u10_0ImmPred_timm:$src1), + (F2_dfimm_p u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyud_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyud_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vcmpy_s1_sat_r DoubleRegs:$src1, DoubleRegs:$src2), (M2_vcmpy_s1_sat_r DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred:$src2, IntRegs:$src3), - (M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred_timm:$src2, IntRegs:$src3), + (M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred_timm:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vcmpy_s1_sat_i DoubleRegs:$src1, DoubleRegs:$src2), (M2_vcmpy_s1_sat_i DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M5_vrmacbuu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (M5_vrmacbuu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3), - (S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3), + (S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_cnacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -1215,20 +1215,20 @@ def: Pat<(int_hexagon_A2_maxu IntRegs:$src1, IntRegs:$src2), (A2_maxu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_maxp DoubleRegs:$src1, 
DoubleRegs:$src2), (A2_maxp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_andir IntRegs:$src1, s32_0ImmPred:$src2), - (A2_andir IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_andir IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A2_andir IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sfrecipa IntRegs:$src1, IntRegs:$src2), (F2_sfrecipa IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_combineii s32_0ImmPred:$src1, s8_0ImmPred:$src2), - (A2_combineii s32_0ImmPred:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_combineii s32_0ImmPred_timm:$src1, s8_0ImmPred_timm:$src2), + (A2_combineii s32_0ImmPred_timm:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_orn IntRegs:$src1, IntRegs:$src2), (A4_orn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_cmpbgtui IntRegs:$src1, u32_0ImmPred:$src2), - (A4_cmpbgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_cmpbgtui IntRegs:$src1, u32_0ImmPred_timm:$src2), + (A4_cmpbgtui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_lsr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred:$src2), - (A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred_timm:$src2), + (A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_r IntRegs:$src1, IntRegs:$src2), (S2_lsl_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2), @@ -1251,16 +1251,16 @@ def: Pat<(int_hexagon_A2_satub IntRegs:$src1), (A2_satub IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vrcmpys_s1 DoubleRegs:$src1, IntRegs:$src2), (M2_vrcmpys_s1 DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C4_fastcorner9_not PredRegs:$src1, PredRegs:$src2), (C4_fastcorner9_not PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_tfrih IntRegs:$src1, u16_0ImmPred:$src2), - (A2_tfrih IntRegs:$src1, u16_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_tfril IntRegs:$src1, u16_0ImmPred:$src2), - (A2_tfril IntRegs:$src1, u16_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3), - (M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_tfrih IntRegs:$src1, u16_0ImmPred_timm:$src2), + (A2_tfrih IntRegs:$src1, u16_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_tfril IntRegs:$src1, u16_0ImmPred_timm:$src2), + (A2_tfril IntRegs:$src1, u16_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3), + (M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3)>, 
Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vtrunehb DoubleRegs:$src1), (S2_vtrunehb DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vabsw DoubleRegs:$src1), @@ -1269,14 +1269,14 @@ def: Pat<(int_hexagon_A2_vabsh DoubleRegs:$src1), (A2_vabsh DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sfsub IntRegs:$src1, IntRegs:$src2), (F2_sfsub IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C2_muxii PredRegs:$src1, s32_0ImmPred:$src2, s8_0ImmPred:$src3), - (C2_muxii PredRegs:$src1, s32_0ImmPred:$src2, s8_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C2_muxii PredRegs:$src1, s32_0ImmPred_timm:$src2, s8_0ImmPred_timm:$src3), + (C2_muxii PredRegs:$src1, s32_0ImmPred_timm:$src2, s8_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_swiz IntRegs:$src1), (A2_swiz IntRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_cmpyrsc_s0 IntRegs:$src1, IntRegs:$src2), (M2_cmpyrsc_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_cmpyrsc_s1 IntRegs:$src1, IntRegs:$src2), @@ -1295,44 +1295,44 @@ def: Pat<(int_hexagon_M2_mpy_nac_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs (M2_mpy_nac_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_nac_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpy_nac_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_extract IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3), - (S4_extract IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_extract IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3), + (S4_extract IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vcmpweq DoubleRegs:$src1, DoubleRegs:$src2), (A2_vcmpweq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_lsr_i_p_or DoubleRegs:$src1, 
DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_ud2sf DoubleRegs:$src1), (F2_conv_ud2sf DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_tfr IntRegs:$src1), (A2_tfr IntRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_subri s32_0ImmPred:$src1, IntRegs:$src2), - (A2_subri s32_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_subri s32_0ImmPred_timm:$src1, IntRegs:$src2), + (A2_subri s32_0ImmPred_timm:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_vrmaxuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (A4_vrmaxuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M5_vmpybuu IntRegs:$src1, IntRegs:$src2), (M5_vmpybuu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_vrmaxuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (A4_vrmaxuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2), - (S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vavgw DoubleRegs:$src1, DoubleRegs:$src2), (A2_vavgw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_brev IntRegs:$src1), (S2_brev IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vavgh DoubleRegs:$src1, DoubleRegs:$src2), (A2_vavgh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_clrbit_i IntRegs:$src1, u5_0ImmPred:$src2), - (S2_clrbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2), - (S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_clrbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_clrbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2), + (S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmpyl_rs1 DoubleRegs:$src1, DoubleRegs:$src2), @@ -1343,8 +1343,8 @@ def: Pat<(int_hexagon_M2_mmpyl_s0 DoubleRegs:$src1, DoubleRegs:$src2), (M2_mmpyl_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmpyl_s1 DoubleRegs:$src1, DoubleRegs:$src2), (M2_mmpyl_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: 
Pat<(int_hexagon_M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vrndpackwhs DoubleRegs:$src1), (S2_vrndpackwhs DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vtrunewh DoubleRegs:$src1, DoubleRegs:$src2), @@ -1357,24 +1357,24 @@ def: Pat<(int_hexagon_M2_mpyd_ll_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpyd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_mac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M4_mac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred:$src4), - (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred:$src4)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred_timm:$src4), + (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred_timm:$src4)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_uw2df IntRegs:$src1), (F2_conv_uw2df IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vaddubs DoubleRegs:$src1, DoubleRegs:$src2), (A2_vaddubs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_asr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_orir IntRegs:$src1, s32_0ImmPred:$src2), - (A2_orir IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_orir IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A2_orir IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_andp DoubleRegs:$src1, DoubleRegs:$src2), (A2_andp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lfsp DoubleRegs:$src1, DoubleRegs:$src2), (S2_lfsp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_min IntRegs:$src1, IntRegs:$src2), (A2_min IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M2_mpysmi IntRegs:$src1, m32_0ImmPred:$src2), - (M2_mpysmi IntRegs:$src1, m32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M2_mpysmi IntRegs:$src1, m32_0ImmPred_timm:$src2), + (M2_mpysmi IntRegs:$src1, m32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vcmpy_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2), (M2_vcmpy_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyu_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -1397,10 +1397,10 @@ def: Pat<(int_hexagon_M2_mpyd_lh_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpyd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_df2w DoubleRegs:$src1), (F2_conv_df2w DoubleRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred:$src2), - (S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S5_asrhub_sat DoubleRegs:$src1, 
u4_0ImmPred_timm:$src2), + (S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_df2d DoubleRegs:$src1), (F2_conv_df2d DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmaculs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), @@ -1423,8 +1423,8 @@ def: Pat<(int_hexagon_A2_vavghr DoubleRegs:$src1, DoubleRegs:$src2), (A2_vavghr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sffma_sc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, PredRegs:$src4), (F2_sffma_sc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, PredRegs:$src4)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_F2_dfclass DoubleRegs:$src1, u5_0ImmPred:$src2), - (F2_dfclass DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_F2_dfclass DoubleRegs:$src1, u5_0ImmPred_timm:$src2), + (F2_dfclass DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_df2ud DoubleRegs:$src1), (F2_conv_df2ud DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_df2uw DoubleRegs:$src1), @@ -1433,16 +1433,16 @@ def: Pat<(int_hexagon_M2_cmpyrs_s0 IntRegs:$src1, IntRegs:$src2), (M2_cmpyrs_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_cmpyrs_s1 IntRegs:$src1, IntRegs:$src2), (M2_cmpyrs_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C4_cmpltei IntRegs:$src1, s32_0ImmPred:$src2), - (C4_cmpltei IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C4_cmpltei IntRegs:$src1, s32_0ImmPred_timm:$src2), + (C4_cmpltei IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C4_cmplteu IntRegs:$src1, IntRegs:$src2), (C4_cmplteu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vsubb_map DoubleRegs:$src1, DoubleRegs:$src2), (A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_subh_l16_ll IntRegs:$src1, IntRegs:$src2), (A2_subh_l16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2), - (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vrmpy_s0 DoubleRegs:$src1, DoubleRegs:$src2), (M2_vrmpy_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2), @@ -1471,14 +1471,14 @@ def: Pat<(int_hexagon_M2_mpyud_hl_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpyud_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vrcmpyi_s0c DoubleRegs:$src1, DoubleRegs:$src2), (M2_vrcmpyi_s0c DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred:$src2), - (S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred_timm:$src2), + (S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_addpsat DoubleRegs:$src1, DoubleRegs:$src2), (A2_addpsat DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; 
def: Pat<(int_hexagon_A2_svaddhs IntRegs:$src1, IntRegs:$src2), (A2_svaddhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_ori_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_ori_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_ori_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_ori_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_sat_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpy_sat_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_sat_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2), @@ -1499,8 +1499,8 @@ def: Pat<(int_hexagon_M2_mpyud_lh_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpyud_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_asl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_lsli s6_0ImmPred:$src1, IntRegs:$src2), - (S4_lsli s6_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_lsli s6_0ImmPred_timm:$src1, IntRegs:$src2), + (S4_lsli s6_0ImmPred_timm:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_vw DoubleRegs:$src1, IntRegs:$src2), (S2_lsl_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_hh_s1 IntRegs:$src1, IntRegs:$src2), @@ -1529,8 +1529,8 @@ def: Pat<(int_hexagon_A4_cmpbeq IntRegs:$src1, IntRegs:$src2), (A4_cmpbeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_negp DoubleRegs:$src1), (A2_negp DoubleRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred:$src2), - (S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_addh_l16_sat_hl IntRegs:$src1, IntRegs:$src2), (A2_addh_l16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vsatwuh DoubleRegs:$src1), @@ -1541,10 +1541,10 @@ def: Pat<(int_hexagon_S2_svsathb IntRegs:$src1), (S2_svsathb IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2), (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_cround_ri IntRegs:$src1, u5_0ImmPred:$src2), - (A4_cround_ri IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred:$src2), - (S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_cround_ri IntRegs:$src1, u5_0ImmPred_timm:$src2), + (A4_cround_ri IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred_timm:$src2), + (S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_cround_rr IntRegs:$src1, IntRegs:$src2), (A4_cround_rr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C2_mux PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -1563,12 +1563,12 @@ def: Pat<(int_hexagon_A2_vminuh DoubleRegs:$src1, DoubleRegs:$src2), (A2_vminuh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vminub DoubleRegs:$src1, 
DoubleRegs:$src2), (A2_vminub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_extractu IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3), - (S2_extractu IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_extractu IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3), + (S2_extractu IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_svsubh IntRegs:$src1, IntRegs:$src2), (A2_svsubh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_clbaddi IntRegs:$src1, s6_0ImmPred:$src2), - (S4_clbaddi IntRegs:$src1, s6_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_clbaddi IntRegs:$src1, s6_0ImmPred_timm:$src2), + (S4_clbaddi IntRegs:$src1, s6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sffms IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (F2_sffms IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vsxtbh IntRegs:$src1), @@ -1589,16 +1589,16 @@ def: Pat<(int_hexagon_M2_mpy_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$sr (M2_mpy_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpy_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_addi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_addi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_addi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_addi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyd_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyd_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_cmpheqi IntRegs:$src1, s32_0ImmPred:$src2), - (A4_cmpheqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_cmpheqi IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A4_cmpheqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -1623,8 +1623,8 @@ def: Pat<(int_hexagon_M2_mpyud_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntReg (M2_mpyud_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyud_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyud_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_round_ri_sat IntRegs:$src1, u5_0ImmPred:$src2), - 
(A4_round_ri_sat IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_round_ri_sat IntRegs:$src1, u5_0ImmPred_timm:$src2), + (A4_round_ri_sat IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpy_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -1637,10 +1637,10 @@ def: Pat<(int_hexagon_M2_mmacls_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRe (M2_mmacls_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_cmaci_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_cmaci_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_setbit_i IntRegs:$src1, u5_0ImmPred:$src2), - (S2_setbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_setbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_setbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_andn IntRegs:$src1, IntRegs:$src2), (A4_andn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M5_vrmpybsu DoubleRegs:$src1, DoubleRegs:$src2), @@ -1655,8 +1655,8 @@ def: Pat<(int_hexagon_C2_bitsclr IntRegs:$src1, IntRegs:$src2), (C2_bitsclr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_xor_xacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_xor_xacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred:$src2), - (A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2), + (A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_ornp DoubleRegs:$src1, DoubleRegs:$src2), (A4_ornp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C4_and_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3), @@ -1673,14 +1673,14 @@ def: Pat<(int_hexagon_M2_vmpy2su_s1 IntRegs:$src1, IntRegs:$src2), (M2_vmpy2su_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vmpy2su_s0 IntRegs:$src1, IntRegs:$src2), (M2_vmpy2su_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C4_nbitsclri IntRegs:$src1, u6_0ImmPred:$src2), - (C4_nbitsclri IntRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2), - (S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_p_acc DoubleRegs:$src1, 
DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C4_nbitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2), + (C4_nbitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2), + (S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; // V55 Scalar Instructions. @@ -1689,30 +1689,30 @@ def: Pat<(int_hexagon_A5_ACS DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src // V60 Scalar Instructions. -def: Pat<(int_hexagon_S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred:$src2), - (S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_r IntRegs:$src1, u5_0ImmPred:$src2), - (S6_rol_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_r_acc IntRegs:$src1, 
IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2), + (S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S6_rol_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>; // V62 Scalar Instructions. @@ -1744,8 +1744,8 @@ def: Pat<(int_hexagon_F2_dfadd DoubleRegs:$src1, DoubleRegs:$src2), (F2_dfadd DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV66]>; def: Pat<(int_hexagon_M2_mnaci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mnaci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV66]>; -def: Pat<(int_hexagon_S2_mask u5_0ImmPred:$src1, u5_0ImmPred:$src2), - (S2_mask u5_0ImmPred:$src1, u5_0ImmPred:$src2)>, Requires<[HasV66]>; +def: Pat<(int_hexagon_S2_mask u5_0ImmPred_timm:$src1, u5_0ImmPred_timm:$src2), + (S2_mask u5_0ImmPred_timm:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV66]>; // V60 HVX Instructions. 
@@ -1773,10 +1773,10 @@ def: Pat<(int_hexagon_V6_vaddh_dv HvxWR:$src1, HvxWR:$src2), (V6_vaddh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vaddh_dv_128B HvxWR:$src1, HvxWR:$src2), (V6_vaddh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3), - (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vrmpybusi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3), - (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3), + (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vrmpybusi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3), + (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vshufoh HvxVR:$src1, HvxVR:$src2), (V6_vshufoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vshufoh_128B HvxVR:$src1, HvxVR:$src2), @@ -1789,10 +1789,10 @@ def: Pat<(int_hexagon_V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2), (V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vdmpyhsuisat_128B HvxWR:$src1, IntRegs:$src2), (V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4), - (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vrsadubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4), - (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4), + (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vrsadubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4), + (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vnavgw HvxVR:$src1, HvxVR:$src2), (V6_vnavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vnavgw_128B HvxVR:$src1, HvxVR:$src2), @@ -2369,10 +2369,10 @@ def: Pat<(int_hexagon_V6_vsubhsat HvxVR:$src1, HvxVR:$src2), (V6_vsubhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsubhsat_128B HvxVR:$src1, HvxVR:$src2), (V6_vsubhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4), - (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vrmpyubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4), - (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4), + (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, 
Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vrmpyubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4), + (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vabsw HvxVR:$src1), (V6_vabsw HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vabsw_128B HvxVR:$src1), @@ -2489,10 +2489,10 @@ def: Pat<(int_hexagon_V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3), (V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vmpybv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3), (V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3), - (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vrsadubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3), - (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3), + (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vrsadubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3), + (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3), (V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vdmpyhb_dv_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3), @@ -2677,10 +2677,10 @@ def: Pat<(int_hexagon_V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), (V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vaddbnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), (V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vlalignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vlalignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vsatwh HvxVR:$src1, HvxVR:$src2), (V6_vsatwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsatwh_128B HvxVR:$src1, HvxVR:$src2), @@ -2721,10 +2721,10 @@ def: Pat<(int_hexagon_V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), (V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_veqh_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), (V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, 
UseHVX64B]>; -def: Pat<(int_hexagon_V6_valignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_valignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vaddwsat HvxVR:$src1, HvxVR:$src2), (V6_vaddwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vaddwsat_128B HvxVR:$src1, HvxVR:$src2), @@ -2885,10 +2885,10 @@ def: Pat<(int_hexagon_V6_vsubh HvxVR:$src1, HvxVR:$src2), (V6_vsubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsubh_128B HvxVR:$src1, HvxVR:$src2), (V6_vsubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3), - (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vrmpyubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3), - (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3), + (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vrmpyubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3), + (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vminw HvxVR:$src1, HvxVR:$src2), (V6_vminw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vminw_128B HvxVR:$src1, HvxVR:$src2), @@ -2929,10 +2929,10 @@ def: Pat<(int_hexagon_V6_vsubuhw HvxVR:$src1, HvxVR:$src2), (V6_vsubuhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsubuhw_128B HvxVR:$src1, HvxVR:$src2), (V6_vsubuhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4), - (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vrmpybusi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4), - (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4), + (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vrmpybusi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4), + (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vasrw HvxVR:$src1, IntRegs:$src2), (V6_vasrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vasrw_128B HvxVR:$src1, IntRegs:$src2), @@ -3016,10 +3016,10 @@ def: Pat<(int_hexagon_V6_vlsrb HvxVR:$src1, IntRegs:$src2), (V6_vlsrb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_vlsrb_128B 
HvxVR:$src1, IntRegs:$src2), (V6_vlsrb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vlutvwhi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vlutvwhi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX128B]>; def: Pat<(int_hexagon_V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2), (V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_vaddububb_sat_128B HvxVR:$src1, HvxVR:$src2), @@ -3032,10 +3032,10 @@ def: Pat<(int_hexagon_V6_ldtp0 PredRegs:$src1, IntRegs:$src2), (V6_ldtp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_ldtp0_128B PredRegs:$src1, IntRegs:$src2), (V6_ldtp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4), - (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vlutvvb_oracci_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4), - (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4), + (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vlutvvb_oracci_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4), + (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX128B]>; def: Pat<(int_hexagon_V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2), (V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsubuwsat_dv_128B HvxWR:$src1, HvxWR:$src2), @@ -3124,10 +3124,10 @@ def: Pat<(int_hexagon_V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$sr (V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_vasrwuhrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vlutvvbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vlutvvbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX128B]>; 
def: Pat<(int_hexagon_V6_vsubuwsat HvxVR:$src1, HvxVR:$src2), (V6_vsubuwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsubuwsat_128B HvxVR:$src1, HvxVR:$src2), @@ -3188,10 +3188,10 @@ def: Pat<(int_hexagon_V6_ldcnp0 PredRegs:$src1, IntRegs:$src2), (V6_ldcnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_ldcnp0_128B PredRegs:$src1, IntRegs:$src2), (V6_ldcnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4), - (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vlutvwh_oracci_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4), - (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4), + (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vlutvwh_oracci_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4), + (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX128B]>; def: Pat<(int_hexagon_V6_vsubbsat HvxVR:$src1, HvxVR:$src2), (V6_vsubbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsubbsat_128B HvxVR:$src1, HvxVR:$src2), diff --git a/llvm/lib/Target/Hexagon/HexagonDepOperands.td b/llvm/lib/Target/Hexagon/HexagonDepOperands.td index fdba7b971258..8a94d96522cc 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepOperands.td +++ b/llvm/lib/Target/Hexagon/HexagonDepOperands.td @@ -8,120 +8,125 @@ // Automatically generated file, please consult code owner before editing. 
//===----------------------------------------------------------------------===//
+multiclass ImmOpPred<code pred, ValueType vt = i32> {
+  def "" : PatLeaf<(vt imm), pred>;
+  def _timm : PatLeaf<(vt timm), pred>;
+}
+
def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand; let DecoderMethod = "s4_0ImmDecoder"; }
-def s4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;
+defm s4_0ImmPred : ImmOpPred<[{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;
def s29_3ImmOperand : AsmOperandClass { let Name = "s29_3Imm"; let RenderMethod = "addSignedImmOperands"; }
def s29_3Imm : Operand<i32> { let ParserMatchClass = s29_3ImmOperand; let DecoderMethod = "s29_3ImmDecoder"; }
-def s29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 3>(N->getSExtValue());}]>;
+defm s29_3ImmPred : ImmOpPred<[{ return isShiftedInt<32, 3>(N->getSExtValue());}]>;
def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; let RenderMethod = "addImmOperands"; }
def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>;
+defm u6_0ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>;
def a30_2ImmOperand : AsmOperandClass { let Name = "a30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def a30_2Imm : Operand<i32> { let ParserMatchClass = a30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
-def a30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
+defm a30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
def u29_3ImmOperand : AsmOperandClass { let Name = "u29_3Imm"; let RenderMethod = "addImmOperands"; }
def u29_3Imm : Operand<i32> { let ParserMatchClass = u29_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 3>(N->getSExtValue());}]>;
+defm u29_3ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 3>(N->getSExtValue());}]>;
def s8_0ImmOperand : AsmOperandClass { let Name = "s8_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s8_0Imm : Operand<i32> { let ParserMatchClass = s8_0ImmOperand; let DecoderMethod = "s8_0ImmDecoder"; }
-def s8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<8, 0>(N->getSExtValue());}]>;
+defm s8_0ImmPred : ImmOpPred<[{ return isShiftedInt<8, 0>(N->getSExtValue());}]>;
def u32_0ImmOperand : AsmOperandClass { let Name = "u32_0Imm"; let RenderMethod = "addImmOperands"; }
def u32_0Imm : Operand<i32> { let ParserMatchClass = u32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 0>(N->getSExtValue());}]>;
+defm u32_0ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 0>(N->getSExtValue());}]>;
def u4_2ImmOperand : AsmOperandClass { let Name = "u4_2Imm"; let RenderMethod = "addImmOperands"; }
def u4_2Imm : Operand<i32> { let ParserMatchClass = u4_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u4_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<4, 2>(N->getSExtValue());}]>;
+defm u4_2ImmPred : ImmOpPred<[{ return isShiftedUInt<4, 2>(N->getSExtValue());}]>;
def u3_0ImmOperand : AsmOperandClass { let Name = "u3_0Imm"; let RenderMethod = "addImmOperands"; }
def u3_0Imm : Operand<i32> { let ParserMatchClass = u3_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
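// Note: illustrative sketch, not part of the upstream patch. Each
// `defm X : ImmOpPred<pred>` above expands to a pair of PatLeafs sharing a
// single predicate; for example the u5_0ImmPred instantiation defined later
// in this file yields:
//
//   def u5_0ImmPred      : PatLeaf<(i32 imm),
//     [{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>;  // ISD::Constant
//   def u5_0ImmPred_timm : PatLeaf<(i32 timm),
//     [{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>;  // ISD::TargetConstant
//
// This is what lets the generated intrinsic patterns switch to `_timm`
// without duplicating any predicate code.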
-def u3_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<3, 0>(N->getSExtValue());}]>;
+defm u3_0ImmPred : ImmOpPred<[{ return isShiftedUInt<3, 0>(N->getSExtValue());}]>;
def b15_2ImmOperand : AsmOperandClass { let Name = "b15_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def b15_2Imm : Operand<i32> { let ParserMatchClass = b15_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
-def b15_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<15, 2>(N->getSExtValue());}]>;
+defm b15_2ImmPred : ImmOpPred<[{ return isShiftedInt<15, 2>(N->getSExtValue());}]>;
def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; let RenderMethod = "addImmOperands"; }
def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u11_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<11, 3>(N->getSExtValue());}]>;
+defm u11_3ImmPred : ImmOpPred<[{ return isShiftedUInt<11, 3>(N->getSExtValue());}]>;
def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; let RenderMethod = "addSignedImmOperands"; }
def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand; let DecoderMethod = "s4_3ImmDecoder"; }
-def s4_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 3>(N->getSExtValue());}]>;
+defm s4_3ImmPred : ImmOpPred<[{ return isShiftedInt<4, 3>(N->getSExtValue());}]>;
def m32_0ImmOperand : AsmOperandClass { let Name = "m32_0Imm"; let RenderMethod = "addImmOperands"; }
def m32_0Imm : Operand<i32> { let ParserMatchClass = m32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def m32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
+defm m32_0ImmPred : ImmOpPred<[{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
def u3_1ImmOperand : AsmOperandClass { let Name = "u3_1Imm"; let RenderMethod = "addImmOperands"; }
def u3_1Imm : Operand<i32> { let ParserMatchClass = u3_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u3_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<3, 1>(N->getSExtValue());}]>;
+defm u3_1ImmPred : ImmOpPred<[{ return isShiftedUInt<3, 1>(N->getSExtValue());}]>;
def u1_0ImmOperand : AsmOperandClass { let Name = "u1_0Imm"; let RenderMethod = "addImmOperands"; }
def u1_0Imm : Operand<i32> { let ParserMatchClass = u1_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u1_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<1, 0>(N->getSExtValue());}]>;
+defm u1_0ImmPred : ImmOpPred<[{ return isShiftedUInt<1, 0>(N->getSExtValue());}]>;
def s31_1ImmOperand : AsmOperandClass { let Name = "s31_1Imm"; let RenderMethod = "addSignedImmOperands"; }
def s31_1Imm : Operand<i32> { let ParserMatchClass = s31_1ImmOperand; let DecoderMethod = "s31_1ImmDecoder"; }
-def s31_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 1>(N->getSExtValue());}]>;
+defm s31_1ImmPred : ImmOpPred<[{ return isShiftedInt<32, 1>(N->getSExtValue());}]>;
def s3_0ImmOperand : AsmOperandClass { let Name = "s3_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s3_0Imm : Operand<i32> { let ParserMatchClass = s3_0ImmOperand; let DecoderMethod = "s3_0ImmDecoder"; }
-def s3_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<3, 0>(N->getSExtValue());}]>;
+defm s3_0ImmPred : ImmOpPred<[{ return isShiftedInt<3, 0>(N->getSExtValue());}]>;
def s30_2ImmOperand : AsmOperandClass { let Name = "s30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def s30_2Imm : Operand<i32> { let ParserMatchClass = s30_2ImmOperand; let DecoderMethod = "s30_2ImmDecoder"; }
-def s30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
+defm s30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
def u4_0ImmOperand : AsmOperandClass { let Name = "u4_0Imm"; let RenderMethod = "addImmOperands"; }
def u4_0Imm : Operand<i32> { let ParserMatchClass = u4_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<4, 0>(N->getSExtValue());}]>;
+defm u4_0ImmPred : ImmOpPred<[{ return isShiftedUInt<4, 0>(N->getSExtValue());}]>;
def s6_0ImmOperand : AsmOperandClass { let Name = "s6_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s6_0Imm : Operand<i32> { let ParserMatchClass = s6_0ImmOperand; let DecoderMethod = "s6_0ImmDecoder"; }
-def s6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<6, 0>(N->getSExtValue());}]>;
+defm s6_0ImmPred : ImmOpPred<[{ return isShiftedInt<6, 0>(N->getSExtValue());}]>;
def u5_3ImmOperand : AsmOperandClass { let Name = "u5_3Imm"; let RenderMethod = "addImmOperands"; }
def u5_3Imm : Operand<i32> { let ParserMatchClass = u5_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u5_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 3>(N->getSExtValue());}]>;
+defm u5_3ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 3>(N->getSExtValue());}]>;
def s32_0ImmOperand : AsmOperandClass { let Name = "s32_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s32_0Imm : Operand<i32> { let ParserMatchClass = s32_0ImmOperand; let DecoderMethod = "s32_0ImmDecoder"; }
-def s32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
+defm s32_0ImmPred : ImmOpPred<[{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
def s6_3ImmOperand : AsmOperandClass { let Name = "s6_3Imm"; let RenderMethod = "addSignedImmOperands"; }
def s6_3Imm : Operand<i32> { let ParserMatchClass = s6_3ImmOperand; let DecoderMethod = "s6_3ImmDecoder"; }
-def s6_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<6, 3>(N->getSExtValue());}]>;
+defm s6_3ImmPred : ImmOpPred<[{ return isShiftedInt<6, 3>(N->getSExtValue());}]>;
def u10_0ImmOperand : AsmOperandClass { let Name = "u10_0Imm"; let RenderMethod = "addImmOperands"; }
def u10_0Imm : Operand<i32> { let ParserMatchClass = u10_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u10_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<10, 0>(N->getSExtValue());}]>;
+defm u10_0ImmPred : ImmOpPred<[{ return isShiftedUInt<10, 0>(N->getSExtValue());}]>;
def u31_1ImmOperand : AsmOperandClass { let Name = "u31_1Imm"; let RenderMethod = "addImmOperands"; }
def u31_1Imm : Operand<i32> { let ParserMatchClass = u31_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u31_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 1>(N->getSExtValue());}]>;
+defm u31_1ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 1>(N->getSExtValue());}]>;
def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; let RenderMethod = "addSignedImmOperands"; }
def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand; let DecoderMethod = "s4_1ImmDecoder"; }
-def s4_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 1>(N->getSExtValue());}]>;
+defm s4_1ImmPred : ImmOpPred<[{ return isShiftedInt<4, 1>(N->getSExtValue());}]>;
def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; let RenderMethod = "addImmOperands"; }
def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u16_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<16, 0>(N->getSExtValue());}]>;
+defm u16_0ImmPred : ImmOpPred<[{ return isShiftedUInt<16, 0>(N->getSExtValue());}]>;
def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; let RenderMethod = "addImmOperands"; }
def u6_1Imm : Operand<i32> { let ParserMatchClass = u6_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u6_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 1>(N->getSExtValue());}]>;
+defm u6_1ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 1>(N->getSExtValue());}]>;
def u5_2ImmOperand : AsmOperandClass { let Name = "u5_2Imm"; let RenderMethod = "addImmOperands"; }
def u5_2Imm : Operand<i32> { let ParserMatchClass = u5_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u5_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 2>(N->getSExtValue());}]>;
+defm u5_2ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 2>(N->getSExtValue());}]>;
def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; let RenderMethod = "addImmOperands"; }
def u26_6Imm : Operand<i32> { let ParserMatchClass = u26_6ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u26_6ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<26, 6>(N->getSExtValue());}]>;
+defm u26_6ImmPred : ImmOpPred<[{ return isShiftedUInt<26, 6>(N->getSExtValue());}]>;
def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; let RenderMethod = "addImmOperands"; }
def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u6_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 2>(N->getSExtValue());}]>;
+defm u6_2ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 2>(N->getSExtValue());}]>;
def u7_0ImmOperand : AsmOperandClass { let Name = "u7_0Imm"; let RenderMethod = "addImmOperands"; }
def u7_0Imm : Operand<i32> { let ParserMatchClass = u7_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u7_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<7, 0>(N->getSExtValue());}]>;
+defm u7_0ImmPred : ImmOpPred<[{ return isShiftedUInt<7, 0>(N->getSExtValue());}]>;
def b13_2ImmOperand : AsmOperandClass { let Name = "b13_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def b13_2Imm : Operand<i32> { let ParserMatchClass = b13_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
-def b13_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<13, 2>(N->getSExtValue());}]>;
+defm b13_2ImmPred : ImmOpPred<[{ return isShiftedInt<13, 2>(N->getSExtValue());}]>;
def u5_0ImmOperand : AsmOperandClass { let Name = "u5_0Imm"; let RenderMethod = "addImmOperands"; }
def u5_0Imm : Operand<i32> { let ParserMatchClass = u5_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u5_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>;
+defm u5_0ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>;
def u2_0ImmOperand : AsmOperandClass { let Name = "u2_0Imm"; let RenderMethod = "addImmOperands"; }
def u2_0Imm : Operand<i32> { let ParserMatchClass = u2_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u2_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<2, 0>(N->getSExtValue());}]>;
+defm u2_0ImmPred : ImmOpPred<[{ return isShiftedUInt<2, 0>(N->getSExtValue());}]>;
def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def s4_2Imm : Operand<i32> { let ParserMatchClass = s4_2ImmOperand; let DecoderMethod = "s4_2ImmDecoder"; }
-def s4_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 2>(N->getSExtValue());}]>;
+defm s4_2ImmPred : ImmOpPred<[{ return isShiftedInt<4, 2>(N->getSExtValue());}]>;
def b30_2ImmOperand : AsmOperandClass { let Name = "b30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def b30_2Imm : Operand<i32> { let ParserMatchClass = b30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
-def b30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
+defm b30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
def u8_0ImmOperand : AsmOperandClass { let Name = "u8_0Imm"; let RenderMethod = "addImmOperands"; }
def u8_0Imm : Operand<i32> { let ParserMatchClass = u8_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<8, 0>(N->getSExtValue());}]>;
+defm u8_0ImmPred : ImmOpPred<[{ return isShiftedUInt<8, 0>(N->getSExtValue());}]>;
def u30_2ImmOperand : AsmOperandClass { let Name = "u30_2Imm"; let RenderMethod = "addImmOperands"; }
def u30_2Imm : Operand<i32> { let ParserMatchClass = u30_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-def u30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>;
+defm u30_2ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>;
diff --git a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
index cabfd783effa..c5e3cfd080d6 100644
--- a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -22,14 +22,14 @@ class T_RP_pat <InstHexagon MI, Intrinsic IntID>
def: Pat<(int_hexagon_A2_add IntRegs:$Rs, IntRegs:$Rt),
(A2_add IntRegs:$Rs, IntRegs:$Rt)>;
-def: Pat<(int_hexagon_A2_addi IntRegs:$Rs, imm:$s16),
+def: Pat<(int_hexagon_A2_addi IntRegs:$Rs, timm:$s16),
(A2_addi IntRegs:$Rs, imm:$s16)>;
def: Pat<(int_hexagon_A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt),
(A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
def: Pat<(int_hexagon_A2_sub IntRegs:$Rs, IntRegs:$Rt),
(A2_sub IntRegs:$Rs, IntRegs:$Rt)>;
-def: Pat<(int_hexagon_A2_subri imm:$s10, IntRegs:$Rs),
+def: Pat<(int_hexagon_A2_subri timm:$s10, IntRegs:$Rs),
(A2_subri imm:$s10, IntRegs:$Rs)>;
def: Pat<(int_hexagon_A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt),
(A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
@@ -45,26 +45,26 @@ def: Pat<(int_hexagon_M2_dpmpyss_s0 IntRegs:$Rs, IntRegs:$Rt),
def: Pat<(int_hexagon_M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt),
(M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt)>;
-def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$Rs, imm:$u5),
+def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$Rs, timm:$u5),
(S2_asl_i_r IntRegs:$Rs, imm:$u5)>;
-def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$Rs, imm:$u5),
+def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$Rs, timm:$u5),
(S2_lsr_i_r IntRegs:$Rs, imm:$u5)>;
-def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$Rs, imm:$u5),
+def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$Rs, timm:$u5),
(S2_asr_i_r IntRegs:$Rs, imm:$u5)>;
-def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$Rs, imm:$u6),
+def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$Rs, timm:$u6),
(S2_asl_i_p DoubleRegs:$Rs, imm:$u6)>;
-def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$Rs, imm:$u6),
+def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$Rs, timm:$u6),
(S2_lsr_i_p DoubleRegs:$Rs, imm:$u6)>;
-def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$Rs, imm:$u6),
+def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$Rs, timm:$u6),
(S2_asr_i_p DoubleRegs:$Rs, imm:$u6)>;
def: Pat<(int_hexagon_A2_and IntRegs:$Rs, IntRegs:$Rt),
(A2_and IntRegs:$Rs, IntRegs:$Rt)>;
-def: Pat<(int_hexagon_A2_andir IntRegs:$Rs, imm:$s10),
+def: Pat<(int_hexagon_A2_andir IntRegs:$Rs, timm:$s10),
(A2_andir IntRegs:$Rs, imm:$s10)>;
def: Pat<(int_hexagon_A2_or IntRegs:$Rs, IntRegs:$Rt),
(A2_or IntRegs:$Rs, IntRegs:$Rt)>;
-def: Pat<(int_hexagon_A2_orir IntRegs:$Rs, imm:$s10),
+def: Pat<(int_hexagon_A2_orir IntRegs:$Rs, timm:$s10),
(A2_orir IntRegs:$Rs, imm:$s10)>;
def: Pat<(int_hexagon_A2_xor IntRegs:$Rs, IntRegs:$Rt),
(A2_xor IntRegs:$Rs, IntRegs:$Rt)>;
@@ -99,13 +99,13 @@ def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, (i32 0)),
(S2_vsathub I64:$Rs)>;
}
-def : Pat <(int_hexagon_S2_asr_i_r_rnd_goodsyntax I32:$Rs, u5_0ImmPred:$imm),
+def : Pat <(int_hexagon_S2_asr_i_r_rnd_goodsyntax I32:$Rs, u5_0ImmPred_timm:$imm),
(S2_asr_i_r_rnd I32:$Rs, (UDEC1 u5_0ImmPred:$imm))>;
-def : Pat <(int_hexagon_S2_asr_i_p_rnd_goodsyntax I64:$Rs, u6_0ImmPred:$imm),
+def : Pat <(int_hexagon_S2_asr_i_p_rnd_goodsyntax I64:$Rs, u6_0ImmPred_timm:$imm),
(S2_asr_i_p_rnd I64:$Rs, (UDEC1 u6_0ImmPred:$imm))>;
-def : Pat <(int_hexagon_S5_vasrhrnd_goodsyntax I64:$Rs, u4_0ImmPred:$imm),
+def : Pat <(int_hexagon_S5_vasrhrnd_goodsyntax I64:$Rs, u4_0ImmPred_timm:$imm),
(S5_vasrhrnd I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>;
-def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, u4_0ImmPred:$imm),
+def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, u4_0ImmPred_timm:$imm),
(S5_asrhub_rnd_sat I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>;
def ImmExt64: SDNodeXForm;
-def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32_0ImmPred:$src2),
+def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32_0ImmPred_timm:$src2),
(C2_tfrpr (C2_cmpgti I32:$src1, (SDEC1 s32_0ImmPred:$src2)))>;
-def : Pat <(int_hexagon_C2_cmpgeui I32:$src1, u32_0ImmPred:$src2),
+def : Pat <(int_hexagon_C2_cmpgeui I32:$src1, u32_0ImmPred_timm:$src2),
(C2_tfrpr (C2_cmpgtui I32:$src1, (UDEC1 u32_0ImmPred:$src2)))>;
def : Pat <(int_hexagon_C2_cmpgeui I32:$src, 0),
@@ -142,7 +142,7 @@ def : Pat <(int_hexagon_C2_cmpltu I32:$src1, I32:$src2),
//===----------------------------------------------------------------------===//
class S2op_tableidx_pat <Intrinsic IntID, InstHexagon OutputInst, SDNodeXForm XformImm>
- : Pat <(IntID I32:$src1, I32:$src2, u4_0ImmPred:$src3, u5_0ImmPred:$src4),
+ : Pat <(IntID I32:$src1, I32:$src2, u4_0ImmPred_timm:$src3, u5_0ImmPred_timm:$src4),
(OutputInst I32:$src1, I32:$src2, u4_0ImmPred:$src3, (XformImm u5_0ImmPred:$src4))>;
@@ -197,11 +197,11 @@ class T_stc_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Imm, PatFrag Val>
: Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s),
(MI I32:$Rs, Imm:$s, I32:$Ru, Val:$Rt)>;
-def: T_stc_pat;
-def: T_stc_pat;
-def: T_stc_pat;
-def: T_stc_pat;
-def: T_stc_pat;
+def: T_stc_pat;
+def: T_stc_pat;
+def: T_stc_pat;
+def: T_stc_pat;
+def: T_stc_pat;
multiclass MaskedStore <InstHexagon MI, Intrinsic IntID> {
def : Pat<(IntID HvxQR:$src1, IntRegs:$src2, HvxVR:$src3),
diff --git a/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td b/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td
index 5a12568893af..9a1e47e5ecca 100644
--- a/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td
+++ b/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td
@@ -360,7 +360,7 @@ class RDDSP_MM_DESC {
dag OutOperandList = (outs GPR32Opnd:$rt);
dag InOperandList = (ins uimm7:$mask);
string AsmString = !strconcat("rddsp", "\t$rt, $mask");
- list<dag> Pattern = [(set GPR32Opnd:$rt, (int_mips_rddsp immZExt7:$mask))];
+ list<dag> Pattern = [(set GPR32Opnd:$rt, (int_mips_rddsp timmZExt7:$mask))];
InstrItinClass Itinerary = NoItinerary;
}
@@ -383,7 +383,7 @@ class WRDSP_MM_DESC {
dag OutOperandList = (outs);
dag InOperandList = (ins GPR32Opnd:$rt, uimm7:$mask);
string AsmString = !strconcat("wrdsp", "\t$rt, $mask");
- list<dag> Pattern = [(int_mips_wrdsp GPR32Opnd:$rt, immZExt7:$mask)];
+ list<dag> Pattern = [(int_mips_wrdsp GPR32Opnd:$rt, timmZExt7:$mask)];
InstrItinClass Itinerary = NoItinerary;
bit isMoveReg = 1;
}
diff --git a/llvm/lib/Target/Mips/Mips64InstrInfo.td b/llvm/lib/Target/Mips/Mips64InstrInfo.td
index 7dea56c01c82..cc15949b0d57 100644
--- a/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -16,6 +16,7 @@
// shamt must fit in 6 bits.
def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
+def timmZExt6 : TImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
// Node immediate fits as 10-bit sign extended on target immediate.
// e.g. seqi, snei
diff --git a/llvm/lib/Target/Mips/MipsDSPInstrInfo.td b/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
index daca8b907081..d3e68c014fb7 100644
--- a/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -12,12 +12,19 @@
// ImmLeaf
def immZExt1 : ImmLeaf<i32, [{return isUInt<1>(Imm);}]>;
+def timmZExt1 : ImmLeaf<i32, [{return isUInt<1>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt2 : ImmLeaf<i32, [{return isUInt<2>(Imm);}]>;
+def timmZExt2 : ImmLeaf<i32, [{return isUInt<2>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt3 : ImmLeaf<i32, [{return isUInt<3>(Imm);}]>;
+def timmZExt3 : ImmLeaf<i32, [{return isUInt<3>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}]>;
+def timmZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}]>;
+def timmZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>;
+def timmZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}], NOOP_SDNodeXForm, timm>;
def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>;
+def timmSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}], NOOP_SDNodeXForm, timm>;
def immSExt10 : ImmLeaf<i32, [{return isInt<10>(Imm);}]>;
// Mips-specific dsp nodes
@@ -306,7 +313,7 @@ class PRECR_SRA_PH_W_DESC_BASE
- list<dag> Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, immZExt5:$sa))];
+ list<dag> Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, timmZExt5:$sa))];
InstrItinClass Itinerary = itin;
string Constraints = "$src = $rt";
string BaseOpcode = instr_asm;
@@ -443,7 +450,7 @@ class RDDSP_DESC_BASE
- list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode immZExt10:$mask))];
+ list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode timmZExt10:$mask))];
InstrItinClass Itinerary = itin;
string BaseOpcode = instr_asm;
bit isMoveReg = 1;
@@ -454,7 +461,7 @@ class WRDSP_DESC_BASE
- list<dag> Pattern = [(OpNode GPR32Opnd:$rs, immZExt10:$mask)];
+ list<dag> Pattern = [(OpNode GPR32Opnd:$rs, timmZExt10:$mask)];
InstrItinClass Itinerary = itin;
string BaseOpcode = instr_asm;
bit isMoveReg = 1;
@@ -1096,14 +1103,14 @@ class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph,
NoItinerary, DSPROpnd>;
// Misc
-class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, uimm5, immZExt5,
+class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, uimm5, timmZExt5,
NoItinerary>;
-class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, uimm2, immZExt2,
+class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, uimm2, timmZExt2,
NoItinerary>;
class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, uimm5,
- immZExt5, NoItinerary>;
+ timmZExt5, NoItinerary>;
// Pseudos.
def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.td b/llvm/lib/Target/Mips/MipsInstrInfo.td
(N->getSExtValue()); }]>;
// Node immediate fits as 7-bit zero extended on target immediate.
def immZExt7 : PatLeaf<(imm), [{ return isUInt<7>(N->getZExtValue()); }]>;
+def timmZExt7 : PatLeaf<(timm), [{ return isUInt<7>(N->getZExtValue()); }]>;
// Node immediate fits as 16-bit zero extended on target immediate.
// The LO16 param means that only the lower 16 bits of the node
@@ -1295,6 +1296,7 @@ def immZExt32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>;
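// Note: illustrative sketch, not part of the upstream patch. TImmLeaf is the
// ISD::TargetConstant counterpart of ImmLeaf and takes the same C++ predicate
// over `Imm`; a hypothetical 6-bit shift amount could be expressed both ways:
//
//   def shamt6  : ImmLeaf <i32, [{return isUInt<6>(Imm);}]>;   // matches imm
//   def tshamt6 : TImmLeaf<i32, [{return isUInt<6>(Imm);}]>;   // matches timm
//
// On the C++ side, only nodes created with DAG.getTargetConstant() (rather
// than DAG.getConstant()) satisfy timm/TImmLeaf, which is why the .cpp hunks
// in this patch switch the constructor calls as well.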
// shamt field must fit in 5 bits.
def immZExt5 : ImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
+def timmZExt5 : TImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
def immZExt5Plus1 : PatLeaf<(imm), [{
return isUInt<5>(N->getZExtValue() - 1);
}]>;
diff --git a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
index 907ed9ef746f..f585d9c1a148 100644
--- a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -60,6 +60,11 @@ def immZExt2Ptr : ImmLeaf<iPTR, [{return isUInt<2>(Imm);}]>;
def immZExt3Ptr : ImmLeaf<iPTR, [{return isUInt<3>(Imm);}]>;
def immZExt4Ptr : ImmLeaf<iPTR, [{return isUInt<4>(Imm);}]>;
+def timmZExt1Ptr : TImmLeaf<iPTR, [{return isUInt<1>(Imm);}]>;
+def timmZExt2Ptr : TImmLeaf<iPTR, [{return isUInt<2>(Imm);}]>;
+def timmZExt3Ptr : TImmLeaf<iPTR, [{return isUInt<3>(Imm);}]>;
+def timmZExt4Ptr : TImmLeaf<iPTR, [{return isUInt<4>(Imm);}]>;
+
// Operands
def immZExt2Lsa : ImmLeaf<i32, [{return isUInt<2>(Imm - 1);}]>;
@@ -1270,7 +1275,7 @@ class MSA_I8_SHF_DESC_BASE
- list<dag> Pattern = [(set ROWD:$wd, (MipsSHF immZExt8:$u8, ROWS:$ws))];
+ list<dag> Pattern = [(set ROWD:$wd, (MipsSHF timmZExt8:$u8, ROWS:$ws))];
InstrItinClass Itinerary = itin;
}
@@ -2299,13 +2304,13 @@ class INSERT_FW_VIDX64_PSEUDO_DESC :
class INSERT_FD_VIDX64_PSEUDO_DESC :
MSA_INSERT_VIDX_PSEUDO_BASE;
-class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, immZExt4,
+class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, timmZExt4,
MSA128BOpnd>;
-class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, immZExt3,
+class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, timmZExt3,
MSA128HOpnd>;
-class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, immZExt2,
+class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, timmZExt2,
MSA128WOpnd>;
-class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, immZExt1,
+class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, timmZExt1,
MSA128DOpnd>;
class LD_DESC_BASE;
class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", ctpop, MSA128DOpnd>;
class SAT_S_B_DESC : MSA_BIT_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b, uimm3,
- immZExt3, MSA128BOpnd>;
+ timmZExt3, MSA128BOpnd>;
class SAT_S_H_DESC : MSA_BIT_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h, uimm4,
- immZExt4, MSA128HOpnd>;
+ timmZExt4, MSA128HOpnd>;
class SAT_S_W_DESC : MSA_BIT_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w, uimm5,
- immZExt5, MSA128WOpnd>;
+ timmZExt5, MSA128WOpnd>;
class SAT_S_D_DESC : MSA_BIT_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d, uimm6,
- immZExt6, MSA128DOpnd>;
+ timmZExt6, MSA128DOpnd>;
class SAT_U_B_DESC : MSA_BIT_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b, uimm3,
- immZExt3, MSA128BOpnd>;
+ timmZExt3, MSA128BOpnd>;
class SAT_U_H_DESC : MSA_BIT_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h, uimm4,
- immZExt4, MSA128HOpnd>;
+ timmZExt4, MSA128HOpnd>;
class SAT_U_W_DESC : MSA_BIT_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w, uimm5,
- immZExt5, MSA128WOpnd>;
+ timmZExt5, MSA128WOpnd>;
class SAT_U_D_DESC : MSA_BIT_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d, uimm6,
- immZExt6, MSA128DOpnd>;
+ timmZExt6, MSA128DOpnd>;
class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128BOpnd>;
class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128HOpnd>;
@@ -2546,16 +2551,16 @@ class SLD_D_DESC : MSA_3R_SLD_DESC_BASE<"sld.d", int_mips_sld_d, MSA128DOpnd>;
class SLDI_B_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.b", int_mips_sldi_b, MSA128BOpnd, MSA128BOpnd, uimm4,
- immZExt4>;
+ timmZExt4>;
class SLDI_H_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.h", int_mips_sldi_h, MSA128HOpnd, MSA128HOpnd, uimm3,
- immZExt3>;
+ timmZExt3>;
class SLDI_W_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.w", int_mips_sldi_w, MSA128WOpnd, MSA128WOpnd, uimm2,
- immZExt2>;
+ timmZExt2>;
MSA_ELM_SLD_DESC_BASE<"sldi.d", int_mips_sldi_d, MSA128DOpnd, MSA128DOpnd, uimm1, - immZExt1>; + timmZExt1>; class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", shl, MSA128BOpnd>; class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128HOpnd>; @@ -2609,13 +2614,13 @@ class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128WOpnd>; class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128DOpnd>; class SRARI_B_DESC : MSA_BIT_X_DESC_BASE<"srari.b", int_mips_srari_b, uimm3, - immZExt3, MSA128BOpnd>; + timmZExt3, MSA128BOpnd>; class SRARI_H_DESC : MSA_BIT_X_DESC_BASE<"srari.h", int_mips_srari_h, uimm4, - immZExt4, MSA128HOpnd>; + timmZExt4, MSA128HOpnd>; class SRARI_W_DESC : MSA_BIT_X_DESC_BASE<"srari.w", int_mips_srari_w, uimm5, - immZExt5, MSA128WOpnd>; + timmZExt5, MSA128WOpnd>; class SRARI_D_DESC : MSA_BIT_X_DESC_BASE<"srari.d", int_mips_srari_d, uimm6, - immZExt6, MSA128DOpnd>; + timmZExt6, MSA128DOpnd>; class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128BOpnd>; class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128HOpnd>; @@ -2637,13 +2642,13 @@ class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, MSA128WOpnd>; class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128DOpnd>; class SRLRI_B_DESC : MSA_BIT_X_DESC_BASE<"srlri.b", int_mips_srlri_b, uimm3, - immZExt3, MSA128BOpnd>; + timmZExt3, MSA128BOpnd>; class SRLRI_H_DESC : MSA_BIT_X_DESC_BASE<"srlri.h", int_mips_srlri_h, uimm4, - immZExt4, MSA128HOpnd>; + timmZExt4, MSA128HOpnd>; class SRLRI_W_DESC : MSA_BIT_X_DESC_BASE<"srlri.w", int_mips_srlri_w, uimm5, - immZExt5, MSA128WOpnd>; + timmZExt5, MSA128WOpnd>; class SRLRI_D_DESC : MSA_BIT_X_DESC_BASE<"srlri.d", int_mips_srlri_d, uimm6, - immZExt6, MSA128DOpnd>; + timmZExt6, MSA128DOpnd>; class ST_DESC_BASEgetOperand(0)); + DAG.getTargetConstant(Imm, DL, MVT::i32), + Op->getOperand(0)); } /// Determine whether a range fits a regular pattern of values. diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index 4cef5b7eb994..fd3fc2af2327 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -331,7 +331,7 @@ class VXBX_Int_Ty xo, string opc, Intrinsic IntID, ValueType Ty> class VXCR_Int_Ty xo, string opc, Intrinsic IntID, ValueType Ty> : VXForm_CR; + [(set Ty:$vD, (IntID Ty:$vA, timm:$ST, timm:$SIX))]>; //===----------------------------------------------------------------------===// // Instruction Definitions. @@ -401,10 +401,10 @@ let isCodeGenOnly = 1 in { def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins), "mfvscr $vD", IIC_LdStStore, - [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; + [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB), "mtvscr $vB", IIC_LdStLoad, - [(int_ppc_altivec_mtvscr v4i32:$vB)]>; + [(int_ppc_altivec_mtvscr v4i32:$vB)]>; let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads. 
def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$vD), (ins memrr:$src), diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index e883ede65438..910d98dc108c 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -2868,12 +2868,12 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP, [(set v4i32: $XT, - (int_ppc_vsx_xvtstdcsp v4f32:$XB, imm:$DCMX))]>; + (int_ppc_vsx_xvtstdcsp v4f32:$XB, timm:$DCMX))]>; def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5, (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP, [(set v2i64: $XT, - (int_ppc_vsx_xvtstdcdp v2f64:$XB, imm:$DCMX))]>; + (int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>; //===--------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index 1484ba9f0687..38ba3f9fb24e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -214,12 +214,12 @@ class PseudoMaskedAMOUMinUMax } class PseudoMaskedAMOPat - : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering), + : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering), (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering)>; class PseudoMaskedAMOMinMaxPat : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, - imm:$ordering), + timm:$ordering), (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, imm:$ordering)>; @@ -288,7 +288,7 @@ def PseudoMaskedCmpXchg32 } def : Pat<(int_riscv_masked_cmpxchg_i32 - GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering), + GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), (PseudoMaskedCmpXchg32 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>; @@ -365,7 +365,7 @@ def PseudoCmpXchg64 : PseudoCmpXchg; defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64>; def : Pat<(int_riscv_masked_cmpxchg_i64 - GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering), + GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), (PseudoMaskedCmpXchg32 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>; } // Predicates = [HasStdExtA, IsRV64] diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 9dc4512255cc..e73d6e82e6e1 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -1146,7 +1146,7 @@ void SystemZDAGToDAGISel::loadVectorConstant( SDLoc DL(Node); SmallVector Ops; for (unsigned OpVal : VCI.OpVals) - Ops.push_back(CurDAG->getConstant(OpVal, DL, MVT::i32)); + Ops.push_back(CurDAG->getTargetConstant(OpVal, DL, MVT::i32)); SDValue Op = CurDAG->getNode(VCI.Opcode, DL, VCI.VecVT, Ops); if (VCI.VecVT == VT.getSimpleVT()) @@ -1550,8 +1550,8 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) { uint64_t ConstCCMask = cast(CCMask.getNode())->getZExtValue(); // Invert the condition. 
- CCMask = CurDAG->getConstant(ConstCCValid ^ ConstCCMask, SDLoc(Node), - CCMask.getValueType()); + CCMask = CurDAG->getTargetConstant(ConstCCValid ^ ConstCCMask, + SDLoc(Node), CCMask.getValueType()); SDValue Op4 = Node->getOperand(4); SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(Node, Op1, Op0, CCValid, CCMask, Op4); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index febf75cb67be..aaf7c580ea5c 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2549,12 +2549,12 @@ static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { } if (C.Opcode == SystemZISD::ICMP) return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1, - DAG.getConstant(C.ICmpType, DL, MVT::i32)); + DAG.getTargetConstant(C.ICmpType, DL, MVT::i32)); if (C.Opcode == SystemZISD::TM) { bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1, - DAG.getConstant(RegisterOnly, DL, MVT::i32)); + DAG.getTargetConstant(RegisterOnly, DL, MVT::i32)); } return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1); } @@ -2592,10 +2592,10 @@ static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, // in CCValid, so other values can be ignored. static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask) { - SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i32), - DAG.getConstant(CCValid, DL, MVT::i32), - DAG.getConstant(CCMask, DL, MVT::i32), CCReg }; + SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), + DAG.getTargetConstant(CCValid, DL, MVT::i32), + DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg}; return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops); } @@ -2757,9 +2757,10 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); SDValue CCReg = emitCmp(DAG, DL, C); - return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), - Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32), - DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, CCReg); + return DAG.getNode( + SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0), + DAG.getTargetConstant(C.CCValid, DL, MVT::i32), + DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg); } // Return true if Pos is CmpOp and Neg is the negative of CmpOp, @@ -2810,8 +2811,9 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, } SDValue CCReg = emitCmp(DAG, DL, C); - SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32), - DAG.getConstant(C.CCMask, DL, MVT::i32), CCReg}; + SDValue Ops[] = {TrueOp, FalseOp, + DAG.getTargetConstant(C.CCValid, DL, MVT::i32), + DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg}; return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops); } @@ -3898,11 +3900,8 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, bool IsWrite = cast(Op.getOperand(2))->getZExtValue(); unsigned Code = IsWrite ? 
SystemZ::PFD_WRITE : SystemZ::PFD_READ; auto *Node = cast(Op.getNode()); - SDValue Ops[] = { - Op.getOperand(0), - DAG.getConstant(Code, DL, MVT::i32), - Op.getOperand(1) - }; + SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32), + Op.getOperand(1)}; return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL, Node->getVTList(), Ops, Node->getMemoryVT(), Node->getMemOperand()); @@ -4244,7 +4243,7 @@ static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1); SDValue Op; if (P.Opcode == SystemZISD::PERMUTE_DWORDS) { - SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32); + SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32); Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2); } else if (P.Opcode == SystemZISD::PACK) { MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8), @@ -4269,7 +4268,8 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, unsigned StartIndex, OpNo0, OpNo1; if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1)) return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0], - Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32)); + Ops[OpNo1], + DAG.getTargetConstant(StartIndex, DL, MVT::i32)); // Fall back on VPERM. Construct an SDNode for the permute vector. SDValue IndexNodes[SystemZ::VectorBytes]; @@ -4767,7 +4767,7 @@ SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index)); // Otherwise keep it as a vector-to-vector operation. return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0), - DAG.getConstant(Index, DL, MVT::i32)); + DAG.getTargetConstant(Index, DL, MVT::i32)); } GeneralShuffle GS(VT); @@ -6057,8 +6057,8 @@ SDValue SystemZTargetLowering::combineBR_CCMASK( if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0), Chain, - DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32), - DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32), + DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), + DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), N->getOperand(3), CCReg); return SDValue(); } @@ -6079,10 +6079,9 @@ SDValue SystemZTargetLowering::combineSELECT_CCMASK( if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), - N->getOperand(0), - N->getOperand(1), - DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32), - DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32), + N->getOperand(0), N->getOperand(1), + DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), + DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg); return SDValue(); } diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index 2a1d14de3ddf..c9dbe3da686d 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -2141,17 +2141,17 @@ class FixedCondBranchRXY opcode, } class CmpBranchRIEa opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEa; class AsmCmpBranchRIEa opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEa; class FixedCmpBranchRIEa opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEa { let isAsmParserOnly = V.alternate; @@ -2159,7 +2159,7 @@ class FixedCmpBranchRIEa opcode, } multiclass 
CmpBranchRIEaPair opcode, - RegisterOperand cls, Immediate imm> { + RegisterOperand cls, ImmOpWithPattern imm> { let isCodeGenOnly = 1 in def "" : CmpBranchRIEa; def Asm : AsmCmpBranchRIEa; @@ -2193,19 +2193,19 @@ multiclass CmpBranchRIEbPair opcode, } class CmpBranchRIEc opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEc; class AsmCmpBranchRIEc opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEc; class FixedCmpBranchRIEc opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEc { let isAsmParserOnly = V.alternate; @@ -2213,7 +2213,7 @@ class FixedCmpBranchRIEc opcode, } multiclass CmpBranchRIEcPair opcode, - RegisterOperand cls, Immediate imm> { + RegisterOperand cls, ImmOpWithPattern imm> { let isCodeGenOnly = 1 in def "" : CmpBranchRIEc; def Asm : AsmCmpBranchRIEc; @@ -2272,19 +2272,19 @@ multiclass CmpBranchRRSPair opcode, } class CmpBranchRIS opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIS; class AsmCmpBranchRIS opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIS; class FixedCmpBranchRIS opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIS { let isAsmParserOnly = V.alternate; @@ -2292,7 +2292,7 @@ class FixedCmpBranchRIS opcode, } multiclass CmpBranchRISPair opcode, - RegisterOperand cls, Immediate imm> { + RegisterOperand cls, ImmOpWithPattern imm> { let isCodeGenOnly = 1 in def "" : CmpBranchRIS; def Asm : AsmCmpBranchRIS; @@ -2585,7 +2585,7 @@ multiclass StoreMultipleVRSaAlign opcode> { // We therefore match the address in the same way as a normal store and // only use the StoreSI* instruction if the matched address is suitable. 
class StoreSI opcode, SDPatternOperator operator, - Immediate imm> + ImmOpWithPattern imm> : InstSI { @@ -2593,7 +2593,7 @@ class StoreSI opcode, SDPatternOperator operator, } class StoreSIY opcode, SDPatternOperator operator, - Immediate imm> + ImmOpWithPattern imm> : InstSIY { @@ -2601,7 +2601,7 @@ class StoreSIY opcode, SDPatternOperator operator, } class StoreSIL opcode, SDPatternOperator operator, - Immediate imm> + ImmOpWithPattern imm> : InstSIL { @@ -2609,7 +2609,7 @@ class StoreSIL opcode, SDPatternOperator operator, } multiclass StoreSIPair siOpcode, bits<16> siyOpcode, - SDPatternOperator operator, Immediate imm> { + SDPatternOperator operator, ImmOpWithPattern imm> { let DispKey = mnemonic in { let DispSize = "12" in def "" : StoreSI; @@ -2665,7 +2665,7 @@ multiclass CondStoreRSYPair opcode, def Asm : AsmCondStoreRSY; } -class SideEffectUnaryI opcode, Immediate imm> +class SideEffectUnaryI opcode, ImmOpWithPattern imm> : InstI; @@ -2761,13 +2761,13 @@ class UnaryMemRRFc opcode, } class UnaryRI opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIa; class UnaryRIL opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRILa; @@ -2885,14 +2885,14 @@ multiclass UnaryRXPair rxOpcode, bits<16> rxyOpcode, } class UnaryVRIa opcode, SDPatternOperator operator, - TypedReg tr, Immediate imm, bits<4> type = 0> + TypedReg tr, ImmOpWithPattern imm, bits<4> type = 0> : InstVRIa { + [(set (tr.vt tr.op:$V1), (operator (i32 timm:$I2)))]> { let M3 = type; } -class UnaryVRIaGeneric opcode, Immediate imm> +class UnaryVRIaGeneric opcode, ImmOpWithPattern imm> : InstVRIa; @@ -3021,7 +3021,7 @@ class SideEffectBinaryRRFc opcode, } class SideEffectBinaryIE opcode, - Immediate imm1, Immediate imm2> + ImmOpWithPattern imm1, ImmOpWithPattern imm2> : InstIE; @@ -3030,7 +3030,7 @@ class SideEffectBinarySI opcode, Operand imm> mnemonic#"\t$BD1, $I2", []>; class SideEffectBinarySIL opcode, - SDPatternOperator operator, Immediate imm> + SDPatternOperator operator, ImmOpWithPattern imm> : InstSIL; @@ -3165,7 +3165,7 @@ class BinaryRRFc opcode, mnemonic#"\t$R1, $R2, $M3", []>; class BinaryMemRRFc opcode, - RegisterOperand cls1, RegisterOperand cls2, Immediate imm> + RegisterOperand cls1, RegisterOperand cls2, ImmOpWithPattern imm> : InstRRFc { let Constraints = "$R1 = $R1src"; @@ -3267,7 +3267,7 @@ multiclass CondBinaryRRFaPair opcode, } class BinaryRI opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIa { @@ -3276,14 +3276,14 @@ class BinaryRI opcode, SDPatternOperator operator, } class BinaryRIE opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEd; multiclass BinaryRIAndK opcode1, bits<16> opcode2, SDPatternOperator operator, RegisterOperand cls, - Immediate imm> { + ImmOpWithPattern imm> { let NumOpsKey = mnemonic in { let NumOpsValue = "3" in def K : BinaryRIE, @@ -3294,7 +3294,7 @@ multiclass BinaryRIAndK opcode1, bits<16> opcode2, } class CondBinaryRIE opcode, RegisterOperand cls, - Immediate imm> + ImmOpWithPattern imm> : InstRIEg opcode, RegisterOperand cls, // Like CondBinaryRIE, but used for the raw assembly form. The condition-code // mask is the third operand rather than being part of the mnemonic. 
class AsmCondBinaryRIE opcode, RegisterOperand cls, - Immediate imm> + ImmOpWithPattern imm> : InstRIEg { @@ -3318,7 +3318,7 @@ class AsmCondBinaryRIE opcode, RegisterOperand cls, // Like CondBinaryRIE, but with a fixed CC mask. class FixedCondBinaryRIE opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEg { let Constraints = "$R1 = $R1src"; @@ -3328,14 +3328,14 @@ class FixedCondBinaryRIE opcode, } multiclass CondBinaryRIEPair opcode, - RegisterOperand cls, Immediate imm> { + RegisterOperand cls, ImmOpWithPattern imm> { let isCodeGenOnly = 1 in def "" : CondBinaryRIE; def Asm : AsmCondBinaryRIE; } class BinaryRIL opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRILa { @@ -3484,7 +3484,7 @@ class BinaryVRIb opcode, SDPatternOperator operator, TypedReg tr, bits<4> type> : InstVRIb { + [(set (tr.vt tr.op:$V1), (operator imm32zx8_timm:$I2, imm32zx8_timm:$I3))]> { let M4 = type; } @@ -3498,7 +3498,7 @@ class BinaryVRIc opcode, SDPatternOperator operator, : InstVRIc { + imm32zx16_timm:$I2))]> { let M4 = type; } @@ -3512,7 +3512,7 @@ class BinaryVRIe opcode, SDPatternOperator operator, : InstVRIe { + imm32zx12_timm:$I3))]> { let M4 = type; let M5 = m5; } @@ -3715,7 +3715,7 @@ class BinaryVRX opcode, SDPatternOperator operator, : InstVRX { + imm32zx4_timm:$M3))]> { let mayLoad = 1; let AccessBytes = bytes; } @@ -3765,7 +3765,7 @@ class BinaryVSI opcode, SDPatternOperator operator, } class StoreBinaryVRV opcode, bits<5> bytes, - Immediate index> + ImmOpWithPattern index> : InstVRV { let mayStore = 1; @@ -3774,7 +3774,7 @@ class StoreBinaryVRV opcode, bits<5> bytes, class StoreBinaryVRX opcode, SDPatternOperator operator, TypedReg tr, bits<5> bytes, - Immediate index> + ImmOpWithPattern index> : InstVRX { @@ -3809,7 +3809,7 @@ class CompareRRE opcode, SDPatternOperator operator, } class CompareRI opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIa { @@ -3817,7 +3817,7 @@ class CompareRI opcode, SDPatternOperator operator, } class CompareRIL opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRILa { @@ -3924,7 +3924,7 @@ class CompareSSb opcode> } class CompareSI opcode, SDPatternOperator operator, - SDPatternOperator load, Immediate imm, + SDPatternOperator load, ImmOpWithPattern imm, AddressingMode mode = bdaddr12only> : InstSI opcode, SDPatternOperator operator, } class CompareSIL opcode, SDPatternOperator operator, - SDPatternOperator load, Immediate imm> + SDPatternOperator load, ImmOpWithPattern imm> : InstSIL { @@ -3943,7 +3943,7 @@ class CompareSIL opcode, SDPatternOperator operator, } class CompareSIY opcode, SDPatternOperator operator, - SDPatternOperator load, Immediate imm, + SDPatternOperator load, ImmOpWithPattern imm, AddressingMode mode = bdaddr20only> : InstSIY opcode, SDPatternOperator operator, multiclass CompareSIPair siOpcode, bits<16> siyOpcode, SDPatternOperator operator, SDPatternOperator load, - Immediate imm> { + ImmOpWithPattern imm> { let DispKey = mnemonic in { let DispSize = "12" in def "" : CompareSI; @@ -4012,7 +4012,7 @@ class TestRXE opcode, SDPatternOperator operator, } class TestBinarySIL opcode, - SDPatternOperator operator, Immediate imm> + SDPatternOperator operator, ImmOpWithPattern imm> : InstSIL; @@ -4073,7 +4073,7 @@ class SideEffectTernaryMemMemMemRRFb opcode, class SideEffectTernaryRRFc 
opcode, RegisterOperand cls1, RegisterOperand cls2, - Immediate imm> + ImmOpWithPattern imm> : InstRRFc; @@ -4086,7 +4086,7 @@ multiclass SideEffectTernaryRRFcOpt opcode, class SideEffectTernaryMemMemRRFc opcode, RegisterOperand cls1, RegisterOperand cls2, - Immediate imm> + ImmOpWithPattern imm> : InstRRFc { @@ -4221,7 +4221,7 @@ class TernaryRXF opcode, SDPatternOperator operator, } class TernaryVRIa opcode, SDPatternOperator operator, - TypedReg tr1, TypedReg tr2, Immediate imm, Immediate index> + TypedReg tr1, TypedReg tr2, ImmOpWithPattern imm, ImmOpWithPattern index> : InstVRIa opcode, SDPatternOperator operator, mnemonic#"\t$V1, $V2, $V3, $I4", [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3), - imm32zx8:$I4))]> { + imm32zx8_timm:$I4))]> { let M5 = type; } @@ -4252,8 +4252,8 @@ class TernaryVRRa opcode, SDPatternOperator operator, (ins tr2.op:$V2, imm32zx4:$M4, imm32zx4:$M5), mnemonic#"\t$V1, $V2, $M4, $M5", [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), - imm32zx4:$M4, - imm32zx4:$M5))], + imm32zx4_timm:$M4, + imm32zx4_timm:$M5))], m4or> { let M3 = type; } @@ -4285,13 +4285,13 @@ multiclass TernaryOptVRRbSPair opcode, TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> modifier = 0> { def "" : TernaryVRRb; + imm32zx4even_timm, !and (modifier, 14)>; def : InstAlias(NAME) tr1.op:$V1, tr2.op:$V2, tr2.op:$V3, 0)>; let Defs = [CC] in def S : TernaryVRRb; + imm32zx4even_timm, !add(!and (modifier, 14), 1)>; def : InstAlias(NAME#"S") tr1.op:$V1, tr2.op:$V2, tr2.op:$V3, 0)>; @@ -4314,7 +4314,7 @@ class TernaryVRRc opcode, SDPatternOperator operator, mnemonic#"\t$V1, $V2, $V3, $M4", [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3), - imm32zx4:$M4))]> { + imm32zx4_timm:$M4))]> { let M5 = 0; let M6 = 0; } @@ -4327,7 +4327,7 @@ class TernaryVRRcFloat opcode, mnemonic#"\t$V1, $V2, $V3, $M6", [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3), - imm32zx4:$M6))]> { + imm32zx4_timm:$M6))]> { let M4 = type; let M5 = m5; } @@ -4429,7 +4429,7 @@ class TernaryVRSbGeneric opcode> } class TernaryVRV opcode, bits<5> bytes, - Immediate index> + ImmOpWithPattern index> : InstVRV { @@ -4440,7 +4440,7 @@ class TernaryVRV opcode, bits<5> bytes, } class TernaryVRX opcode, SDPatternOperator operator, - TypedReg tr1, TypedReg tr2, bits<5> bytes, Immediate index> + TypedReg tr1, TypedReg tr2, bits<5> bytes, ImmOpWithPattern index> : InstVRX opcode, SDPatternOperator operato [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V1src), (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3), - imm32zx8:$I4))]> { + imm32zx8_timm:$I4))]> { let Constraints = "$V1 = $V1src"; let DisableEncoding = "$V1src"; let M5 = type; @@ -4480,7 +4480,7 @@ class QuaternaryVRIf opcode> : InstVRIf; + mnemonic#"\t$V1, $V2, $V3, $I4, $M5", []>; class QuaternaryVRIg opcode> : InstVRIg opcode> class QuaternaryVRRd opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, TypedReg tr3, TypedReg tr4, bits<4> type, - SDPatternOperator m6mask = imm32zx4, bits<4> m6or = 0> + SDPatternOperator m6mask = imm32zx4_timm, bits<4> m6or = 0> : InstVRRd opcode, bits<4> modifier = 0> { def "" : QuaternaryVRRd; + imm32zx4even_timm, !and (modifier, 14)>; def : InstAlias(NAME) tr1.op:$V1, tr2.op:$V2, tr2.op:$V3, tr2.op:$V4, 0)>; let Defs = [CC] in def S : QuaternaryVRRd; + imm32zx4even_timm, !add (!and (modifier, 14), 1)>; def : InstAlias(NAME#"S") tr1.op:$V1, tr2.op:$V2, tr2.op:$V3, tr2.op:$V4, 0)>; @@ -4536,7 +4536,7 @@ multiclass QuaternaryOptVRRdSPairGeneric opcode> { 
def "" : QuaternaryVRRdGeneric; def : InstAlias(NAME) VR128:$V1, VR128:$V2, VR128:$V3, - VR128:$V4, imm32zx4:$M5, 0)>; + VR128:$V4, imm32zx4_timm:$M5, 0)>; } class SideEffectQuaternaryRRFa opcode, @@ -4638,13 +4638,13 @@ class RotateSelectRIEf opcode, RegisterOperand cls1, class PrefetchRXY opcode, SDPatternOperator operator> : InstRXYb; + [(operator imm32zx4_timm:$M1, bdxaddr20only:$XBD2)]>; class PrefetchRILPC opcode, SDPatternOperator operator> - : InstRILc { + [(operator imm32zx4_timm:$M1, pcrel32:$RI2)]> { // We want PC-relative addresses to be tried ahead of BD and BDX addresses. // However, BDXs have two extra operands and are therefore 6 units more // complex. @@ -4691,7 +4691,7 @@ class Pseudo pattern> // Like UnaryRI, but expanded after RA depending on the choice of register. class UnaryRIPseudo + ImmOpWithPattern imm> : Pseudo<(outs cls:$R1), (ins imm:$I2), [(set cls:$R1, (operator imm:$I2))]>; @@ -4720,7 +4720,7 @@ class UnaryRRPseudo + ImmOpWithPattern imm> : Pseudo<(outs cls:$R1), (ins cls:$R1src, imm:$I2), [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { let Constraints = "$R1 = $R1src"; @@ -4728,13 +4728,13 @@ class BinaryRIPseudo + ImmOpWithPattern imm> : Pseudo<(outs cls:$R1), (ins cls:$R3, imm:$I2), [(set cls:$R1, (operator cls:$R3, imm:$I2))]>; // Like BinaryRIAndK, but expanded after RA depending on the choice of register. multiclass BinaryRIAndKPseudo { + RegisterOperand cls, ImmOpWithPattern imm> { let NumOpsKey = key in { let NumOpsValue = "3" in def K : BinaryRIEPseudo, @@ -4764,7 +4764,7 @@ class MemFoldPseudo bytes, // Like CompareRI, but expanded after RA depending on the choice of register. class CompareRIPseudo + ImmOpWithPattern imm> : Pseudo<(outs), (ins cls:$R1, imm:$I2), [(set CC, (operator cls:$R1, imm:$I2))]> { let isCompare = 1; @@ -4783,7 +4783,7 @@ class CompareRXYPseudo +class TestBinarySILPseudo : Pseudo<(outs), (ins bdaddr12only:$BD1, imm:$I2), [(set CC, (operator bdaddr12only:$BD1, imm:$I2))]>; @@ -4812,7 +4812,7 @@ class CondBinaryRRFaPseudo +class CondBinaryRIEPseudo : Pseudo<(outs cls:$R1), (ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3), [(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src, @@ -4876,7 +4876,7 @@ class SelectWrapper : Pseudo<(outs cls:$dst), (ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc), [(set (vt cls:$dst), (z_select_ccmask cls:$src1, cls:$src2, - imm32zx4:$valid, imm32zx4:$cc))]> { + imm32zx4_timm:$valid, imm32zx4_timm:$cc))]> { let usesCustomInserter = 1; let hasNoSchedulingInfo = 1; let Uses = [CC]; @@ -4890,12 +4890,12 @@ multiclass CondStores; def Inv : Pseudo<(outs), (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc), [(store (z_select_ccmask (load mode:$addr), cls:$new, - imm32zx4:$valid, imm32zx4:$cc), + imm32zx4_timm:$valid, imm32zx4_timm:$cc), mode:$addr)]>; } } @@ -4917,11 +4917,11 @@ class AtomicLoadBinary : AtomicLoadBinary; -class AtomicLoadBinaryImm32 +class AtomicLoadBinaryImm32 : AtomicLoadBinary; class AtomicLoadBinaryReg64 : AtomicLoadBinary; -class AtomicLoadBinaryImm64 +class AtomicLoadBinaryImm64 : AtomicLoadBinary; // OPERATOR is ATOMIC_SWAPW or an ATOMIC_LOADW_* operation. PAT and OPERAND @@ -4944,7 +4944,7 @@ class AtomicLoadWBinary : AtomicLoadWBinary; -class AtomicLoadWBinaryImm +class AtomicLoadWBinaryImm : AtomicLoadWBinary; // A pseudo instruction that is a direct alias of a real instruction. 
@@ -4979,7 +4979,7 @@ class StoreAliasVRX + ImmOpWithPattern imm> : Alias<4, (outs cls:$R1), (ins cls:$R1src, imm:$I2), [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { let Constraints = "$R1 = $R1src"; @@ -4987,7 +4987,7 @@ class BinaryAliasRI + ImmOpWithPattern imm> : Alias<6, (outs cls:$R1), (ins cls:$R1src, imm:$I2), [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { let Constraints = "$R1 = $R1src"; @@ -4999,7 +4999,7 @@ class BinaryAliasVRRf // An alias of a CompareRI, but with different register sizes. class CompareAliasRI + ImmOpWithPattern imm> : Alias<4, (outs), (ins cls:$R1, imm:$I2), [(set CC, (operator cls:$R1, imm:$I2))]> { let isCompare = 1; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td index 73c5028f5e32..02364bbda5c1 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -60,7 +60,7 @@ let Predicates = [FeatureVector] in { // Generate byte mask. def VZERO : InherentVRIa<"vzero", 0xE744, 0>; def VONE : InherentVRIa<"vone", 0xE744, 0xffff>; - def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>; + def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16_timm>; // Generate mask. def VGM : BinaryVRIbGeneric<"vgm", 0xE746>; @@ -71,10 +71,10 @@ let Predicates = [FeatureVector] in { // Replicate immediate. def VREPI : UnaryVRIaGeneric<"vrepi", 0xE745, imm32sx16>; - def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>; - def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>; - def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>; - def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>; + def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16_timm, 0>; + def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16_timm, 1>; + def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16_timm, 2>; + def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16_timm, 3>; } // Load element immediate. @@ -116,7 +116,7 @@ let Predicates = [FeatureVector] in { (ins bdxaddr12only:$XBD2, imm32zx4:$M3), "lcbb\t$R1, $XBD2, $M3", [(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2, - imm32zx4:$M3))]>; + imm32zx4_timm:$M3))]>; // Load with length. The number of loaded bytes is only known at run time. def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>; @@ -362,9 +362,9 @@ let Predicates = [FeatureVector] in { def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>; def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>; def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>; - def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16:$index)), + def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16_timm:$index)), (VREPF VR128:$vec, imm32zx16:$index)>; - def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16:$index)), + def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16_timm:$index)), (VREPG VR128:$vec, imm32zx16:$index)>; // Select. @@ -778,7 +778,7 @@ let Predicates = [FeatureVector] in { // Shift left double by byte. def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>; - def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z), + def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8_timm:$z), (VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>; // Shift left double by bit. 
diff --git a/llvm/lib/Target/SystemZ/SystemZOperands.td b/llvm/lib/Target/SystemZ/SystemZOperands.td index 56632e1529a2..b2bab68a6274 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperands.td +++ b/llvm/lib/Target/SystemZ/SystemZOperands.td @@ -21,15 +21,32 @@ class ImmediateTLSAsmOperand let RenderMethod = "addImmTLSOperands"; } +class ImmediateOp : Operand { + let PrintMethod = "print"##asmop##"Operand"; + let DecoderMethod = "decode"##asmop##"Operand"; + let ParserMatchClass = !cast(asmop); +} + +class ImmOpWithPattern : + ImmediateOp, PatLeaf<(vt ImmNode), pred, xform>; + +// class ImmediatePatLeaf +// : PatLeaf<(vt ImmNode), pred, xform>; + + // Constructs both a DAG pattern and instruction operand for an immediate // of type VT. PRED returns true if a node is acceptable and XFORM returns // the operand value associated with the node. ASMOP is the name of the // associated asm operand, and also forms the basis of the asm print method. -class Immediate - : PatLeaf<(vt imm), pred, xform>, Operand { - let PrintMethod = "print"##asmop##"Operand"; - let DecoderMethod = "decode"##asmop##"Operand"; - let ParserMatchClass = !cast(asmop); +multiclass Immediate { + // def "" : ImmediateOp, + // PatLeaf<(vt imm), pred, xform>; + def "" : ImmOpWithPattern; + +// def _timm : PatLeaf<(vt timm), pred, xform>; + def _timm : ImmOpWithPattern; } // Constructs an asm operand for a PC-relative address. SIZE says how @@ -295,87 +312,87 @@ def U48Imm : ImmediateAsmOperand<"U48Imm">; // Immediates for the lower and upper 16 bits of an i32, with the other // bits of the i32 being zero. -def imm32ll16 : ImmediategetZExtValue()); }], LL16, "U16Imm">; -def imm32lh16 : ImmediategetZExtValue()); }], LH16, "U16Imm">; // Immediates for the lower and upper 16 bits of an i32, with the other // bits of the i32 being one. -def imm32ll16c : ImmediategetZExtValue())); }], LL16, "U16Imm">; -def imm32lh16c : ImmediategetZExtValue())); }], LH16, "U16Imm">; // Short immediates -def imm32zx1 : Immediate(N->getZExtValue()); }], NOOP_SDNodeXForm, "U1Imm">; -def imm32zx2 : Immediate(N->getZExtValue()); }], NOOP_SDNodeXForm, "U2Imm">; -def imm32zx3 : Immediate(N->getZExtValue()); }], NOOP_SDNodeXForm, "U3Imm">; -def imm32zx4 : Immediate(N->getZExtValue()); }], NOOP_SDNodeXForm, "U4Imm">; // Note: this enforces an even value during code generation only. // When used from the assembler, any 4-bit value is allowed. -def imm32zx4even : Immediate(N->getZExtValue()); }], UIMM8EVEN, "U4Imm">; -def imm32zx6 : Immediate(N->getZExtValue()); }], NOOP_SDNodeXForm, "U6Imm">; -def imm32sx8 : Immediate(N->getSExtValue()); }], SIMM8, "S8Imm">; -def imm32zx8 : Immediate(N->getZExtValue()); }], UIMM8, "U8Imm">; -def imm32zx8trunc : Immediate; +defm imm32zx8trunc : Immediate; -def imm32zx12 : Immediate(N->getZExtValue()); }], UIMM12, "U12Imm">; -def imm32sx16 : Immediate(N->getSExtValue()); }], SIMM16, "S16Imm">; -def imm32sx16n : Immediate(-N->getSExtValue()); }], NEGSIMM16, "S16Imm">; -def imm32zx16 : Immediate(N->getZExtValue()); }], UIMM16, "U16Imm">; -def imm32sx16trunc : Immediate; -def imm32zx16trunc : Immediate; +defm imm32sx16trunc : Immediate; +defm imm32zx16trunc : Immediate; // Full 32-bit immediates. we need both signed and unsigned versions // because the assembler is picky. E.g. AFI requires signed operands // while NILF requires unsigned ones. 
-def simm32 : Immediate; -def uimm32 : Immediate; +defm simm32 : Immediate; +defm uimm32 : Immediate; -def simm32n : Immediate(-N->getSExtValue()); }], NEGSIMM32, "S32Imm">; @@ -387,107 +404,107 @@ def imm32 : ImmLeaf; // Immediates for 16-bit chunks of an i64, with the other bits of the // i32 being zero. -def imm64ll16 : ImmediategetZExtValue()); }], LL16, "U16Imm">; -def imm64lh16 : ImmediategetZExtValue()); }], LH16, "U16Imm">; -def imm64hl16 : ImmediategetZExtValue()); }], HL16, "U16Imm">; -def imm64hh16 : ImmediategetZExtValue()); }], HH16, "U16Imm">; // Immediates for 16-bit chunks of an i64, with the other bits of the // i32 being one. -def imm64ll16c : ImmediategetZExtValue())); }], LL16, "U16Imm">; -def imm64lh16c : ImmediategetZExtValue())); }], LH16, "U16Imm">; -def imm64hl16c : ImmediategetZExtValue())); }], HL16, "U16Imm">; -def imm64hh16c : ImmediategetZExtValue())); }], HH16, "U16Imm">; // Immediates for the lower and upper 32 bits of an i64, with the other // bits of the i32 being zero. -def imm64lf32 : ImmediategetZExtValue()); }], LF32, "U32Imm">; -def imm64hf32 : ImmediategetZExtValue()); }], HF32, "U32Imm">; // Immediates for the lower and upper 32 bits of an i64, with the other // bits of the i32 being one. -def imm64lf32c : ImmediategetZExtValue())); }], LF32, "U32Imm">; -def imm64hf32c : ImmediategetZExtValue())); }], HF32, "U32Imm">; // Negated immediates that fit LF32 or LH16. -def imm64lh16n : ImmediategetZExtValue())); }], NEGLH16, "U16Imm">; -def imm64lf32n : ImmediategetZExtValue())); }], NEGLF32, "U32Imm">; // Short immediates. -def imm64sx8 : Immediate(N->getSExtValue()); }], SIMM8, "S8Imm">; -def imm64zx8 : Immediate(N->getSExtValue()); }], UIMM8, "U8Imm">; -def imm64sx16 : Immediate(N->getSExtValue()); }], SIMM16, "S16Imm">; -def imm64sx16n : Immediate(-N->getSExtValue()); }], NEGSIMM16, "S16Imm">; -def imm64zx16 : Immediate(N->getZExtValue()); }], UIMM16, "U16Imm">; -def imm64sx32 : Immediate(N->getSExtValue()); }], SIMM32, "S32Imm">; -def imm64sx32n : Immediate(-N->getSExtValue()); }], NEGSIMM32, "S32Imm">; -def imm64zx32 : Immediate(N->getZExtValue()); }], UIMM32, "U32Imm">; -def imm64zx32n : Immediate(-N->getSExtValue()); }], NEGUIMM32, "U32Imm">; -def imm64zx48 : Immediate(N->getZExtValue()); }], UIMM48, "U48Imm">; @@ -637,7 +654,7 @@ def bdvaddr12only : BDVMode< "64", "12">; //===----------------------------------------------------------------------===// // A 4-bit condition-code mask. -def cond4 : PatLeaf<(i32 imm), [{ return (N->getZExtValue() < 16); }]>, +def cond4 : PatLeaf<(i32 timm), [{ return (N->getZExtValue() < 16); }]>, Operand { let PrintMethod = "printCond4Operand"; } diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td index 15bd12bc98a4..6fe383e64b74 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -472,17 +472,17 @@ def z_subcarry : PatFrag<(ops node:$lhs, node:$rhs), (z_subcarry_1 node:$lhs, node:$rhs, CC)>; // Signed and unsigned comparisons. 
-def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
+def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{
   unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
   return Type != SystemZICMP::UnsignedOnly;
 }]>;
-def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
+def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{
   unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
   return Type != SystemZICMP::SignedOnly;
 }]>;
 
 // Register- and memory-based TEST UNDER MASK.
-def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, imm)>;
+def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, timm)>;
 def z_tm_mem : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, 0)>;
 
 // Register sign-extend operations.  Sub-32-bit values are represented as i32s.
diff --git a/llvm/lib/Target/SystemZ/SystemZPatterns.td b/llvm/lib/Target/SystemZ/SystemZPatterns.td
index beaf4de285a3..65300fb47627 100644
--- a/llvm/lib/Target/SystemZ/SystemZPatterns.td
+++ b/llvm/lib/Target/SystemZ/SystemZPatterns.td
@@ -100,12 +100,12 @@ multiclass CondStores64 {
   def : Pat<(store (z_select_ccmask GR64:$new, (load mode:$addr),
-                                    imm32zx4:$valid, imm32zx4:$cc),
+                                    imm32zx4_timm:$valid, imm32zx4_timm:$cc),
                    mode:$addr),
             (insn (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
                   imm32zx4:$valid, imm32zx4:$cc)>;
   def : Pat<(store (z_select_ccmask (load mode:$addr), GR64:$new,
-                                    imm32zx4:$valid, imm32zx4:$cc),
+                                    imm32zx4_timm:$valid, imm32zx4_timm:$cc),
                    mode:$addr),
             (insninv (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
                      imm32zx4:$valid, imm32zx4:$cc)>;
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index a50e6aa59711..47c925dcf730 100644
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -209,10 +209,10 @@ std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemchr(
 
   // Now select between End and null, depending on whether the character
   // was found.
- SDValue Ops[] = {End, DAG.getConstant(0, DL, PtrVT), - DAG.getConstant(SystemZ::CCMASK_SRST, DL, MVT::i32), - DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32), - CCReg}; + SDValue Ops[] = { + End, DAG.getConstant(0, DL, PtrVT), + DAG.getTargetConstant(SystemZ::CCMASK_SRST, DL, MVT::i32), + DAG.getTargetConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32), CCReg}; End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, PtrVT, Ops); return std::make_pair(End, Chain); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td index f4352e3d12ec..05735cf6d31f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td @@ -39,7 +39,7 @@ defm MEMORY_INIT : (ins i32imm_op:$seg, i32imm_op:$idx, I32:$dest, I32:$offset, I32:$size), (outs), (ins i32imm_op:$seg, i32imm_op:$idx), - [(int_wasm_memory_init (i32 imm:$seg), (i32 imm:$idx), I32:$dest, + [(int_wasm_memory_init (i32 timm:$seg), (i32 timm:$idx), I32:$dest, I32:$offset, I32:$size )], "memory.init\t$seg, $idx, $dest, $offset, $size", @@ -48,7 +48,7 @@ defm MEMORY_INIT : let hasSideEffects = 1 in defm DATA_DROP : BULK_I<(outs), (ins i32imm_op:$seg), (outs), (ins i32imm_op:$seg), - [(int_wasm_data_drop (i32 imm:$seg))], + [(int_wasm_data_drop (i32 timm:$seg))], "data.drop\t$seg", "data.drop\t$seg", 0x09>; let mayLoad = 1, mayStore = 1 in diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 9ec0f89e1611..8a1dd62f057d 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -879,10 +879,9 @@ void X86DAGToDAGISel::PreprocessISelDAG() { case ISD::FRINT: Imm = 0x4; break; } SDLoc dl(N); - SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, - N->getValueType(0), - N->getOperand(0), - CurDAG->getConstant(Imm, dl, MVT::i8)); + SDValue Res = CurDAG->getNode( + X86ISD::VRNDSCALE, dl, N->getValueType(0), N->getOperand(0), + CurDAG->getTargetConstant(Imm, dl, MVT::i8)); --I; CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); ++I; @@ -5096,10 +5095,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) { case ISD::FRINT: Imm = 0x4; break; } SDLoc dl(Node); - SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, - Node->getValueType(0), + SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, Node->getValueType(0), Node->getOperand(0), - CurDAG->getConstant(Imm, dl, MVT::i8)); + CurDAG->getTargetConstant(Imm, dl, MVT::i8)); ReplaceNode(Node, Res.getNode()); SelectCode(Res.getNode()); return; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 8965a9d1582d..029d73a4ac1c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -211,7 +211,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // Integer absolute. if (Subtarget.hasCMov()) { setOperationAction(ISD::ABS , MVT::i16 , Custom); - setOperationAction(ISD::ABS , MVT::i32 , Custom); + setOperationAction(ISD::ABS , MVT::i32 , Custom); } setOperationAction(ISD::ABS , MVT::i64 , Custom); @@ -4981,7 +4981,7 @@ bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, // Find the type this will be legalized too. Otherwise we might prematurely // convert this to shl+add/sub and then still have to type legalize those ops. 
- // Another choice would be to defer the decision for illegal types until + // Another choice would be to defer the decision for illegal types until // after type legalization. But constant splat vectors of i64 can't make it // through type legalization on 32-bit targets so we would need to special // case vXi64. @@ -5759,7 +5759,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, if (IdxVal == 0) { // Zero lower bits of the Vec - SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8); + SDValue ShiftBits = DAG.getTargetConstant(SubVecNumElems, dl, MVT::i8); Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits); @@ -5778,7 +5778,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, if (Vec.isUndef()) { assert(IdxVal != 0 && "Unexpected index"); SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, - DAG.getConstant(IdxVal, dl, MVT::i8)); + DAG.getTargetConstant(IdxVal, dl, MVT::i8)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx); } @@ -5788,17 +5788,17 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, unsigned ShiftLeft = NumElems - SubVecNumElems; unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, - DAG.getConstant(ShiftLeft, dl, MVT::i8)); + DAG.getTargetConstant(ShiftLeft, dl, MVT::i8)); if (ShiftRight != 0) SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec, - DAG.getConstant(ShiftRight, dl, MVT::i8)); + DAG.getTargetConstant(ShiftRight, dl, MVT::i8)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx); } // Simple case when we put subvector in the upper part if (IdxVal + SubVecNumElems == NumElems) { SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, - DAG.getConstant(IdxVal, dl, MVT::i8)); + DAG.getTargetConstant(IdxVal, dl, MVT::i8)); if (SubVecNumElems * 2 == NumElems) { // Special case, use legal zero extending insert_subvector. This allows // isel to opimitize when bits are known zero. @@ -5811,7 +5811,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); NumElems = WideOpVT.getVectorNumElements(); - SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8); + SDValue ShiftBits = DAG.getTargetConstant(NumElems - IdxVal, dl, MVT::i8); Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits); Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits); } @@ -5827,17 +5827,17 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); // Move the current value of the bit to be replace to the lsbs. Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, - DAG.getConstant(IdxVal, dl, MVT::i8)); + DAG.getTargetConstant(IdxVal, dl, MVT::i8)); // Xor with the new bit. Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec); // Shift to MSB, filling bottom bits with 0. unsigned ShiftLeft = NumElems - SubVecNumElems; Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op, - DAG.getConstant(ShiftLeft, dl, MVT::i8)); + DAG.getTargetConstant(ShiftLeft, dl, MVT::i8)); // Shift to the final position, filling upper bits with 0. 
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op, - DAG.getConstant(ShiftRight, dl, MVT::i8)); + DAG.getTargetConstant(ShiftRight, dl, MVT::i8)); // Xor with original vector leaving the new value. Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op); // Reduce to original width if needed. @@ -7637,7 +7637,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!"); SDLoc DL(Op); SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2, - DAG.getIntPtrConstant(InsertPSMask, DL)); + DAG.getIntPtrConstant(InsertPSMask, DL, true)); return DAG.getBitcast(VT, Result); } @@ -7650,7 +7650,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ; SrcOp = DAG.getBitcast(ShVT, SrcOp); assert(NumBits % 8 == 0 && "Only support byte sized shifts"); - SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, MVT::i8); + SDValue ShiftVal = DAG.getTargetConstant(NumBits / 8, dl, MVT::i8); return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal)); } @@ -9439,9 +9439,9 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec, SDValue HiHi = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec, {4, 5, 6, 7, 4, 5, 6, 7}); if (Subtarget.hasXOP()) - return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32, - LoLo, HiHi, IndicesVec, - DAG.getConstant(0, DL, MVT::i8))); + return DAG.getBitcast( + VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32, LoLo, HiHi, + IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8))); // Permute Lo and Hi and then select based on index range. // This works as VPERMILPS only uses index bits[0:1] to permute elements. SDValue Res = DAG.getSelectCC( @@ -9475,9 +9475,9 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec, // VPERMIL2PD selects with bit#1 of the index vector, so scale IndicesVec. IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec); if (Subtarget.hasXOP()) - return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64, - LoLo, HiHi, IndicesVec, - DAG.getConstant(0, DL, MVT::i8))); + return DAG.getBitcast( + VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64, LoLo, HiHi, + IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8))); // Permute Lo and Hi and then select based on index range. // This works as VPERMILPD only uses index bit[1] to permute elements. SDValue Res = DAG.getSelectCC( @@ -10048,7 +10048,7 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, DAG.getUNDEF(ShiftVT), SubVec, DAG.getIntPtrConstant(0, dl)); Op = DAG.getNode(X86ISD::KSHIFTL, dl, ShiftVT, SubVec, - DAG.getConstant(Idx * SubVecNumElts, dl, MVT::i8)); + DAG.getTargetConstant(Idx * SubVecNumElts, dl, MVT::i8)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, Op, DAG.getIntPtrConstant(0, dl)); } @@ -10441,7 +10441,7 @@ static unsigned getV4X86ShuffleImm(ArrayRef Mask) { static SDValue getV4X86ShuffleImm8ForMask(ArrayRef Mask, const SDLoc &DL, SelectionDAG &DAG) { - return DAG.getConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8); + return DAG.getTargetConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8); } /// Compute whether each element of a shuffle is zeroable. 
@@ -11086,7 +11086,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, case MVT::v8i16: assert(Subtarget.hasSSE41() && "128-bit blends require SSE41!"); return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2, - DAG.getConstant(BlendMask, DL, MVT::i8)); + DAG.getTargetConstant(BlendMask, DL, MVT::i8)); case MVT::v16i16: { assert(Subtarget.hasAVX2() && "v16i16 blends require AVX2!"); SmallVector RepeatedMask; @@ -11098,7 +11098,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, if (RepeatedMask[i] >= 8) BlendMask |= 1ull << i; return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, - DAG.getConstant(BlendMask, DL, MVT::i8)); + DAG.getTargetConstant(BlendMask, DL, MVT::i8)); } // Use PBLENDW for lower/upper lanes and then blend lanes. // TODO - we should allow 2 PBLENDW here and leave shuffle combine to @@ -11107,9 +11107,9 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, uint64_t HiMask = (BlendMask >> 8) & 0xFF; if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) { SDValue Lo = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, - DAG.getConstant(LoMask, DL, MVT::i8)); + DAG.getTargetConstant(LoMask, DL, MVT::i8)); SDValue Hi = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, - DAG.getConstant(HiMask, DL, MVT::i8)); + DAG.getTargetConstant(HiMask, DL, MVT::i8)); return DAG.getVectorShuffle( MVT::v16i16, DL, Lo, Hi, {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}); @@ -11369,7 +11369,7 @@ static SDValue lowerShuffleAsByteRotateAndPermute( SDValue Rotate = DAG.getBitcast( VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, DAG.getBitcast(ByteVT, Hi), DAG.getBitcast(ByteVT, Lo), - DAG.getConstant(Scale * RotAmt, DL, MVT::i8))); + DAG.getTargetConstant(Scale * RotAmt, DL, MVT::i8))); SmallVector PermMask(NumElts, SM_SentinelUndef); for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) { for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) { @@ -11576,7 +11576,7 @@ static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1, "512-bit PALIGNR requires BWI instructions"); return DAG.getBitcast( VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, Lo, Hi, - DAG.getConstant(ByteRotation, DL, MVT::i8))); + DAG.getTargetConstant(ByteRotation, DL, MVT::i8))); } assert(VT.is128BitVector() && @@ -11590,10 +11590,12 @@ static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1, int LoByteShift = 16 - ByteRotation; int HiByteShift = ByteRotation; - SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo, - DAG.getConstant(LoByteShift, DL, MVT::i8)); - SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi, - DAG.getConstant(HiByteShift, DL, MVT::i8)); + SDValue LoShift = + DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo, + DAG.getTargetConstant(LoByteShift, DL, MVT::i8)); + SDValue HiShift = + DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi, + DAG.getTargetConstant(HiByteShift, DL, MVT::i8)); return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift)); } @@ -11625,7 +11627,7 @@ static SDValue lowerShuffleAsRotate(const SDLoc &DL, MVT VT, SDValue V1, return SDValue(); return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi, - DAG.getConstant(Rotation, DL, MVT::i8)); + DAG.getTargetConstant(Rotation, DL, MVT::i8)); } /// Try to lower a vector shuffle as a byte shift sequence. 
@@ -11664,27 +11666,27 @@ static SDValue lowerVectorShuffleAsByteShiftMask( if (ZeroLo == 0) { unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts); Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * Shift, DL, MVT::i8)); + DAG.getTargetConstant(Scale * Shift, DL, MVT::i8)); Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * ZeroHi, DL, MVT::i8)); + DAG.getTargetConstant(Scale * ZeroHi, DL, MVT::i8)); } else if (ZeroHi == 0) { unsigned Shift = Mask[ZeroLo] % NumElts; Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * Shift, DL, MVT::i8)); + DAG.getTargetConstant(Scale * Shift, DL, MVT::i8)); Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * ZeroLo, DL, MVT::i8)); + DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8)); } else if (!Subtarget.hasSSSE3()) { // If we don't have PSHUFB then its worth avoiding an AND constant mask // by performing 3 byte shifts. Shuffle combining can kick in above that. // TODO: There may be some cases where VSH{LR}DQ+PAND is still better. unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts); Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * Shift, DL, MVT::i8)); + DAG.getTargetConstant(Scale * Shift, DL, MVT::i8)); Shift += Mask[ZeroLo] % NumElts; Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * Shift, DL, MVT::i8)); + DAG.getTargetConstant(Scale * Shift, DL, MVT::i8)); Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * ZeroLo, DL, MVT::i8)); + DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8)); } else return SDValue(); @@ -11806,7 +11808,7 @@ static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1, "Illegal integer vector type"); V = DAG.getBitcast(ShiftVT, V); V = DAG.getNode(Opcode, DL, ShiftVT, V, - DAG.getConstant(ShiftAmt, DL, MVT::i8)); + DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); return DAG.getBitcast(VT, V); } @@ -11940,14 +11942,14 @@ static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, uint64_t BitLen, BitIdx; if (matchShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable)) return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1, - DAG.getConstant(BitLen, DL, MVT::i8), - DAG.getConstant(BitIdx, DL, MVT::i8)); + DAG.getTargetConstant(BitLen, DL, MVT::i8), + DAG.getTargetConstant(BitIdx, DL, MVT::i8)); if (matchShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx)) return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT), V2 ? 
V2 : DAG.getUNDEF(VT), - DAG.getConstant(BitLen, DL, MVT::i8), - DAG.getConstant(BitIdx, DL, MVT::i8)); + DAG.getTargetConstant(BitLen, DL, MVT::i8), + DAG.getTargetConstant(BitIdx, DL, MVT::i8)); return SDValue(); } @@ -12044,8 +12046,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend( int LoIdx = Offset * EltBits; SDValue Lo = DAG.getBitcast( MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, - DAG.getConstant(EltBits, DL, MVT::i8), - DAG.getConstant(LoIdx, DL, MVT::i8))); + DAG.getTargetConstant(EltBits, DL, MVT::i8), + DAG.getTargetConstant(LoIdx, DL, MVT::i8))); if (isUndefUpperHalf(Mask) || !SafeOffset(Offset + 1)) return DAG.getBitcast(VT, Lo); @@ -12053,8 +12055,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend( int HiIdx = (Offset + 1) * EltBits; SDValue Hi = DAG.getBitcast( MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, - DAG.getConstant(EltBits, DL, MVT::i8), - DAG.getConstant(HiIdx, DL, MVT::i8))); + DAG.getTargetConstant(EltBits, DL, MVT::i8), + DAG.getTargetConstant(HiIdx, DL, MVT::i8))); return DAG.getBitcast(VT, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi)); } @@ -12364,9 +12366,9 @@ static SDValue lowerShuffleAsElementInsertion( V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle); } else { V2 = DAG.getBitcast(MVT::v16i8, V2); - V2 = DAG.getNode( - X86ISD::VSHLDQ, DL, MVT::v16i8, V2, - DAG.getConstant(V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8)); + V2 = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, V2, + DAG.getTargetConstant( + V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8)); V2 = DAG.getBitcast(VT, V2); } } @@ -12798,7 +12800,7 @@ static SDValue lowerShuffleAsInsertPS(const SDLoc &DL, SDValue V1, SDValue V2, // Insert the V2 element into the desired position. return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2, - DAG.getConstant(InsertPSMask, DL, MVT::i8)); + DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); } /// Try to lower a shuffle as a permute of the inputs followed by an @@ -12947,14 +12949,14 @@ static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef Mask, // If we have AVX, we can use VPERMILPS which will allow folding a load // into the shuffle. return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1, - DAG.getConstant(SHUFPDMask, DL, MVT::i8)); + DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8)); } return DAG.getNode( X86ISD::SHUFP, DL, MVT::v2f64, Mask[0] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1, Mask[1] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1, - DAG.getConstant(SHUFPDMask, DL, MVT::i8)); + DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8)); } assert(Mask[0] >= 0 && "No undef lanes in multi-input v2 shuffles!"); assert(Mask[1] >= 0 && "No undef lanes in multi-input v2 shuffles!"); @@ -13000,7 +13002,7 @@ static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef Mask, unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1); return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2, - DAG.getConstant(SHUFPDMask, DL, MVT::i8)); + DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8)); } /// Handle lowering of 2-lane 64-bit integer shuffles. 
@@ -14880,8 +14882,8 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1, if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) { unsigned PermMask = ((WidenedMask[0] % 2) << 0) | ((WidenedMask[1] % 2) << 1); - return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2, - DAG.getConstant(PermMask, DL, MVT::i8)); + return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2, + DAG.getTargetConstant(PermMask, DL, MVT::i8)); } } } @@ -14913,7 +14915,7 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1, V2 = DAG.getUNDEF(VT); return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2, - DAG.getConstant(PermMask, DL, MVT::i8)); + DAG.getTargetConstant(PermMask, DL, MVT::i8)); } /// Lower a vector shuffle by first fixing the 128-bit lanes and then @@ -15542,7 +15544,7 @@ static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, SDValue V1, V2 = getZeroVector(VT, Subtarget, DAG, DL); return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2, - DAG.getConstant(Immediate, DL, MVT::i8)); + DAG.getTargetConstant(Immediate, DL, MVT::i8)); } /// Handle lowering of 4-lane 64-bit floating point shuffles. @@ -15577,7 +15579,7 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef Mask, unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) | ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3); return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1, - DAG.getConstant(VPERMILPMask, DL, MVT::i8)); + DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8)); } // With AVX2 we have direct support for this permutation. @@ -16316,7 +16318,7 @@ static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef Mask, } return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1], - DAG.getConstant(PermMask, DL, MVT::i8)); + DAG.getTargetConstant(PermMask, DL, MVT::i8)); } /// Handle lowering of 8-lane 64-bit floating point shuffles. @@ -16341,7 +16343,7 @@ static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef Mask, ((Mask[4] == 5) << 4) | ((Mask[5] == 5) << 5) | ((Mask[6] == 7) << 6) | ((Mask[7] == 7) << 7); return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f64, V1, - DAG.getConstant(VPERMILPMask, DL, MVT::i8)); + DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8)); } SmallVector RepeatedMask; @@ -16770,7 +16772,7 @@ static SDValue lower1BitShuffleAsKSHIFTR(const SDLoc &DL, ArrayRef Mask, DAG.getUNDEF(WideVT), V1, DAG.getIntPtrConstant(0, DL)); Res = DAG.getNode(X86ISD::KSHIFTR, DL, WideVT, Res, - DAG.getConstant(ShiftAmt, DL, MVT::i8)); + DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, DAG.getIntPtrConstant(0, DL)); } @@ -16877,13 +16879,13 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef Mask, int WideElts = WideVT.getVectorNumElements(); // Shift left to put the original vector in the MSBs of the new size. Res = DAG.getNode(X86ISD::KSHIFTL, DL, WideVT, Res, - DAG.getConstant(WideElts - NumElts, DL, MVT::i8)); + DAG.getTargetConstant(WideElts - NumElts, DL, MVT::i8)); // Increase the shift amount to account for the left shift. ShiftAmt += WideElts - NumElts; } Res = DAG.getNode(Opcode, DL, WideVT, Res, - DAG.getConstant(ShiftAmt, DL, MVT::i8)); + DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, DAG.getIntPtrConstant(0, DL)); } @@ -17331,7 +17333,7 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG, // Use kshiftr instruction to move to the lower element. 
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec, - DAG.getConstant(IdxVal, dl, MVT::i8)); + DAG.getTargetConstant(IdxVal, dl, MVT::i8)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, DAG.getIntPtrConstant(0, dl)); @@ -17559,7 +17561,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, (Subtarget.hasAVX2() && EltVT == MVT::i32)) { SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1); return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, - DAG.getConstant(1, dl, MVT::i8)); + DAG.getTargetConstant(1, dl, MVT::i8)); } } @@ -17635,7 +17637,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, // Create this as a scalar to vector.. N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, - DAG.getConstant(IdxVal << 4, dl, MVT::i8)); + DAG.getTargetConstant(IdxVal << 4, dl, MVT::i8)); } // PINSR* works with constant index. @@ -17721,7 +17723,7 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, // Shift to the LSB. Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec, - DAG.getConstant(IdxVal, dl, MVT::i8)); + DAG.getTargetConstant(IdxVal, dl, MVT::i8)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op.getValueType(), Vec, DAG.getIntPtrConstant(0, dl)); @@ -18264,8 +18266,8 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, APInt APIntShiftAmt; if (X86::isConstantSplat(Amt, APIntShiftAmt)) { uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits()); - return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, - Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8)); + return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, Op0, + Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); } return DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT, @@ -18697,7 +18699,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, // Low will be bitcasted right away, so do not bother bitcasting back to its // original type. Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast, - VecCstLowBitcast, DAG.getConstant(0xaa, DL, MVT::i8)); + VecCstLowBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8)); // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16), // (uint4) 0x53000000, 0xaa); SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh); @@ -18705,7 +18707,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, // High will be bitcasted right away, so do not bother bitcasting back to // its original type. High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast, - VecCstHighBitcast, DAG.getConstant(0xaa, DL, MVT::i8)); + VecCstHighBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8)); } else { SDValue VecCstMask = DAG.getConstant(0xffff, DL, VecIntVT); // uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000; @@ -20655,14 +20657,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, } SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(CC0, dl, MVT::i8)); + DAG.getTargetConstant(CC0, dl, MVT::i8)); SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(CC1, dl, MVT::i8)); + DAG.getTargetConstant(CC1, dl, MVT::i8)); Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1); } else { // Handle all other FP comparisons here. 
Cmp = DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(SSECC, dl, MVT::i8)); + DAG.getTargetConstant(SSECC, dl, MVT::i8)); } // If this is SSE/AVX CMPP, bitcast the result back to integer to match the @@ -20725,7 +20727,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM; return DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(CmpMode, dl, MVT::i8)); + DAG.getTargetConstant(CmpMode, dl, MVT::i8)); } // (X & Y) != 0 --> (X & Y) == Y iff Y is power-of-2. @@ -21195,15 +21197,16 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1); if (Subtarget.hasAVX512()) { - SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0, - CondOp1, DAG.getConstant(SSECC, DL, MVT::i8)); + SDValue Cmp = + DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0, CondOp1, + DAG.getTargetConstant(SSECC, DL, MVT::i8)); assert(!VT.isVector() && "Not a scalar type?"); return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); } if (SSECC < 8 || Subtarget.hasAVX()) { SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1, - DAG.getConstant(SSECC, DL, MVT::i8)); + DAG.getTargetConstant(SSECC, DL, MVT::i8)); // If we have AVX, we can use a variable vector select (VBLENDV) instead // of 3 logic instructions for size savings and potentially speed. @@ -21661,7 +21664,7 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op, unsigned SignExtShift = DestWidth - InSVT.getSizeInBits(); SignExt = DAG.getNode(X86ISD::VSRAI, dl, DestVT, Curr, - DAG.getConstant(SignExtShift, dl, MVT::i8)); + DAG.getTargetConstant(SignExtShift, dl, MVT::i8)); } if (VT == MVT::v2i64) { @@ -22656,7 +22659,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, } return DAG.getNode(Opc, dl, VT, SrcOp, - DAG.getConstant(ShiftAmt, dl, MVT::i8)); + DAG.getTargetConstant(ShiftAmt, dl, MVT::i8)); } /// Handle vector element shifts where the shift amount may or may not be a @@ -22701,7 +22704,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT, ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt), MVT::v2i64, ShAmt); else { - SDValue ByteShift = DAG.getConstant( + SDValue ByteShift = DAG.getTargetConstant( (128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8); ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt); ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt, @@ -22999,9 +23002,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); - if (IntrData->Type == INTR_TYPE_3OP_IMM8) - Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3); - // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. @@ -23254,7 +23254,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case CMP_MASK_CC: { MVT MaskVT = Op.getSimpleValueType(); SDValue CC = Op.getOperand(3); - CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC); // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand.
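The two deletions above drop ISD::TRUNCATE nodes that used to narrow the immediate operand before use. Since immarg intrinsic operands now reach lowering as target constants, wrapping one in a generic TRUNCATE would hide the immediate from the selected pattern. A sketch of the new contract, with illustrative names (the operand index follows the CMP_MASK_CC case above; CondVal is hypothetical here):

  // Read the immarg operand directly and, where a different width is needed,
  // rebuild it as a TargetConstant instead of emitting a TRUNCATE node.
  uint64_t CondVal = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
  SDValue CC = DAG.getTargetConstant(CondVal, dl, MVT::i8);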
@@ -23273,7 +23272,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case CMP_MASK_SCALAR_CC: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); - SDValue CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(3)); + SDValue CC = Op.getOperand(3); SDValue Mask = Op.getOperand(4); SDValue Cmp; @@ -23344,10 +23343,10 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDValue FCmp; if (isRoundModeCurDirection(Sae)) FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS, - DAG.getConstant(CondVal, dl, MVT::i8)); + DAG.getTargetConstant(CondVal, dl, MVT::i8)); else if (isRoundModeSAE(Sae)) FCmp = DAG.getNode(X86ISD::FSETCCM_SAE, dl, MVT::v1i1, LHS, RHS, - DAG.getConstant(CondVal, dl, MVT::i8), Sae); + DAG.getTargetConstant(CondVal, dl, MVT::i8), Sae); else return SDValue(); // Need to fill with zeros to ensure the bitcast will produce zeroes @@ -23407,9 +23406,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode"); // Clear the upper bits of the rounding immediate so that the legacy // intrinsic can't trigger the scaling behavior of VRNDSCALE. - SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32, - Op.getOperand(2), - DAG.getConstant(0xf, dl, MVT::i32)); + auto Round = cast<ConstantSDNode>(Op.getOperand(2)); + SDValue RoundingMode = + DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32); return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), RoundingMode); } @@ -23417,9 +23416,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, assert(IntrData->Opc0 == X86ISD::VRNDSCALES && "Unexpected opcode"); // Clear the upper bits of the rounding immediate so that the legacy // intrinsic can't trigger the scaling behavior of VRNDSCALE. - SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32, - Op.getOperand(3), - DAG.getConstant(0xf, dl, MVT::i32)); + auto Round = cast<ConstantSDNode>(Op.getOperand(3)); + SDValue RoundingMode = + DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32); return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), RoundingMode); } @@ -26096,7 +26095,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, (VT == MVT::v32i8 && Subtarget.hasInt256())) && !Subtarget.hasXOP()) { int NumElts = VT.getVectorNumElements(); - SDValue Cst8 = DAG.getConstant(8, dl, MVT::i8); + SDValue Cst8 = DAG.getTargetConstant(8, dl, MVT::i8); // Extend constant shift amount to vXi16 (it doesn't matter if the type // isn't legal). @@ -26368,7 +26367,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI); uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits); return DAG.getNode(Op, DL, VT, R, - DAG.getConstant(RotateAmt, DL, MVT::i8)); + DAG.getTargetConstant(RotateAmt, DL, MVT::i8)); } // Else, fall-back on VPROLV/VPRORV. @@ -26389,7 +26388,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, if (0 <= CstSplatIndex) { uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits); return DAG.getNode(X86ISD::VROTLI, DL, VT, R, - DAG.getConstant(RotateAmt, DL, MVT::i8)); + DAG.getTargetConstant(RotateAmt, DL, MVT::i8)); } // Use general rotate by variable (per-element).
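The constant-splat rotate path above shows why the distinction matters at selection time. An illustrative contrast, not taken from the patch, reusing R, VT, DL, and RotateAmt from the hunk above:

  // A plain Constant may still be folded or materialized in a register and
  // only matches `imm` patterns; a TargetConstant is guaranteed to remain an
  // immediate operand and matches `timm`, which the X86ISD::VROTLI pattern
  // now requires.
  SDValue Generic = DAG.getConstant(RotateAmt, DL, MVT::i8);       // matches imm
  SDValue Target = DAG.getTargetConstant(RotateAmt, DL, MVT::i8);  // matches timm
  return DAG.getNode(X86ISD::VROTLI, DL, VT, R, Target);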
@@ -26626,7 +26625,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { // If this is a canonical idempotent atomicrmw w/no uses, we have a better // lowering available in lowerAtomicArith. - // TODO: push more cases through this path. + // TODO: push more cases through this path. if (auto *C = dyn_cast<ConstantInt>(AI->getValOperand())) if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() && AI->use_empty()) @@ -26696,7 +26695,7 @@ bool X86TargetLowering::lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const { /// Emit a locked operation on a stack location which does not change any /// memory location, but does involve a lock prefix. Location is chosen to be /// a) very likely accessed only by a single thread to minimize cache traffic, -/// and b) definitely dereferenceable. Returns the new Chain result. +/// and b) definitely dereferenceable. Returns the new Chain result. static SDValue emitLockedStackOp(SelectionDAG &DAG, const X86Subtarget &Subtarget, SDValue Chain, SDLoc DL) { @@ -26705,22 +26704,22 @@ static SDValue emitLockedStackOp(SelectionDAG &DAG, // operations issued by the current processor. As such, the location // referenced is not relevant for the ordering properties of the instruction. // See: Intel® 64 and IA-32 Architectures Software Developer’s Manual, - // 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions + // 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions // 2) Using an immediate operand appears to be the best encoding choice // here since it doesn't require an extra register. // 3) OR appears to be very slightly faster than ADD. (Though, the difference // is small enough it might just be measurement noise.) // 4) When choosing offsets, there are several contributing factors: // a) If there's no redzone, we default to TOS. (We could allocate a cache - // line aligned stack object to improve this case.) + // line aligned stack object to improve this case.) // b) To minimize our chances of introducing a false dependence, we prefer - // to offset the stack usage from TOS slightly. + // to offset the stack usage from TOS slightly. // c) To minimize concerns about cross thread stack usage - in particular, // the idiomatic MyThreadPool.run([&StackVars]() {...}) pattern which // captures state in the TOS frame and accesses it from many threads - // we want to use an offset such that the offset is in a distinct cache // line from the TOS frame. - // + // // For a general discussion of the tradeoffs and benchmark results, see: // https://shipilev.net/blog/2014/on-the-fence-with-dependencies/ @@ -26773,7 +26772,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget, if (Subtarget.hasMFence()) return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); - SDValue Chain = Op.getOperand(0); + SDValue Chain = Op.getOperand(0); return emitLockedStackOp(DAG, Subtarget, Chain, dl); } @@ -27256,12 +27255,12 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG, // seq_cst which isn't SingleThread, everything just needs to be preserved // during codegen and then dropped. Note that we expect (but don't assume), // that orderings other than seq_cst and acq_rel have been canonicalized to - // a store or load. + // a store or load. if (AN->getOrdering() == AtomicOrdering::SequentiallyConsistent && AN->getSyncScopeID() == SyncScope::System) { // Prefer a locked operation against a stack location to minimize cache // traffic.
This assumes that stack locations are very likely to be - // accessed only by the owning thread. + // accessed only by the owning thread. SDValue NewChain = emitLockedStackOp(DAG, Subtarget, Chain, DL); assert(!N->hasAnyUseOfValue(0)); // NOTE: The getUNDEF is needed to give something for the unused result 0. @@ -32636,7 +32635,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, Res = DAG.getBitcast(ShuffleVT, V1); Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, Res, DAG.getUNDEF(ShuffleVT), - DAG.getConstant(PermMask, DL, MVT::i8)); + DAG.getTargetConstant(PermMask, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } @@ -32743,7 +32742,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, return SDValue(); // Nothing to do! Res = DAG.getBitcast(ShuffleVT, V1); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res, - DAG.getConstant(PermuteImm, DL, MVT::i8)); + DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } } @@ -32773,7 +32772,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, NewV1 = DAG.getBitcast(ShuffleVT, NewV1); NewV2 = DAG.getBitcast(ShuffleVT, NewV2); Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2, - DAG.getConstant(PermuteImm, DL, MVT::i8)); + DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } @@ -32790,8 +32789,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, return SDValue(); // Nothing to do! V1 = DAG.getBitcast(IntMaskVT, V1); Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1, - DAG.getConstant(BitLen, DL, MVT::i8), - DAG.getConstant(BitIdx, DL, MVT::i8)); + DAG.getTargetConstant(BitLen, DL, MVT::i8), + DAG.getTargetConstant(BitIdx, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } @@ -32801,8 +32800,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, V1 = DAG.getBitcast(IntMaskVT, V1); V2 = DAG.getBitcast(IntMaskVT, V2); Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2, - DAG.getConstant(BitLen, DL, MVT::i8), - DAG.getConstant(BitIdx, DL, MVT::i8)); + DAG.getTargetConstant(BitLen, DL, MVT::i8), + DAG.getTargetConstant(BitIdx, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } } @@ -32966,7 +32965,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, V2 = DAG.getBitcast(MaskVT, V2); SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true); Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp, - DAG.getConstant(M2ZImm, DL, MVT::i8)); + DAG.getTargetConstant(M2ZImm, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } @@ -33785,7 +33784,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, return DAG.getBitcast( VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0), N1.getOperand(0), - DAG.getConstant(BlendMask, DL, MVT::i8))); + DAG.getTargetConstant(BlendMask, DL, MVT::i8))); } } return SDValue(); @@ -33853,12 +33852,12 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, // If we zero out all elements from Op0 then we don't need to reference it. if (((ZeroMask | (1u << DstIdx)) == 0xF) && !Op0.isUndef()) return DAG.getNode(X86ISD::INSERTPS, DL, VT, DAG.getUNDEF(VT), Op1, - DAG.getConstant(InsertPSMask, DL, MVT::i8)); + DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); // If we zero out the element from Op1 then we don't need to reference it. 
if ((ZeroMask & (1u << DstIdx)) && !Op1.isUndef()) return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT), - DAG.getConstant(InsertPSMask, DL, MVT::i8)); + DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); // Attempt to merge insertps Op1 with an inner target shuffle node. SmallVector TargetMask1; @@ -33869,14 +33868,14 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, // Zero/UNDEF insertion - zero out element and remove dependency. InsertPSMask |= (1u << DstIdx); return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT), - DAG.getConstant(InsertPSMask, DL, MVT::i8)); + DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); } // Update insertps mask srcidx and reference the source input directly. assert(0 <= M && M < 8 && "Shuffle index out of range"); InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6); Op1 = Ops1[M < 4 ? 0 : 1]; return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, - DAG.getConstant(InsertPSMask, DL, MVT::i8)); + DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); } // Attempt to merge insertps Op0 with an inner target shuffle node. @@ -33919,7 +33918,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, if (Updated) return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, - DAG.getConstant(InsertPSMask, DL, MVT::i8)); + DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); } // If we're inserting an element from a vbroadcast of a load, fold the @@ -33932,7 +33931,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Op1.getOperand(0)), - DAG.getConstant(InsertPSMask & 0x3f, DL, MVT::i8)); + DAG.getTargetConstant(InsertPSMask & 0x3f, DL, MVT::i8)); return SDValue(); } @@ -34666,7 +34665,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( } SDLoc dl(Op); - SDValue NewSA = TLO.DAG.getConstant(Diff, dl, MVT::i8); + SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8); return TLO.CombineTo( Op, TLO.DAG.getNode(Opc, dl, VT, Src.getOperand(0), NewSA)); } @@ -34705,7 +34704,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( } SDLoc dl(Op); - SDValue NewSA = TLO.DAG.getConstant(Diff, dl, MVT::i8); + SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8); return TLO.CombineTo( Op, TLO.DAG.getNode(Opc, dl, VT, Src.getOperand(0), NewSA)); } @@ -35108,7 +35107,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( unsigned NewOpc = Diff < 0 ? X86ISD::VSRLI : X86ISD::VSHLI; SDValue NewShift = TLO.DAG.getNode( NewOpc, SDLoc(Op), VT, Op0.getOperand(0), - TLO.DAG.getConstant(std::abs(Diff), SDLoc(Op), MVT::i8)); + TLO.DAG.getTargetConstant(std::abs(Diff), SDLoc(Op), MVT::i8)); return TLO.CombineTo(Op, NewShift); } } @@ -35754,8 +35753,8 @@ static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG, unsigned ShufMask = (NumElts > 2 ? 0 : 0x44); return DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx, - DAG.getConstant(Intrinsic::x86_sse_pshuf_w, DL, MVT::i32), Splat, - DAG.getConstant(ShufMask, DL, MVT::i8)); + DAG.getTargetConstant(Intrinsic::x86_sse_pshuf_w, DL, MVT::i32), + Splat, DAG.getTargetConstant(ShufMask, DL, MVT::i8)); } Ops.append(NumElts, Splat); } else { @@ -36511,7 +36510,7 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) { } // TODO: This switch could include FNEG and the x86-specific FP logic ops - // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid + // (FAND, FANDN, FOR, FXOR). 
But that may require enhancements to avoid // missed load folding and fma+fneg combining. switch (Vec.getOpcode()) { case ISD::FMA: // Begin 3 operands @@ -38912,7 +38911,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG, if (NewShiftVal >= NumBitsPerElt) NewShiftVal = NumBitsPerElt - 1; return DAG.getNode(X86ISD::VSRAI, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(NewShiftVal, SDLoc(N), MVT::i8)); + DAG.getTargetConstant(NewShiftVal, SDLoc(N), MVT::i8)); } // We can decode 'whole byte' logical bit shifts as shuffles. @@ -39032,7 +39031,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, if (Subtarget.hasAVX512()) { SDValue FSetCC = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01, - DAG.getConstant(x86cc, DL, MVT::i8)); + DAG.getTargetConstant(x86cc, DL, MVT::i8)); // Need to fill with zeros to ensure the bitcast will produce zeroes // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v16i1, @@ -39041,10 +39040,9 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, return DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Ins), DL, N->getSimpleValueType(0)); } - SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL, - CMP00.getValueType(), CMP00, CMP01, - DAG.getConstant(x86cc, DL, - MVT::i8)); + SDValue OnesOrZeroesF = + DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, + CMP01, DAG.getTargetConstant(x86cc, DL, MVT::i8)); bool is64BitFP = (CMP00.getValueType() == MVT::f64); MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32; @@ -39238,7 +39236,7 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG, SDLoc DL(N); unsigned ShiftVal = SplatVal.countTrailingOnes(); - SDValue ShAmt = DAG.getConstant(EltBitWidth - ShiftVal, DL, MVT::i8); + SDValue ShAmt = DAG.getTargetConstant(EltBitWidth - ShiftVal, DL, MVT::i8); SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt); return DAG.getBitcast(N->getValueType(0), Shift); } diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 4127e4d5644e..cd833b7d21e5 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -753,14 +753,14 @@ let isCommutable = 1 in def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, u8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>, + [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), (ins VR128X:$src1, f32mem:$src2, u8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR128X:$dst, (X86insertps VR128X:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), - imm:$src3))]>, + timm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>, Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; } @@ -2054,9 +2054,9 @@ multiclass avx512_cmp_scalar, EVEX_4V, VEX_LIG, Sched<[sched]>; + timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>; let mayLoad = 1 in defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), @@ -2064,9 +2064,9 @@ multiclass avx512_cmp_scalar, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, + timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rrb_Int : AVX512_maskable_cmp<0xC2, 
MRMSrcReg, _, @@ -2075,9 +2075,9 @@ multiclass avx512_cmp_scalar, + timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>; let isCodeGenOnly = 1 in { @@ -2088,7 +2088,7 @@ multiclass avx512_cmp_scalar, + timm:$cc))]>, EVEX_4V, VEX_LIG, Sched<[sched]>; def rm : AVX512Ii8<0xC2, MRMSrcMem, (outs _.KRC:$dst), @@ -2097,7 +2097,7 @@ multiclass avx512_cmp_scalar, + timm:$cc))]>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -2530,8 +2530,8 @@ multiclass avx512_vcmp_common, Sched<[sched]>; defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, @@ -2539,9 +2539,9 @@ multiclass avx512_vcmp_common, + timm:$cc)>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, @@ -2552,10 +2552,10 @@ multiclass avx512_vcmp_common, + timm:$cc)>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; // Patterns for selecting with loads in other operand. @@ -2592,9 +2592,9 @@ multiclass avx512_vcmp_sae { "vcmp"#_.Suffix, "$cc, {sae}, $src2, $src1", "$src1, $src2, {sae}, $cc", - (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc), + (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), - imm:$cc)>, + timm:$cc)>, EVEX_B, Sched<[sched]>; } @@ -2649,7 +2649,7 @@ multiclass avx512_scalar_fpclass opc, string OpcodeStr, (ins _.RC:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1), - (i32 imm:$src2)))]>, + (i32 timm:$src2)))]>, Sched<[sched]>; def rrk : AVX512 opc, string OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclasss_su (_.VT _.RC:$src1), - (i32 imm:$src2))))]>, + (i32 timm:$src2))))]>, EVEX_K, Sched<[sched]>; def rm : AVX512 opc, string OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst, (X86Vfpclasss _.ScalarIntMemCPat:$src1, - (i32 imm:$src2)))]>, + (i32 timm:$src2)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmk : AVX512 opc, string OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclasss_su _.ScalarIntMemCPat:$src1, - (i32 imm:$src2))))]>, + (i32 timm:$src2))))]>, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -2689,7 +2689,7 @@ multiclass avx512_vector_fpclass opc, string OpcodeStr, (ins _.RC:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1), - (i32 imm:$src2)))]>, + (i32 timm:$src2)))]>, Sched<[sched]>; def rrk : AVX512 opc, string OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su (_.VT _.RC:$src1), - (i32 imm:$src2))))]>, + (i32 timm:$src2))))]>, EVEX_K, Sched<[sched]>; def rm : AVX512 opc, string OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(X86Vfpclass (_.VT (_.LdFrag addr:$src1)), - (i32 imm:$src2)))]>, + (i32 timm:$src2)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmk : AVX512 opc, string OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su (_.VT (_.LdFrag addr:$src1)), - (i32 imm:$src2))))]>, + (i32 timm:$src2))))]>, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmb : AVX512 opc, string OpcodeStr, [(set _.KRC:$dst,(X86Vfpclass (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src1))), - (i32 imm:$src2)))]>, 
+ (i32 timm:$src2)))]>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmbk : AVX512 opc, string OpcodeStr, [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src1))), - (i32 imm:$src2))))]>, + (i32 timm:$src2))))]>, EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -3111,7 +3111,7 @@ multiclass avx512_mask_shiftop opc, string OpcodeStr, RegisterClass KRC, def ri : Ii8, + [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>, Sched<[sched]>; } @@ -3187,7 +3187,7 @@ multiclass axv512_cmp_packed_cc_no_vlx_lowering { def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2), imm:$cc)), + (Narrow.VT Narrow.RC:$src2), timm:$cc)), (COPY_TO_REGCLASS (!cast(InstStr##Zrri) (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), @@ -3196,7 +3196,7 @@ def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, (OpNode_su (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2), imm:$cc))), + (Narrow.VT Narrow.RC:$src2), timm:$cc))), (COPY_TO_REGCLASS (!cast(InstStr##Zrrik) (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), @@ -5787,13 +5787,13 @@ multiclass avx512_shift_rmi opc, Format ImmFormR, Format ImmFormM, defm ri : AVX512_maskable, + (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>, Sched<[sched]>; defm mi : AVX512_maskable, + (i8 timm:$src2)))>, Sched<[sched.Folded]>; } } @@ -5805,7 +5805,7 @@ multiclass avx512_shift_rmbi opc, Format ImmFormM, defm mbi : AVX512_maskable, + (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 timm:$src2)))>, EVEX_B, Sched<[sched.Folded]>; } @@ -5947,13 +5947,13 @@ let Predicates = [HasAVX512, NoVLX] in { (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), VR128X:$src2)), sub_xmm)>; - def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))), + def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPSRAQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), imm:$src2)), sub_ymm)>; - def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPSRAQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), @@ -6098,23 +6098,23 @@ let Predicates = [HasAVX512, NoVLX] in { (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), sub_ymm)>; - def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPROLQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), imm:$src2)), sub_xmm)>; - def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))), + def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPROLQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), imm:$src2)), sub_ymm)>; - def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v16i32 (VPROLDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), imm:$src2)), sub_xmm)>; - def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))), + def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v16i32 (VPROLDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, 
sub_ymm)), @@ -6149,23 +6149,23 @@ let Predicates = [HasAVX512, NoVLX] in { (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), sub_ymm)>; - def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPRORQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), imm:$src2)), sub_xmm)>; - def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))), + def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPRORQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), imm:$src2)), sub_ymm)>; - def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v16i32 (VPRORDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), imm:$src2)), sub_xmm)>; - def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))), + def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v16i32 (VPRORDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), @@ -8612,21 +8612,21 @@ let ExeDomain = GenericDomain in { (ins _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _dest.RC:$dst, - (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>, + (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>, Sched<[RR]>; let Constraints = "$src0 = $dst" in def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _dest.RC:$dst, - (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2), + (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), _dest.RC:$src0, _src.KRCWM:$mask))]>, Sched<[RR]>, EVEX_K; def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}", [(set _dest.RC:$dst, - (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2), + (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), _dest.ImmAllZerosV, _src.KRCWM:$mask))]>, Sched<[RR]>, EVEX_KZ; let hasSideEffects = 0, mayStore = 1 in { @@ -9085,14 +9085,14 @@ multiclass avx512_rndscale_scalar opc, string OpcodeStr, (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3)))>, + (i32 timm:$src3)))>, Sched<[sched]>; defm rb_Int : AVX512_maskable_scalar, EVEX_B, + (i32 timm:$src3)))>, EVEX_B, Sched<[sched]>; defm m_Int : AVX512_maskable_scalar opc, string OpcodeStr, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (X86RndScales _.RC:$src1, - _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>, + _.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>, Sched<[sched.Folded, sched.ReadAfterFold]>; let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in { @@ -9118,13 +9118,13 @@ multiclass avx512_rndscale_scalar opc, string OpcodeStr, } let Predicates = [HasAVX512] in { - def : Pat<(X86VRndScale _.FRC:$src1, imm:$src2), + def : Pat<(X86VRndScale _.FRC:$src1, timm:$src2), (_.EltVT (!cast(NAME##r) (_.EltVT (IMPLICIT_DEF)), _.FRC:$src1, imm:$src2))>; } let Predicates = [HasAVX512, OptForSize] in { - def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), imm:$src2), + def : 
Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2), (_.EltVT (!cast(NAME##m) (_.EltVT (IMPLICIT_DEF)), addr:$src1, imm:$src2))>; } @@ -10145,19 +10145,19 @@ multiclass avx512_unary_fp_packed_imm opc, string OpcodeStr, SDNode OpNo (ins _.RC:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), - (i32 imm:$src2))>, Sched<[sched]>; + (i32 timm:$src2))>, Sched<[sched]>; defm rmi : AVX512_maskable, + (i32 timm:$src2))>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable, EVEX_B, + (i32 timm:$src2))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10172,7 +10172,7 @@ multiclass avx512_unary_fp_sae_packed_imm opc, string OpcodeStr, OpcodeStr##_.Suffix, "$src2, {sae}, $src1", "$src1, {sae}, $src2", (OpNode (_.VT _.RC:$src1), - (i32 imm:$src2))>, + (i32 timm:$src2))>, EVEX_B, Sched<[sched]>; } @@ -10205,14 +10205,14 @@ multiclass avx512_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3))>, + (i32 timm:$src3))>, Sched<[sched]>; defm rmi : AVX512_maskable, + (i32 timm:$src3))>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, "$src1, ${src2}"##_.BroadcastStr##", $src3", (OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), - (i32 imm:$src3))>, EVEX_B, + (i32 timm:$src3))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10236,7 +10236,7 @@ multiclass avx512_3Op_rm_imm8 opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), (SrcInfo.VT SrcInfo.RC:$src2), - (i8 imm:$src3)))>, + (i8 timm:$src3)))>, Sched<[sched]>; defm rmi : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), (SrcInfo.VT (bitconvert (SrcInfo.LdFrag addr:$src2))), - (i8 imm:$src3)))>, + (i8 timm:$src3)))>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10263,7 +10263,7 @@ multiclass avx512_3Op_imm8 opc, string OpcodeStr, SDNode OpNode, "$src1, ${src2}"##_.BroadcastStr##", $src3", (OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), - (i8 imm:$src3))>, EVEX_B, + (i8 timm:$src3))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -10277,7 +10277,7 @@ multiclass avx512_fp_scalar_imm opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3))>, + (i32 timm:$src3))>, Sched<[sched]>; defm rmi : AVX512_maskable_scalar opc, string OpcodeStr, "$src1, $src2, {sae}, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3))>, + (i32 timm:$src3))>, EVEX_B, Sched<[sched]>; } @@ -10315,7 +10315,7 @@ multiclass avx512_fp_sae_scalar_imm opc, string OpcodeStr, SDNode OpNode "$src1, $src2, {sae}, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3))>, + (i32 timm:$src3))>, EVEX_B, Sched<[sched]>; } @@ -10437,7 +10437,7 @@ multiclass avx512_shuff_packed_128_common opc, string OpcodeStr, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (bitconvert (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2, - (i8 imm:$src3)))))>, + (i8 timm:$src3)))))>, Sched<[sched]>, EVEX2VEXOverride; defm rmi : AVX512_maskable opc, string OpcodeStr, (bitconvert (CastInfo.VT (X86Shuf128 _.RC:$src1, (CastInfo.LdFrag addr:$src2), - (i8 imm:$src3)))))>, + 
(i8 timm:$src3)))))>, Sched<[sched.Folded, sched.ReadAfterFold]>, EVEX2VEXOverride; defm rmbi : AVX512_maskable opc, string OpcodeStr, (CastInfo.VT (X86Shuf128 _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src2)), - (i8 imm:$src3)))))>, EVEX_B, + (i8 timm:$src3)))))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10527,14 +10527,14 @@ multiclass avx512_valign opc, string OpcodeStr, defm rri : AVX512_maskable, + (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>, Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">; defm rmi : AVX512_maskable, + (i8 timm:$src3)))>, Sched<[sched.Folded, sched.ReadAfterFold]>, EVEX2VEXOverride<"VPALIGNRrmi">; @@ -10544,7 +10544,7 @@ multiclass avx512_valign opc, string OpcodeStr, "$src1, ${src2}"##_.BroadcastStr##", $src3", (X86VAlign _.RC:$src1, (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), - (i8 imm:$src3))>, EVEX_B, + (i8 timm:$src3))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10593,7 +10593,7 @@ multiclass avx512_vpalign_mask_lowering(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, To.RC:$src2, @@ -10602,7 +10602,7 @@ multiclass avx512_vpalign_mask_lowering(OpcodeStr#"rrikz") To.KRCWM:$mask, To.RC:$src1, To.RC:$src2, @@ -10612,7 +10612,7 @@ multiclass avx512_vpalign_mask_lowering(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, addr:$src2, @@ -10622,7 +10622,7 @@ multiclass avx512_vpalign_mask_lowering(OpcodeStr#"rmikz") To.KRCWM:$mask, To.RC:$src1, addr:$src2, @@ -10637,7 +10637,7 @@ multiclass avx512_vpalign_mask_lowering_mb(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2, (ImmXForm imm:$src3))>; @@ -10647,7 +10647,7 @@ multiclass avx512_vpalign_mask_lowering_mb(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, addr:$src2, @@ -10659,7 +10659,7 @@ multiclass avx512_vpalign_mask_lowering_mb(OpcodeStr#"rmbikz") To.KRCWM:$mask, To.RC:$src1, addr:$src2, @@ -11103,14 +11103,14 @@ multiclass avx512_shift_packed opc, SDNode OpNode, Format MRMr, def rr : AVX512, + [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>, Sched<[sched]>; def rm : AVX512, + (i8 timm:$src2))))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -11243,7 +11243,7 @@ multiclass avx512_ternlog opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_.VT _.RC:$src3), - (i8 imm:$src4)), 1, 1>, + (i8 timm:$src4)), 1, 1>, AVX512AIi8Base, EVEX_4V, Sched<[sched]>; defm rmi : AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_.VT (bitconvert (_.LdFrag addr:$src3))), - (i8 imm:$src4)), 1, 0>, + (i8 timm:$src4)), 1, 0>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), - (i8 imm:$src4)), 1, 0>, EVEX_B, + (i8 timm:$src4)), 1, 0>, EVEX_B, AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; }// Constraints = "$src1 = $dst" // Additional patterns for matching passthru operand in other positions. 
def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)), + (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>; // Additional patterns for matching loads in other positions. def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4))), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4))), (!cast(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; def : Pat<(_.VT (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4))), + _.RC:$src2, (i8 timm:$src4))), (!cast(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; @@ -11293,13 +11293,13 @@ multiclass avx512_ternlog opc, string OpcodeStr, SDNode OpNode, // positions. def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.ImmAllZerosV)), (!cast(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4)), + _.RC:$src2, (i8 timm:$src4)), _.ImmAllZerosV)), (!cast(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; @@ -11308,43 +11308,43 @@ multiclass avx512_ternlog opc, string OpcodeStr, SDNode OpNode, // operand orders. def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4)), + _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, _.RC:$src1, - (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)), + (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src1, (i8 imm:$src4)), + _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src1, _.RC:$src2, (i8 imm:$src4)), + _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; // Additional patterns for matching broadcasts in other positions. 
def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4))), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4))), (!cast(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; def : Pat<(_.VT (OpNode _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4))), + _.RC:$src2, (i8 timm:$src4))), (!cast(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; @@ -11352,7 +11352,7 @@ multiclass avx512_ternlog opc, string OpcodeStr, SDNode OpNode, // positions. def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.ImmAllZerosV)), (!cast(Name#_.ZSuffix#rmbikz) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, @@ -11360,7 +11360,7 @@ multiclass avx512_ternlog opc, string OpcodeStr, SDNode OpNode, def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4)), + _.RC:$src2, (i8 timm:$src4)), _.ImmAllZerosV)), (!cast(Name#_.ZSuffix#rmbikz) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, @@ -11371,32 +11371,32 @@ multiclass avx512_ternlog opc, string OpcodeStr, SDNode OpNode, def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4)), + _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - (i8 imm:$src4)), _.RC:$src1)), + (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src1, (i8 imm:$src4)), + _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src1, _.RC:$src2, (i8 imm:$src4)), + _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; @@ -11531,14 +11531,14 @@ multiclass avx512_fixupimm_packed opc, string OpcodeStr, (X86VFixupimm (_.VT _.RC:$src1), (_.VT _.RC:$src2), (TblVT.VT _.RC:$src3), - (i32 imm:$src4))>, Sched<[sched]>; + (i32 timm:$src4))>, Sched<[sched]>; defm rmi : AVX512_maskable_3src, + (i32 timm:$src4))>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable_3src opc, string OpcodeStr, (X86VFixupimm (_.VT _.RC:$src1), (_.VT _.RC:$src2), (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))), - (i32 imm:$src4))>, + (i32 timm:$src4))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } // Constraints = "$src1 = $dst" } @@ -11564,7 +11564,7 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { 
(X86VFixupimmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), (TblVT.VT _.RC:$src3), - (i32 imm:$src4))>, + (i32 timm:$src4))>, EVEX_B, Sched<[sched]>; } } @@ -11580,7 +11580,7 @@ multiclass avx512_fixupimm_scalar opc, string OpcodeStr, (X86VFixupimms (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_src3VT.VT _src3VT.RC:$src3), - (i32 imm:$src4))>, Sched<[sched]>; + (i32 timm:$src4))>, Sched<[sched]>; defm rrib : AVX512_maskable_3src_scalar opc, string OpcodeStr, (X86VFixupimmSAEs (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_src3VT.VT _src3VT.RC:$src3), - (i32 imm:$src4))>, + (i32 timm:$src4))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmi : AVX512_maskable_3src_scalar opc, string OpcodeStr, (_.VT _.RC:$src2), (_src3VT.VT (scalar_to_vector (_src3VT.ScalarLdFrag addr:$src3))), - (i32 imm:$src4))>, + (i32 timm:$src4))>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } multiclass avx512_fixupimm_packed_all { let Predicates = [HasAVX512] in defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM, @@ -12072,7 +12072,7 @@ multiclass GF2P8AFFINE_avx512_rmb_imm Op, string OpStr, SDNode OpNode, "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3", (OpNode (VTI.VT VTI.RC:$src1), (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))), - (i8 imm:$src3))>, EVEX_B, + (i8 timm:$src3))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td index f60378597fa7..08104c8bcc66 100644 --- a/llvm/lib/Target/X86/X86InstrMMX.td +++ b/llvm/lib/Target/X86/X86InstrMMX.td @@ -114,13 +114,13 @@ multiclass ssse3_palign_mm, + [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 timm:$src3)))]>, Sched<[sched]>; def rmi : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2, u8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR64:$dst, (IntId VR64:$src1, - (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>, + (bitconvert (load_mmx addr:$src2)), (i8 timm:$src3)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -496,14 +496,14 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, u8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, - (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>, + (int_x86_sse_pshuf_w VR64:$src1, timm:$src2))]>, Sched<[SchedWriteShuffle.MMX]>; def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src1, u8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, (int_x86_sse_pshuf_w (load_mmx addr:$src1), - imm:$src2))]>, + timm:$src2))]>, Sched<[SchedWriteShuffle.MMX.Folded]>; // -- Conversion Instructions diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 8d3199e3cfa9..8ea8c2d37db9 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -370,7 +370,7 @@ defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups", SSEPackedSingle, SchedWriteFMoveLS.YMM>, PS, VEX, VEX_L, VEX_WIG; -defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd", +defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd", SSEPackedDouble, SchedWriteFMoveLS.YMM>, PD, VEX, VEX_L, VEX_WIG; } @@ -1728,12 +1728,12 @@ multiclass sse12_cmp_scalar, + [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, timm:$cc))]>, Sched<[sched]>; def rm : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, 
x86memop:$src2, u8imm:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), - (ld_frag addr:$src2), imm:$cc))]>, + (ld_frag addr:$src2), timm:$cc))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -1766,13 +1766,13 @@ multiclass sse12_cmp_scalar_int, + VR128:$src, timm:$cc))]>, Sched<[sched]>; let mayLoad = 1 in def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, memop:$src, u8imm:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, - mem_cpat:$src, imm:$cc))]>, + mem_cpat:$src, timm:$cc))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -1891,12 +1891,12 @@ multiclass sse12_cmp_packed, + [(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, timm:$cc)))], d>, Sched<[sched]>; def rmi : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm, [(set RC:$dst, - (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), imm:$cc)))], d>, + (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -1921,7 +1921,7 @@ let Constraints = "$src1 = $dst" in { SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD; } -def CommutableCMPCC : PatLeaf<(imm), [{ +def CommutableCMPCC : PatLeaf<(timm), [{ uint64_t Imm = N->getZExtValue() & 0x7; return (Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07); }]>; @@ -1985,13 +1985,13 @@ multiclass sse12_shuffle, + (i8 timm:$src3))))], d>, Sched<[sched.Folded, sched.ReadAfterFold]>; let isCommutable = IsCommutable in def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, - (i8 imm:$src3))))], d>, + (i8 timm:$src3))))], d>, Sched<[sched]>; } @@ -2736,7 +2736,7 @@ defm : scalar_math_patterns; defm : scalar_math_patterns; defm : scalar_math_patterns; - + /// Unop Arithmetic /// In addition, we also have a special variant of the scalar form here to /// represent the associated intrinsic operation. 
This form is unlike the @@ -3497,7 +3497,7 @@ multiclass PDI_binop_rmi opc, bits<8> opc2, Format ImmForm, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))]>, + [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 timm:$src2))))]>, Sched<[schedImm]>; } @@ -3529,7 +3529,7 @@ multiclass PDI_binop_ri opc, Format ImmForm, string OpcodeStr, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (VT (OpNode RC:$src1, (i8 imm:$src2))))]>, + [(set RC:$dst, (VT (OpNode RC:$src1, (i8 timm:$src2))))]>, Sched<[sched]>; } @@ -3612,7 +3612,7 @@ let Predicates = [HasAVX, prd] in { !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, - (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>, + (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>, VEX, Sched<[sched.XMM]>, VEX_WIG; def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), @@ -3620,7 +3620,7 @@ let Predicates = [HasAVX, prd] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (load addr:$src1), - (i8 imm:$src2))))]>, VEX, + (i8 timm:$src2))))]>, VEX, Sched<[sched.XMM.Folded]>, VEX_WIG; } @@ -3630,7 +3630,7 @@ let Predicates = [HasAVX2, prd] in { !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, - (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))]>, + (vt256 (OpNode VR256:$src1, (i8 timm:$src2))))]>, VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG; def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, u8imm:$src2), @@ -3638,7 +3638,7 @@ let Predicates = [HasAVX2, prd] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode (load addr:$src1), - (i8 imm:$src2))))]>, VEX, VEX_L, + (i8 timm:$src2))))]>, VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG; } @@ -3648,7 +3648,7 @@ let Predicates = [UseSSE2] in { !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, - (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>, + (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>, Sched<[sched.XMM]>; def mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), @@ -3656,7 +3656,7 @@ let Predicates = [UseSSE2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (memop addr:$src1), - (i8 imm:$src2))))]>, + (i8 timm:$src2))))]>, Sched<[sched.XMM.Folded]>; } } @@ -4827,7 +4827,7 @@ multiclass ssse3_palignr, + [(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 timm:$src3))))]>, Sched<[sched]>; let mayLoad = 1 in def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst), @@ -4838,7 +4838,7 @@ multiclass ssse3_palignr, + (i8 timm:$src3))))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -5315,7 +5315,7 @@ multiclass SS41I_insertf32 opc, string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (X86insertps VR128:$src1, VR128:$src2, imm:$src3))]>, + (X86insertps VR128:$src1, VR128:$src2, timm:$src3))]>, Sched<[SchedWriteFShuffle.XMM]>; def rm : SS4AIi8 opc, string asm, bit Is2Addr = 1> { [(set VR128:$dst, (X86insertps VR128:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), - imm:$src3))]>, + timm:$src3))]>, Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; } @@ -5352,7 +5352,7 @@ multiclass 
sse41_fp_unop_p opc, string OpcodeStr, (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (VT (OpNode RC:$src1, imm:$src2)))]>, + [(set RC:$dst, (VT (OpNode RC:$src1, timm:$src2)))]>, Sched<[sched]>; // Vector intrinsic operation, mem @@ -5361,7 +5361,7 @@ multiclass sse41_fp_unop_p opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, - (VT (OpNode (mem_frag addr:$src1),imm:$src2)))]>, + (VT (OpNode (mem_frag addr:$src1), timm:$src2)))]>, Sched<[sched.Folded]>; } @@ -5443,7 +5443,7 @@ let ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 in { "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, imm:$src3)))]>, + [(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>, Sched<[sched]>; def SSm_Int : SS4AIi8, + (OpNode VR128:$src1, sse_load_f32:$src2, timm:$src3))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 @@ -5466,7 +5466,7 @@ let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in { "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, imm:$src3)))]>, + [(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>, Sched<[sched]>; def SDm_Int : SS4AIi8, + (OpNode VR128:$src1, sse_load_f64:$src2, timm:$src3))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 } @@ -5512,16 +5512,16 @@ let Predicates = [UseAVX] in { } let Predicates = [UseAVX] in { - def : Pat<(X86VRndScale FR32:$src1, imm:$src2), + def : Pat<(X86VRndScale FR32:$src1, timm:$src2), (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, imm:$src2)>; - def : Pat<(X86VRndScale FR64:$src1, imm:$src2), + def : Pat<(X86VRndScale FR64:$src1, timm:$src2), (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, imm:$src2)>; } let Predicates = [UseAVX, OptForSize] in { - def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2), + def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2), (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>; - def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2), + def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2), (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>; } @@ -5539,16 +5539,16 @@ defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl, v4f32, v2f64, X86RndScales>; let Predicates = [UseSSE41] in { - def : Pat<(X86VRndScale FR32:$src1, imm:$src2), + def : Pat<(X86VRndScale FR32:$src1, timm:$src2), (ROUNDSSr FR32:$src1, imm:$src2)>; - def : Pat<(X86VRndScale FR64:$src1, imm:$src2), + def : Pat<(X86VRndScale FR64:$src1, timm:$src2), (ROUNDSDr FR64:$src1, imm:$src2)>; } let Predicates = [UseSSE41, OptForSize] in { - def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2), + def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2), (ROUNDSSm addr:$src1, imm:$src2)>; - def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2), + def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2), (ROUNDSDm addr:$src1, imm:$src2)>; } @@ -5830,7 +5830,7 @@ multiclass SS41I_binop_rmi_int opc, string OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set RC:$dst, (IntId 
RC:$src1, RC:$src2, imm:$src3))]>, + [(set RC:$dst, (IntId RC:$src1, RC:$src2, timm:$src3))]>, Sched<[sched]>; def rmi : SS4AIi8 opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set RC:$dst, - (IntId RC:$src1, (memop_frag addr:$src2), imm:$src3))]>, + (IntId RC:$src1, (memop_frag addr:$src2), timm:$src3))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -5857,7 +5857,7 @@ multiclass SS41I_binop_rmi opc, string OpcodeStr, SDNode OpNode, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>, + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>, Sched<[sched]>; def rmi : SS4AIi8 opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set RC:$dst, - (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>, + (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -6012,7 +6012,7 @@ let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in { "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>, + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>, Sched<[sched]>; def rmi : SS4AIi8, + (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } // Pattern to commute if load is in first source. - def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, imm:$src3)), + def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, timm:$src3)), (!cast(NAME#"rmi") RC:$src1, addr:$src2, (commuteXForm imm:$src3))>; } @@ -6065,36 +6065,36 @@ let Predicates = [HasAVX2] in { // Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw. // ExecutionDomainFixPass will cleanup domains later on. let Predicates = [HasAVX1Only] in { -def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3), +def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3), (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$src3)>; -def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3), +def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3), (VBLENDPDYrmi VR256:$src1, addr:$src2, imm:$src3)>; -def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3), +def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3), (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>; // Use pblendw for 128-bit integer to keep it in the integer domain and prevent // it from becoming movsd via commuting under optsize. 
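// The BlendScaleImm2 transform used below rescales the mask: a v2i64
// X86Blendi immediate carries one select bit per 64-bit element, while
// pblendw selects per 16-bit word, so each element bit has to widen to four
// word bits. For example, a v2i64 immediate of 0b10 ("take element 1 from
// $src2") becomes the pblendw immediate 0xF0 ("take words 4-7 from $src2").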
-def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3), +def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3), (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>; -def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3), +def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3), (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>; -def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3), +def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3), (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>; -def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), imm:$src3), +def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), timm:$src3), (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$src3)>; -def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), imm:$src3), +def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), timm:$src3), (VBLENDPSYrmi VR256:$src1, addr:$src2, imm:$src3)>; -def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, imm:$src3), +def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, timm:$src3), (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 imm:$src3))>; // Use pblendw for 128-bit integer to keep it in the integer domain and prevent // it from becoming movss via commuting under optsize. -def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3), +def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3), (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>; -def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), imm:$src3), +def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), timm:$src3), (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>; -def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, imm:$src3), +def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, timm:$src3), (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>; } @@ -6111,18 +6111,18 @@ defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16, let Predicates = [UseSSE41] in { // Use pblendw for 128-bit integer to keep it in the integer domain and prevent // it from becoming movss via commuting under optsize. 
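// The same rescaling applies to v4i32 below: one select bit per 32-bit
// element maps to two pblendw word bits, so BlendScaleImm4 turns e.g.
// 0b0101 into 0x33. The BlendScaleCommuteImm* forms additionally invert the
// scaled mask, since swapping the two blend sources complements the element
// selection (0b10 scales to 0xF0, and the commuted form is 0x0F).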
-def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3), +def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3), (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>; -def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), imm:$src3), +def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), timm:$src3), (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>; -def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, imm:$src3), +def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, timm:$src3), (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>; -def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3), +def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3), (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>; -def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), imm:$src3), +def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), timm:$src3), (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>; -def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, imm:$src3), +def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, timm:$src3), (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>; } @@ -6596,7 +6596,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, (int_x86_sha1rnds4 VR128:$src1, VR128:$src2, - (i8 imm:$src3)))]>, TA, + (i8 timm:$src3)))]>, TA, Sched<[SchedWriteVecIMul.XMM]>; def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, u8imm:$src3), @@ -6604,7 +6604,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { [(set VR128:$dst, (int_x86_sha1rnds4 VR128:$src1, (memop addr:$src2), - (i8 imm:$src3)))]>, TA, + (i8 timm:$src3)))]>, TA, Sched<[SchedWriteVecIMul.XMM.Folded, SchedWriteVecIMul.XMM.ReadAfterFold]>; @@ -6722,26 +6722,26 @@ let Predicates = [HasAVX, HasAES] in { (ins VR128:$src1, u8imm:$src2), "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, + (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>, Sched<[WriteAESKeyGen]>, VEX, VEX_WIG; def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist (load addr:$src1), imm:$src2))]>, + (int_x86_aesni_aeskeygenassist (load addr:$src1), timm:$src2))]>, Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG; } def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2), "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, + (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>, Sched<[WriteAESKeyGen]>; def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist (memop addr:$src1), imm:$src2))]>, + (int_x86_aesni_aeskeygenassist (memop addr:$src1), timm:$src2))]>, Sched<[WriteAESKeyGen.Folded]>; //===----------------------------------------------------------------------===// @@ -6762,7 +6762,7 @@ let Predicates = [NoAVX, HasPCLMUL] in { (ins VR128:$src1, VR128:$src2, u8imm:$src3), 
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, - (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>, + (int_x86_pclmulqdq VR128:$src1, VR128:$src2, timm:$src3))]>, Sched<[WriteCLMul]>; def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), @@ -6770,12 +6770,12 @@ let Predicates = [NoAVX, HasPCLMUL] in { "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1, (memop addr:$src2), - imm:$src3))]>, + timm:$src3))]>, Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>; } // Constraints = "$src1 = $dst" def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1, - (i8 imm:$src3)), + (i8 timm:$src3)), (PCLMULQDQrm VR128:$src1, addr:$src2, (PCLMULCommuteImm imm:$src3))>; } // Predicates = [NoAVX, HasPCLMUL] @@ -6799,19 +6799,19 @@ multiclass vpclmulqdq, + (IntId RC:$src1, RC:$src2, timm:$src3))]>, Sched<[WriteCLMul]>; def rm : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, MemOp:$src2, u8imm:$src3), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set RC:$dst, - (IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>, + (IntId RC:$src1, (LdFrag addr:$src2), timm:$src3))]>, Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>; // We can commute a load in the first operand by swapping the sources and // rotating the immediate. - def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 imm:$src3)), + def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 timm:$src3)), (!cast(NAME#"rm") RC:$src1, addr:$src2, (PCLMULCommuteImm imm:$src3))>; } @@ -6857,8 +6857,8 @@ let Constraints = "$src = $dst" in { def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst), (ins VR128:$src, u8imm:$len, u8imm:$idx), "extrq\t{$idx, $len, $src|$src, $len, $idx}", - [(set VR128:$dst, (X86extrqi VR128:$src, imm:$len, - imm:$idx))]>, + [(set VR128:$dst, (X86extrqi VR128:$src, timm:$len, + timm:$idx))]>, PD, Sched<[SchedWriteVecALU.XMM]>; def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$mask), @@ -6871,7 +6871,7 @@ def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx), "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}", [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2, - imm:$len, imm:$idx))]>, + timm:$len, timm:$idx))]>, XD, Sched<[SchedWriteVecALU.XMM]>; def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$mask), @@ -7142,13 +7142,13 @@ multiclass avx_permil opc_rm, bits<8> opc_rmi, string OpcodeStr, def ri : AVXAIi8, VEX, + [(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 timm:$src2))))]>, VEX, Sched<[sched]>; def mi : AVXAIi8, VEX, + (f_vt (X86VPermilpi (load addr:$src1), (i8 timm:$src2))))]>, VEX, Sched<[sched.Folded]>; }// Predicates = [HasAVX, NoVLX] } @@ -7180,13 +7180,13 @@ def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, u8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, - (i8 imm:$src3))))]>, VEX_4V, VEX_L, + (i8 timm:$src3))))]>, VEX_4V, VEX_L, Sched<[WriteFShuffle256]>; def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, u8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4f64 addr:$src2), - (i8 imm:$src3)))]>, VEX_4V, VEX_L, + (i8 timm:$src3)))]>, VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>; 
} @@ -7198,19 +7198,19 @@ def Perm2XCommuteImm : SDNodeXForm; } let Predicates = [HasAVX1Only] in { -def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, - (loadv4i64 addr:$src2), (i8 imm:$imm))), + (loadv4i64 addr:$src2), (i8 timm:$imm))), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; // Pattern with load in other operand. def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2), - VR256:$src1, (i8 imm:$imm))), + VR256:$src1, (i8 timm:$imm))), (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>; } @@ -7256,7 +7256,7 @@ multiclass f16c_ps2ph, + [(set VR128:$dst, (X86cvtps2ph RC:$src1, timm:$src2))]>, TAPD, VEX, Sched<[RR]>; let hasSideEffects = 0, mayStore = 1 in def mr : Ii8<0x1D, MRMDestMem, (outs), @@ -7326,18 +7326,18 @@ multiclass AVX2_blend_rmi opc, string OpcodeStr, SDNode OpNode, (ins RC:$src1, RC:$src2, u8imm:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>, + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>, Sched<[sched]>, VEX_4V; def rmi : AVX2AIi8, + (OpVT (OpNode RC:$src1, (load addr:$src2), timm:$src3)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V; // Pattern to commute if load is in first source. - def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, imm:$src3)), + def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, timm:$src3)), (!cast(NAME#"rmi") RC:$src1, addr:$src2, (commuteXForm imm:$src3))>; } @@ -7350,18 +7350,18 @@ defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32, SchedWriteBlend.YMM, VR256, i256mem, BlendCommuteImm8>, VEX_L; -def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3), +def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3), (VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 imm:$src3))>; -def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3), +def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3), (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>; -def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3), +def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3), (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>; -def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3), +def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3), (VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 imm:$src3))>; -def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3), +def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3), (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 imm:$src3))>; -def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3), +def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3), (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 imm:$src3))>; } @@ -7611,7 +7611,7 @@ multiclass avx2_perm_imm opc, string OpcodeStr, PatFrag mem_frag, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, - (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>, + (OpVT (X86VPermi VR256:$src1, (i8 timm:$src2))))]>, Sched<[Sched]>, VEX, VEX_L; def Ymi : AVX2AIi8 opc, string OpcodeStr, PatFrag mem_frag, "\t{$src2, $src1, 
$dst|$dst, $src1, $src2}"), [(set VR256:$dst, (OpVT (X86VPermi (mem_frag addr:$src1), - (i8 imm:$src2))))]>, + (i8 timm:$src2))))]>, Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L; } } @@ -7638,18 +7638,18 @@ def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, u8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, - (i8 imm:$src3))))]>, Sched<[WriteShuffle256]>, + (i8 timm:$src3))))]>, Sched<[WriteShuffle256]>, VEX_4V, VEX_L; def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, u8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2), - (i8 imm:$src3)))]>, + (i8 timm:$src3)))]>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L; let Predicates = [HasAVX2] in def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2), - VR256:$src1, (i8 imm:$imm))), + VR256:$src1, (i8 timm:$imm))), (VPERM2I128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>; @@ -7931,13 +7931,13 @@ multiclass GF2P8AFFINE_rmi Op, string OpStr, ValueType OpVT, OpStr##"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in { def rri : Ii8, Sched<[SchedWriteVecALU.XMM]>; def rmi : Ii8, + timm:$src3)))], SSEPackedInt>, Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>; } } diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td index 7050e1917494..7f41feb6c0d9 100644 --- a/llvm/lib/Target/X86/X86InstrSystem.td +++ b/llvm/lib/Target/X86/X86InstrSystem.td @@ -43,7 +43,7 @@ def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))]>; let SchedRW = [WriteSystem] in { def INT : Ii8<0xcd, RawFrm, (outs), (ins u8imm:$trap), "int\t$trap", - [(int_x86_int imm:$trap)]>; + [(int_x86_int timm:$trap)]>; def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB; diff --git a/llvm/lib/Target/X86/X86InstrTSX.td b/llvm/lib/Target/X86/X86InstrTSX.td index fc0da845299f..3a1212342a13 100644 --- a/llvm/lib/Target/X86/X86InstrTSX.td +++ b/llvm/lib/Target/X86/X86InstrTSX.td @@ -45,7 +45,7 @@ def XTEST : I<0x01, MRM_D6, (outs), (ins), def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm), "xabort\t$imm", - [(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>; + [(int_x86_xabort timm:$imm)]>, Requires<[HasRTM]>; } // SchedRW // HLE prefixes diff --git a/llvm/lib/Target/X86/X86InstrXOP.td b/llvm/lib/Target/X86/X86InstrXOP.td index 9a972a9adf9a..6522a950bde1 100644 --- a/llvm/lib/Target/X86/X86InstrXOP.td +++ b/llvm/lib/Target/X86/X86InstrXOP.td @@ -143,13 +143,13 @@ multiclass xop3opimm opc, string OpcodeStr, SDNode OpNode, (ins VR128:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, - (vt128 (OpNode (vt128 VR128:$src1), imm:$src2)))]>, + (vt128 (OpNode (vt128 VR128:$src1), timm:$src2)))]>, XOP, Sched<[sched]>; def mi : IXOPi8, + (vt128 (OpNode (vt128 (load addr:$src1)), timm:$src2)))]>, XOP, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -251,7 +251,7 @@ multiclass xopvpcom opc, string Suffix, SDNode OpNode, ValueType vt128, "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [(set VR128:$dst, (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), - imm:$cc)))]>, + timm:$cc)))]>, XOP_4V, Sched<[sched]>; def mi : IXOPi8 opc, string Suffix, SDNode OpNode, ValueType vt128, [(set VR128:$dst, (vt128 
(OpNode (vt128 VR128:$src1), (vt128 (load addr:$src2)), - imm:$cc)))]>, + timm:$cc)))]>, XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; } def : Pat<(OpNode (load addr:$src2), - (vt128 VR128:$src1), imm:$cc), + (vt128 VR128:$src1), timm:$cc), (!cast(NAME#"mi") VR128:$src1, addr:$src2, (CommuteVPCOMCC imm:$cc))>; } @@ -422,7 +422,7 @@ multiclass xop_vpermil2 Opc, string OpcodeStr, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), [(set RC:$dst, - (VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>, + (VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 timm:$src4))))]>, Sched<[sched]>; def rm : IXOP5 Opc, string OpcodeStr, RegisterClass RC, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), [(set RC:$dst, (VT (X86vpermil2 RC:$src1, RC:$src2, (IntLdFrag addr:$src3), - (i8 imm:$src4))))]>, VEX_W, + (i8 timm:$src4))))]>, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; def mr : IXOP5 Opc, string OpcodeStr, RegisterClass RC, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), [(set RC:$dst, (VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2), - RC:$src3, (i8 imm:$src4))))]>, + RC:$src3, (i8 timm:$src4))))]>, Sched<[sched.Folded, sched.ReadAfterFold, // fpmemop:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll index dd5339a615ce..ade206615037 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -389,8 +389,7 @@ define void @store(i64* %addr, i64 addrspace(42)* %addr42, i64 %val1, i64 %val2) ; CHECK-LABEL: name: intrinsics ; CHECK: [[CUR:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[BITS:%[0-9]+]]:_(s32) = COPY $w1 -; CHECK: [[CREG:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 -; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), [[CREG]] +; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 ; CHECK: [[PTR_VEC:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ptr.vec ; CHECK: [[VEC:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_VEC]] ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), [[VEC]](<8 x s8>), [[VEC]](<8 x s8>), [[PTR]](p0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.mir index 071625f98c0f..99b28f382c07 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.exp.mir @@ -10,24 +10,20 @@ body: | bb.0: liveins: $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = G_CONSTANT i32 1 - %2:sgpr(s32) = G_CONSTANT i32 15 - %3:sgpr(s1) = G_CONSTANT i1 0 - %4:sgpr(s1) = G_CONSTANT i1 1 ; CHECK: EXP 1, %0, %0, %0, %0, 0, 0, 15, implicit $exec - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %1:sgpr(s32), %2:sgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %3:sgpr(s1), %3:sgpr(s1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp),1, 15, %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), 0, 0 ; CHECK: EXP_DONE 1, %0, %0, %0, %0, 0, 0, 15, implicit $exec - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %1:sgpr(s32), %2:sgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %4:sgpr(s1), %3:sgpr(s1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 1, 15, 
%0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), 1, 0 %5:vgpr(<2 x s16>) = G_BITCAST %0(s32) ; CHECK: [[UNDEF0:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: EXP 1, %0, %0, [[UNDEF0]], [[UNDEF0]], 0, 1, 15, implicit $exec - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), %1:sgpr(s32), %2:sgpr(s32), %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), %3:sgpr(s1), %3:sgpr(s1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), 0, 0 ; CHECK: [[UNDEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: EXP_DONE 1, %0, %0, [[UNDEF1]], [[UNDEF1]], 0, 1, 15, implicit $exec - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), %1:sgpr(s32), %2:sgpr(s32), %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), %4:sgpr(s1), %3:sgpr(s1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), 1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.sendmsg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.sendmsg.mir index 1339d3dd56ca..4129e7be49f6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.sendmsg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.s.sendmsg.mir @@ -18,8 +18,7 @@ body: | ; GCN: S_SENDMSG 1, implicit $exec, implicit $m0 ; GCN: S_ENDPGM 0 %0:sgpr(s32) = COPY $sgpr0 - %2:sgpr(s32) = G_CONSTANT i32 1 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), %2(s32), %0(s32) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 1, %0(s32) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll new file mode 100644 index 000000000000..0934375dc803 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll @@ -0,0 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -march=amdgcn -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - | FileCheck %s + +declare void @llvm.amdgcn.s.sendmsg(i32 immarg, i32) + +define amdgpu_ps void @test_sendmsg(i32 inreg %m0) { + ; CHECK-LABEL: name: test_sendmsg + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 12, [[COPY]](s32) + ; CHECK: S_ENDPGM + call void @llvm.amdgcn.s.sendmsg(i32 12, i32 %m0) + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll index e5e8b6840d2f..2957b1833ed9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll @@ -9,10 +9,7 @@ define amdgpu_ps void @disabled_input(float inreg %arg0, float %psinput0, float ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), [[C2]](s1), [[C2]](s1) + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0 ; CHECK: S_ENDPGM 0 
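; After this change, the tgt/en/done/vm arguments of llvm.amdgcn.exp appear
; directly as the 0, 15 and trailing 0, 0 immediate operands of
; G_INTRINSIC_W_SIDE_EFFECTS above, instead of being materialized through
; separate G_CONSTANT instructions.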
main_body: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0 @@ -27,17 +24,14 @@ define amdgpu_ps void @disabled_input_struct(float inreg %arg0, { float, float } ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), [[C2]](s1), [[C2]](s1) + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0 ; CHECK: S_ENDPGM 0 main_body: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0 ret void } -declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 +declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0 attributes #0 = { nounwind } attributes #1 = { "InitialPSInputAddr"="0x00002" } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll index 5f90340fbd60..e93d39289a44 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll @@ -1,9 +1,8 @@ ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=irtranslator -global-isel %s -o - | FileCheck %s - ; CHECK-LABEL: name: test_f32_inreg ; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr2 -; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]] +; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[S0]] define amdgpu_vs void @test_f32_inreg(float inreg %arg0) { call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0 ret void @@ -11,7 +10,7 @@ define amdgpu_vs void @test_f32_inreg(float inreg %arg0) { ; CHECK-LABEL: name: test_f32 ; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0 -; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[V0]] +; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[V0]] define amdgpu_vs void @test_f32(float %arg0) { call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0 ret void @@ -33,7 +32,7 @@ define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) { ; CHECK: [[S4:%[0-9]+]]:_(s32) = COPY $sgpr4 ; CHECK: [[S34:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[S3]](s32), [[S4]](s32) ; CHECK: G_LOAD [[S34]] -; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S2]] +; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[S2]](s32) define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* inreg %arg1) { %tmp0 = load volatile i32, i32 addrspace(4)* %arg1 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0 @@ -45,7 +44,7 @@ define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* ; CHECK: [[S1:%[0-9]+]]:_(s32) = 
COPY $sgpr3 ; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[V1:%[0-9]+]]:_(s32) = COPY $vgpr1 -; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[V0]](s32), [[S0]](s32), [[V1]](s32), [[S1]](s32) +; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[V0]](s32), [[S0]](s32), [[V1]](s32), [[S1]](s32) define amdgpu_vs void @test_order(float inreg %arg0, float inreg %arg1, float %arg2, float %arg3) { call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg2, float %arg0, float %arg3, float %arg1, i1 false, i1 false) #0 ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll index f45037027147..66a04b45c424 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll @@ -9,14 +9,13 @@ define amdgpu_ps void @test_div_scale(float %arg0, float %arg1) { ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), [[C]](s1) + ; CHECK: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), -1 ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[INT1]](s1) ; CHECK: G_STORE [[INT]](s32), [[DEF]](p1) :: (store 4 into `float addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[SEXT]](s32), [[DEF1]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM + ; CHECK: S_ENDPGM 0 %call = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %arg0, float %arg1, i1 true) %extract0 = extractvalue { float, i1 } %call, 0 %extract1 = extractvalue { float, i1 } %call, 1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll new file mode 100644 index 000000000000..25f5d873e9d5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll @@ -0,0 +1,519 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck -check-prefix=UNPACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=PACKED %s + +define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16(<4 x i32> inreg %rsrc, half %val, i32 %voffset, i32 inreg %soffset) { + ; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16 + ; UNPACKED: bb.1 (%ir-block.0): + ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 
+ ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4) + ; UNPACKED: S_ENDPGM 0 + ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16 + ; PACKED: bb.1 (%ir-block.0): + ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4) + ; PACKED: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16(<4 x i32> inreg %rsrc, half %val, i32 inreg %soffset) { + ; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16 + ; UNPACKED: bb.1 (%ir-block.0): + ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7 + 4095, align 1, addrspace 4) + ; UNPACKED: S_ENDPGM 0 + ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16 + ; PACKED: bb.1 (%ir-block.0): + ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7 + 4095, align 1, 
addrspace 4) + ; PACKED: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) { + ; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16 + ; UNPACKED: bb.1 (%ir-block.0): + ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 + ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec + ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; UNPACKED: S_ENDPGM 0 + ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16 + ; PACKED: bb.1 (%ir-block.0): + ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; PACKED: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(<4 x i32> inreg %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) { + ; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16 + ; UNPACKED: bb.1 (%ir-block.0): + ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 + ; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; UNPACKED: 
[[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 + ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec + ; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec + ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) + ; UNPACKED: S_ENDPGM 0 + ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16 + ; PACKED: bb.1 (%ir-block.0): + ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 + ; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) + ; PACKED: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +; Make sure unpack code is emitted outside of loop +define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(<4 x i32> %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) { + ; UNPACKED-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16 + ; UNPACKED: bb.1 (%ir-block.0): + ; UNPACKED: successors: %bb.2(0x80000000) + ; UNPACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 + ; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; 
UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 + ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec + ; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec + ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 + ; UNPACKED: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; UNPACKED: bb.2: + ; UNPACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; UNPACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec + ; UNPACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec + ; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; UNPACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec + ; UNPACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec + ; UNPACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec + ; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec + ; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) + ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; UNPACKED: bb.3: + ; UNPACKED: successors: %bb.4(0x80000000) + ; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; UNPACKED: bb.4: + ; UNPACKED: S_ENDPGM 0 + ; PACKED-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16 + ; PACKED: bb.1 (%ir-block.0): + ; PACKED: successors: %bb.2(0x80000000) + ; PACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 + ; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; PACKED: 
[[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; PACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; PACKED: bb.2: + ; PACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; PACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; PACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; PACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; PACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; PACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; PACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec + ; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) + ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; PACKED: bb.3: + ; PACKED: successors: %bb.4(0x80000000) + ; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; PACKED: bb.4: + ; PACKED: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) { + ; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095 + ; UNPACKED: bb.1 (%ir-block.0): + ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; 
UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 + ; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 + ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec + ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; UNPACKED: S_ENDPGM 0 + ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095 + ; PACKED: bb.1 (%ir-block.0): + ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 + ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; PACKED: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) { + ; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096 + ; UNPACKED: bb.1 (%ir-block.0): + ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 + ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec + ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) 
+ ; UNPACKED: S_ENDPGM 0 + ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096 + ; PACKED: bb.1 (%ir-block.0): + ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; PACKED: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) { + ; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16 + ; UNPACKED: bb.1 (%ir-block.0): + ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 + ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED: %11:vgpr_32, dead %21:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec + ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; UNPACKED: S_ENDPGM 0 + ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16 + ; PACKED: bb.1 (%ir-block.0): + ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; PACKED: 
[[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 + ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; PACKED: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; PACKED: S_ENDPGM 0 + %voffset.add = add i32 %voffset, 16 + call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) { + ; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095 + ; UNPACKED: bb.1 (%ir-block.0): + ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095 + ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED: %11:vgpr_32, dead %22:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 + ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec + ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; UNPACKED: S_ENDPGM 0 + ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095 + ; PACKED: bb.1 (%ir-block.0): + ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095 + ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; PACKED: %11:vgpr_32, dead %13:sreg_64_xexec = 
V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; PACKED: S_ENDPGM 0 + %voffset.add = add i32 %voffset, 4095 + call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) { + ; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096 + ; UNPACKED: bb.1 (%ir-block.0): + ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096 + ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED: %11:vgpr_32, dead %22:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 + ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec + ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; UNPACKED: S_ENDPGM 0 + ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096 + ; PACKED: bb.1 (%ir-block.0): + ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096 + ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; PACKED: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; PACKED: S_ENDPGM 0 + %voffset.add = add i32 %voffset, 4096 + 
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) + ret void +} + + +; Check what happens with offset add inside a waterfall loop +define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16_add_4096(<4 x i32> %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) { + ; UNPACKED-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16_add_4096 + ; UNPACKED: bb.1 (%ir-block.0): + ; UNPACKED: successors: %bb.2(0x80000000) + ; UNPACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 + ; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096 + ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; UNPACKED: %13:vgpr_32, dead %47:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec + ; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 + ; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY4]], implicit $exec + ; UNPACKED: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY10]], [[COPY5]], implicit $exec + ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 + ; UNPACKED: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; UNPACKED: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; UNPACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; UNPACKED: bb.2: + ; UNPACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; UNPACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec + ; UNPACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec + ; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; UNPACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY11]], implicit $exec + ; UNPACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec + ; UNPACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec + ; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec + ; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; UNPACKED: 
[[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) + ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; UNPACKED: bb.3: + ; UNPACKED: successors: %bb.4(0x80000000) + ; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; UNPACKED: bb.4: + ; UNPACKED: S_ENDPGM 0 + ; PACKED-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16_add_4096 + ; PACKED: bb.1 (%ir-block.0): + ; PACKED: successors: %bb.2(0x80000000) + ; PACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 + ; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096 + ; PACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; PACKED: %13:vgpr_32, dead %31:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec + ; PACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; PACKED: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; PACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; PACKED: bb.2: + ; PACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; PACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; PACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; PACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; PACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec + ; PACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec + ; PACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec + ; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY10]], implicit $exec + ; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) + ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; PACKED: bb.3: + ; PACKED: successors: %bb.4(0x80000000) + ; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; PACKED: bb.4: + ; PACKED: S_ENDPGM 0 + %voffset.add = add i32 %voffset, 4096 + call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) + ret void +} + +declare void @llvm.amdgcn.raw.buffer.store.format.f16(half, <4 x i32>, i32, i32, i32 immarg) +declare void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg) +declare void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half>, <4 x i32>, i32, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll new file mode 100644 index 000000000000..7de9e4554425 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll @@ -0,0 +1,314 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck %s + +define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f32(<4 x i32> inreg %rsrc, float %val, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, 
$sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32(<4 x i32> inreg %rsrc, <3 x float> %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2 + ; CHECK: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact 
[[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32(<4 x i32> inreg %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32(<4 x i32> %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: 
[[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec + ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec + ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK: bb.4: + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4095(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4095 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 + ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 
%voffset, i32 4095, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4096(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4096 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_16(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 + ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; CHECK: %13:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec + ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + %voffset.add = add i32 %voffset, 16 + call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4095(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: 
raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4095 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095 + ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; CHECK: %13:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec + ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + %voffset.add = add i32 %voffset, 4095 + call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4096(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4096 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096 + ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; CHECK: %13:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec + ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + %voffset.add = add i32 %voffset, 4096 + call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) + ret void +} + + +; Check what happens with offset add inside a waterfall loop +define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32_add_4096(<4 x i32> %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) { + ; 
CHECK-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32_add_4096 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096 + ; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; CHECK: %15:vgpr_32, dead %33:sreg_64_xexec = V_ADD_I32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec + ; CHECK: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec + ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY11]], implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec + ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: $exec = S_MOV_B64_term 
[[S_MOV_B64_term]] + ; CHECK: bb.4: + ; CHECK: S_ENDPGM 0 + %voffset.add = add i32 %voffset, 4096 + call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) + ret void +} + +declare void @llvm.amdgcn.raw.buffer.store.format.f32(float, <4 x i32>, i32, i32, i32 immarg) +declare void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg) +declare void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg) +declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll new file mode 100644 index 000000000000..3ed040bdfffc --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll @@ -0,0 +1,791 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s +; FIXME: Test with SI when argument lowering not broken for f16 + +; Natural mapping +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +; Copies for VGPR arguments +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, float inreg %val, i32 inreg %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr6 + ; CHECK: [[COPY5:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr7 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; CHECK: 
BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +; Waterfall for rsrc +define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, float %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK: bb.4: + ; CHECK: S_ENDPGM 0 + call void 
@llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +; Waterfall for soffset +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK: bb.4: + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +; Waterfall for rsrc and soffset +define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, float %val, i32 %voffset, i32 %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + 
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec + ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK: bb.4: + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1) 
+ ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 3) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_dlc + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable store 4 
into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc_dlc + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 1, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_dlc + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 1, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc_dlc + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], 
%subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 1, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32(<4 x i32> inreg %rsrc, <3 x float> %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2 + ; CHECK: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32(<4 x i32> inreg %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) { + ; 
CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 + ; CHECK: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i8(<4 x i32> inreg %rsrc, i32 %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i8 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom TargetCustom7, addrspace 4) + ; CHECK: S_ENDPGM 0 + %val.trunc = trunc i32 %val to i8 + call void @llvm.amdgcn.raw.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i16(<4 x i32> inreg %rsrc, i32 %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], 
%subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + %val.trunc = trunc i32 %val to i16 + call void @llvm.amdgcn.raw.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16(<4 x i32> inreg %rsrc, half %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(<4 x i32> inreg %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(<4 x i32> %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK: 
BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK: bb.4: + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4095(<4 x i32> inreg %rsrc, float %val, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4095 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4096(<4 x i32> inreg %rsrc, float %val, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4096 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4096, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_16(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, 
$sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 + ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + %voffset.add = add i32 %voffset, 16 + call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4095(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4095 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095 + ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + %voffset.add = add i32 %voffset, 4095 + call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4096(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4096 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], 
%subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096 + ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + %voffset.add = add i32 %voffset, 4096 + call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_16(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: 
raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 + ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + %voffset.add = add i32 %voffset, 16 + call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4095(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4095 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095 + ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + %voffset.add = add i32 %voffset, 4095 + call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4096(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4096 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 
= COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096 + ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: S_ENDPGM 0 + %voffset.add = add i32 %voffset, 4096 + call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) + ret void +} + +; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop. +define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_offset_add_5000(<4 x i32> %rsrc, float %val, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_offset_add_5000 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 5000 + ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; CHECK: %11:vgpr_32, dead %29:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec + ; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec + ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], 
implicit-def $scc + ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK: bb.4: + ; CHECK: S_ENDPGM 0 + %voffset.add = add i32 %voffset, 5000 + call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) + ret void +} + +; No add of the offset is needed here since it is a constant, but it does not fit in the immediate offset field (max 4095): it is split into a base of 4096 materialized in a VGPR plus an immediate offset of 904. A waterfall loop is still required for the VGPR rsrc; make sure the constant materialization is done outside of the waterfall loop. +define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr_soffset_offset(<4 x i32> %rsrc, float %val, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr_soffset_offset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 5000, align 1, addrspace 4) + ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK: bb.4: + ; CHECK: S_ENDPGM 0 + call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 5000, i32 %soffset, i32 0) + ret void +} + +declare void @llvm.amdgcn.raw.buffer.store.i8(i8, <4 x i32>, i32, i32, i32 immarg) +declare void @llvm.amdgcn.raw.buffer.store.i16(i16, <4 x i32>, i32, i32, i32 immarg) + +declare void @llvm.amdgcn.raw.buffer.store.f16(half, <4 x i32>, i32, i32, i32 immarg) +declare void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg) +declare void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32 immarg) + +declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) +declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg) +declare void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg) +declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.sleep.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.sleep.ll new file mode 100644 index 000000000000..55d142cc3c1a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.sleep.ll @@ -0,0 +1,45 @@ +; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare void @llvm.amdgcn.s.sleep(i32) #0 + +; GCN-LABEL: {{^}}test_s_sleep: +; GCN: s_sleep 0{{$}} +; GCN: s_sleep 1{{$}} +; GCN: s_sleep 2{{$}} +; GCN: s_sleep 3{{$}} +; GCN: s_sleep 4{{$}} +; GCN: s_sleep 5{{$}} +; GCN: s_sleep 6{{$}} +; GCN: s_sleep 7{{$}} +; GCN: s_sleep 8{{$}} +; GCN: s_sleep 9{{$}} +; GCN: s_sleep 10{{$}} +; GCN: s_sleep 11{{$}} +; GCN: s_sleep 12{{$}} +; GCN: s_sleep 13{{$}} +; GCN: s_sleep 14{{$}} +; GCN: s_sleep 15{{$}} +define amdgpu_kernel void @test_s_sleep(i32 %x) #0 { + call void @llvm.amdgcn.s.sleep(i32 0) + call void @llvm.amdgcn.s.sleep(i32 1) + call void @llvm.amdgcn.s.sleep(i32 2) + call void @llvm.amdgcn.s.sleep(i32 3) + call void @llvm.amdgcn.s.sleep(i32 4) + call void @llvm.amdgcn.s.sleep(i32 5) + call void @llvm.amdgcn.s.sleep(i32 6) + call void @llvm.amdgcn.s.sleep(i32 7) + + ; Values that might only work on VI + call void @llvm.amdgcn.s.sleep(i32 8) + call void @llvm.amdgcn.s.sleep(i32 9) + call void @llvm.amdgcn.s.sleep(i32 10) + call void @llvm.amdgcn.s.sleep(i32 11) + call void @llvm.amdgcn.s.sleep(i32 12) + call void @llvm.amdgcn.s.sleep(i32 13) + call void @llvm.amdgcn.s.sleep(i32 14) + call void @llvm.amdgcn.s.sleep(i32 15) + ret void +} + +attributes #0 = { nounwind } diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir index 32839ffafc44..65afcc6b7c70 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir @@ -23,28 +23,20 @@ body: | bb.0: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; CHECK-LABEL: name: exp_s - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[C2:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false - ; CHECK: [[C3:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32) - ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[C2]](s1), [[C3]](s1) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(s32) = COPY $sgpr0 - %3:_(s32) = COPY $sgpr1 - %4:_(s32) = COPY $sgpr2 - %5:_(s32) = COPY $sgpr3 - %6:_(s1) = G_CONSTANT i1 0 - %7:_(s1) = G_CONSTANT i1 0 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), %0, %1, %2, %3, %4, %5, %6, %7 + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0, 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = COPY $sgpr3 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), 0, 0, %0, %1, %2, %3, 0, 0 ... --- name: exp_v @@ -54,22 +46,14 @@ body: | bb.0: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-LABEL: name: exp_v - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[C2:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false - ; CHECK: [[C3:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false - ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[C2]](s1), [[C3]](s1) - %0:_(s32) = G_CONSTANT i32 0 - %1:_(s32) = G_CONSTANT i32 0 - %2:_(s32) = COPY $vgpr0 - %3:_(s32) = COPY $vgpr1 - %4:_(s32) = COPY $vgpr2 - %5:_(s32) = COPY $vgpr3 - %6:_(s1) = G_CONSTANT i1 0 - %7:_(s1) = G_CONSTANT i1 0 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), %0, %1, %2, %3, %4, %5, %6, %7 + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 0, 0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), 0, 0, %0, %1, %2, %3, 0, 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir new file mode 100644 index 000000000000..54c3a5009ca8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.swizzle.mir @@ -0,0 +1,21 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: ds_swizzle_s +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ds_swizzle_s + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), [[COPY1]](s32), 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0 + +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll new file mode 100644 index 000000000000..5618c1f910a5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll @@ -0,0 +1,181 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s + +; Natural mapping +define amdgpu_ps void @load_1d_vgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 %s) { + ; CHECK-LABEL: name: load_1d_vgpr_vaddr__sgpr_srsrc + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 + ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 + ; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 + ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8) + ; CHECK: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) + ; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* undef + ret void +} + +; Copy needed for VGPR argument +define amdgpu_ps void @load_1d_sgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 inreg %s) { + ; CHECK-LABEL: name: 
load_1d_sgpr_vaddr__sgpr_srsrc + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 + ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 + ; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 + ; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8) + ; CHECK: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) + ; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* undef + ret void +} + +; Waterfall loop needed for rsrc +define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { + ; CHECK-LABEL: name: load_1d_vgpr_vaddr__vgpr_srsrc + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %19, %bb.2 + ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2 + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), 
[[V_READFIRSTLANE_B32_1]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec + ; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec + ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc + ; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec + ; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec + ; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8) + ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK: bb.4: + ; CHECK: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) + ; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* undef + ret void +} + +; Waterfall loop needed for rsrc, copy needed for vaddr +define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg %s) { + ; CHECK-LABEL: name: load_1d_sgpr_vaddr__vgpr_srsrc + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: successors: 
%bb.2(0x80000000) + ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) + ; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %20, %bb.2 + ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2 + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec + ; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec + ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc + ; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec + ; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) + ; 
CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec + ; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8) + ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK: bb.4: + ; CHECK: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) + ; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* undef + ret void +} + +declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0 + +attributes #0 = { nounwind readonly } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll new file mode 100644 index 000000000000..0ea100dc2029 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll @@ -0,0 +1,268 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s + +; Natural mapping +define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { + ; CHECK-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 + ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 + ; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 + ; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 + ; CHECK: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11 + ; CHECK: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12 + ; CHECK: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13 + ; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), 
[[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8) + ; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) + ; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* undef + ret void +} + +; Copy required for VGPR input +define amdgpu_ps void @sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float inreg %s) { + ; CHECK-LABEL: name: sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 + ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 + ; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 + ; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 + ; CHECK: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11 + ; CHECK: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12 + ; CHECK: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13 + ; CHECK: [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8) + ; CHECK: [[COPY14:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) + ; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY14]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* undef + ret void +} + +; Waterfall loop for rsrc +define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsrc, <4 x i32> inreg %samp, float %s) { + ; CHECK-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %24, %bb.2 + ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2 + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec + ; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec + ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc + ; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = 
V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec + ; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec + ; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8) + ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK: bb.4: + ; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) + ; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* undef + ret void +} + +; Waterfall loop for sampler +define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inreg %rsrc, <4 x i32> %samp, float %s) { + ; CHECK-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 + ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 + ; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9 + ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK: 
[[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %24, %bb.2 + ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2 + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8) + ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK: bb.4: + ; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) + ; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* undef + ret void +} + +; Waterfall loop for rsrc and sampler +define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsrc, <4 x i32> %samp, float %s) { + ; CHECK-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8 
+ ; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>) + ; CHECK: [[UV4:%[0-9]+]]:vreg_64(s64), [[UV5:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %24, %bb.2 + ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2 + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec + ; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec + ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc + ; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec + ; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec + ; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 
[[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) + ; CHECK: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub1(s64), implicit $exec + ; CHECK: [[MV4:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV4]](s64), [[UV4]](s64), implicit $exec + ; CHECK: [[S_AND_B64_3:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_4]], [[S_AND_B64_2]], implicit-def $scc + ; CHECK: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub1(s64), implicit $exec + ; CHECK: [[MV5:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec + ; CHECK: [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc + ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8) + ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK: bb.4: + ; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) + ; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: S_ENDPGM 0 + %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* undef + ret void +} + +declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 + +attributes #0 = { nounwind readonly } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll new file mode 100644 index 000000000000..71014f1ab811 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll @@ -0,0 +1,173 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect 
-regbankselect-fast -o - %s | FileCheck %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s + +; Natural mapping +define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4) + ; CHECK: $vgpr0 = COPY [[INT]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret float %val +} + +; Copies for VGPR arguments +define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 inreg %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_load__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4) + ; CHECK: $vgpr0 = COPY [[INT]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret float %val +} + +; Waterfall for rsrc +define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %15, %bb.2 + ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2 + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4) + ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK: bb.4: + ; CHECK: $vgpr0 = COPY [[INT]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret float %val +} + +; Waterfall for soffset +define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 %soffset) { + ; CHECK-LABEL: name: raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), 
[[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %15, %bb.2 + ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2 + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec + ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY5]](s32), implicit $exec + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4) + ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK: bb.4: + ; CHECK: $vgpr0 = COPY [[INT]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret float %val +} + +; Waterfall for rsrc and soffset +define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) { + ; CHECK-LABEL: name: raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr5 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %15, %bb.2 + ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2 + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; 
CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec + ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]](s32), implicit $exec + ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4) + ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK: bb.4: + ; CHECK: $vgpr0 = COPY [[INT]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret float %val +} + +declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll new file mode 100644 index 000000000000..5991e3c89171 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll @@ -0,0 +1,179 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s + +; Natural mapping +define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4) + ; CHECK: $vgpr0 = COPY [[INT]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) + ret float %val +} + +; Copies for VGPR arguments +define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__sgpr_val__sgpr_vindex__sgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 inreg %vindex, i32 inreg %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: struct_buffer_load__sgpr_rsrc__sgpr_val__sgpr_vindex__sgpr_voffset__sgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7 + ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) + ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4) + ; CHECK: $vgpr0 = COPY [[INT]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) + ret float %val +} + +; Waterfall for rsrc +define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %16, %bb.2 + ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, 
%10(s32), %bb.2
+ ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: bb.3:
+ ; CHECK: successors: %bb.4(0x80000000)
+ ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; CHECK: bb.4:
+ ; CHECK: $vgpr0 = COPY [[INT]](s32)
+ ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
+ %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
+ ret float %val
+}
+
+; Waterfall for soffset
+define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 %soffset) {
+ ; CHECK-LABEL: name: struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; CHECK: bb.2:
+ ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %16, %bb.2
+ ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32),
%bb.2 + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec + ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY6]](s32), implicit $exec + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4) + ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK: bb.4: + ; CHECK: $vgpr0 = COPY [[INT]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) + ret float %val +} + +; Waterfall for rsrc and soffset +define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset) { + ; CHECK-LABEL: name: struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr6 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %16, %bb.2 + ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2 + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = 
V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec + ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]](s32), implicit $exec + ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4) + ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK: bb.4: + ; CHECK: $vgpr0 = COPY [[INT]](s32) + ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) + ret float %val +} + +declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll new file mode 100644 index 000000000000..6b8dd4cfd994 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll @@ -0,0 +1,174 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s + +; Natural mapping +define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, float %val, i32 %vindex, i32 %voffset, i32 inreg %soffset) { + ; CHECK-LABEL: name: struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), 
[[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+  ; CHECK:   S_ENDPGM 0
+  call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
+  ret void
+}
+
+; Copies for VGPR arguments
+define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__sgpr_val__sgpr_vindex__sgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, float inreg %val, i32 inreg %vindex, i32 inreg %voffset, i32 inreg %soffset) {
+  ; CHECK-LABEL: name: struct_buffer_store__sgpr_rsrc__sgpr_val__sgpr_vindex__sgpr_voffset__sgpr_soffset
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9
+  ; CHECK:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
+  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
+  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
+  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
+  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK:   [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
+  ; CHECK:   [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
+  ; CHECK:   [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[COPY6]](s32)
+  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY8]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY9]](s32), [[COPY10]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+  ; CHECK:   S_ENDPGM 0
+  call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
+  ret void
+}
+
+; Waterfall for rsrc
+define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, float %val, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
+  ; CHECK-LABEL: name: struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
+  ; CHECK:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+  ; CHECK:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
+  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; CHECK:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
+  ; CHECK:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.1, %14, %bb.2
+  ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+  ; CHECK:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+  ; CHECK:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+  ; CHECK:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+  ; CHECK:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+  ; CHECK:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+  ; CHECK:   [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+  ; CHECK:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+  ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+  ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+  ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+  ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
+  ; CHECK: bb.3:
+  ; CHECK:   successors: %bb.4(0x80000000)
+  ; CHECK:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+  ; CHECK: bb.4:
+  ; CHECK:   S_ENDPGM 0
+  call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
+  ret void
+}
+
+; Waterfall for soffset
+define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset(<4 x i32> inreg %rsrc, float %val, i32 %vindex, i32 %voffset, i32 %soffset) {
+  ; CHECK-LABEL: name: struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+  ; CHECK:   [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr3
+  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; CHECK:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.1, %14, %bb.2
+  ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
+  ; CHECK:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY7]](s32), implicit $exec
+  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+  ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+  ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+  ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
+  ; CHECK: bb.3:
+  ; CHECK:   successors: %bb.4(0x80000000)
+  ; CHECK:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+  ; CHECK: bb.4:
+  ; CHECK:   S_ENDPGM 0
+  call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
+  ret void
+}
+
+; Waterfall for rsrc and soffset
+define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, float %val, i32 %vindex, i32 %voffset, i32 %soffset) {
+  ; CHECK-LABEL: name: struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+  ; CHECK:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+  ; CHECK:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
+  ; CHECK:   [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr7
+  ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+  ; CHECK:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; CHECK:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
+  ; CHECK:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.1, %14, %bb.2
+  ; CHECK:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+  ; CHECK:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+  ; CHECK:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+  ; CHECK:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+  ; CHECK:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+  ; CHECK:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+  ; CHECK:   [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+  ; CHECK:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+  ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+  ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+  ; CHECK:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
+  ; CHECK:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]](s32), implicit $exec
+  ; CHECK:   [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+  ; CHECK:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+  ; CHECK:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+  ; CHECK:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
+  ; CHECK: bb.3:
+  ; CHECK:   successors: %bb.4(0x80000000)
+  ; CHECK:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+  ; CHECK: bb.4:
+  ; CHECK:   S_ENDPGM 0
+  call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
+  ret void
+}
+
+declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smulh.mir
index 4eb3cae9ef90..4401a3c8bd9b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smulh.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smulh.mir
@@ -1,5 +1,9 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
+
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
 
 ---
 name: smulh_s32_ss
@@ -8,10 +12,16 @@ legalized: true
 body: |
   bb.0:
     liveins: $sgpr0, $sgpr1
-    ; CHECK-LABEL: name: smulh_s32_ss
-    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; CHECK: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
+
+    ; GFX6-LABEL: name: smulh_s32_ss
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]]
+    ; GFX9-LABEL: name: smulh_s32_ss
+    ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX9: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_SMULH %0, %1
@@ -24,10 +34,15 @@ legalized: true
 body: |
   bb.0:
     liveins: $sgpr0, $vgpr0
-    ; CHECK-LABEL: name: smulh_s32_sv
-    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
+
+    ; GFX6-LABEL: name: smulh_s32_sv
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX6: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
+    ; GFX9-LABEL: name: smulh_s32_sv
+    ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX9: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $vgpr0
     %2:_(s32) = G_SMULH %0, %1
@@ -40,11 +55,17 @@ legalized: true
 body: |
   bb.0:
     liveins: $sgpr0, $vgpr0
-    ; CHECK-LABEL: name: smulh_s32_vs
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]]
+
+    ; GFX6-LABEL: name: smulh_s32_vs
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]]
+    ; GFX9-LABEL: name: smulh_s32_vs
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]]
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $sgpr0
     %2:_(s32) = G_SMULH %0, %1
@@ -57,10 +78,15 @@ legalized: true
 body: |
   bb.0:
     liveins: $vgpr0, $vgpr1
-    ; CHECK-LABEL: name: smulh_s32_vv
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
-    ; CHECK: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
+
+    ; GFX6-LABEL: name: smulh_s32_vv
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX6: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
+    ; GFX9-LABEL: name: smulh_s32_vv
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX9: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = G_SMULH %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umulh.mir
index d801149e7a16..f62cd4cd4695 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umulh.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umulh.mir
@@ -1,5 +1,9 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
+# XUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
+
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+# XUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
 
 ---
 name: umulh_s32_ss
@@ -8,10 +12,16 @@ legalized: true
 body: |
   bb.0:
     liveins: $sgpr0, $sgpr1
-    ; CHECK-LABEL: name: umulh_s32_ss
-    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; CHECK: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
+
+    ; GFX6-LABEL: name: umulh_s32_ss
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]]
+    ; GFX9-LABEL: name: umulh_s32_ss
+    ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX9: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = G_UMULH %0, %1
@@ -24,10 +34,15 @@ legalized: true
 body: |
   bb.0:
     liveins: $sgpr0, $vgpr0
-    ; CHECK-LABEL: name: umulh_s32_sv
-    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
+
+    ; GFX6-LABEL: name: umulh_s32_sv
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX6: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
+    ; GFX9-LABEL: name: umulh_s32_sv
+    ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX9: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $vgpr0
     %2:_(s32) = G_UMULH %0, %1
@@ -40,11 +55,17 @@ legalized: true
 body: |
   bb.0:
     liveins: $sgpr0, $vgpr0
-    ; CHECK-LABEL: name: umulh_s32_vs
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]]
+
+    ; GFX6-LABEL: name: umulh_s32_vs
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; GFX6: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]]
+    ; GFX9-LABEL: name: umulh_s32_vs
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]]
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $sgpr0
     %2:_(s32) = G_UMULH %0, %1
@@ -57,10 +78,15 @@ legalized: true
 body: |
   bb.0:
     liveins: $vgpr0, $vgpr1
-    ; CHECK-LABEL: name: umulh_s32_vv
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
-    ; CHECK: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
+
+    ; GFX6-LABEL: name: umulh_s32_vv
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX6: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
+    ; GFX9-LABEL: name: umulh_s32_vv
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX9: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = G_UMULH %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
new file mode 100644
index 000000000000..915f63dbdd47
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
@@ -0,0 +1,70 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck %s
+
+# This would assert that a dead def should have no uses, but the dead
+# def and use have different subreg indices.
+
+---
+name: multi_def_dead_reg_subreg_check
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg: '$sgpr24_sgpr25_sgpr26_sgpr27'
+  scratchWaveOffsetReg: '$sgpr32'
+  frameOffsetReg: '$sgpr32'
+  stackPtrOffsetReg: '$sgpr32'
+  argumentInfo:
+    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+    privateSegmentWaveByteOffset: { reg: '$sgpr33' }
+body: |
+  ; CHECK-LABEL: name: multi_def_dead_reg_subreg_check
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $sgpr6_sgpr7
+  ; CHECK:   undef %0.sub3:vreg_512 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 0, [[V_MOV_B32_e32_]], implicit $exec
+  ; CHECK:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   [[COPY:%[0-9]+]]:vreg_512 = COPY %0
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5)
+  ; CHECK:   dead %6:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec
+  ; CHECK:   dead %8:vreg_64 = DS_READ_B64_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec
+  ; CHECK:   dead %9:vreg_128 = DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec
+  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+  ; CHECK:   undef %11.sub1:vreg_512 = COPY [[COPY]].sub1
+  ; CHECK:   INLINEASM &"", 1, 851978, def dead [[COPY1]], 851978, def dead [[COPY]].sub1, 2147483657, [[COPY1]], 2147549193, [[COPY]].sub1
+  ; CHECK:   %11.sub0:vreg_512 = COPY [[COPY]].sub0
+  ; CHECK:   %11.sub3:vreg_512 = COPY [[COPY]].sub3
+  ; CHECK:   dead %10:vgpr_32 = V_ADD_I32_e32 4, [[V_MOV_B32_e32_1]], implicit-def dead $vcc, implicit $exec
+  ; CHECK:   %11.sub2:vreg_512 = COPY undef [[V_MOV_B32_e32_]]
+  ; CHECK:   %11.sub5:vreg_512 = COPY undef [[V_MOV_B32_e32_]]
+  ; CHECK:   [[COPY2:%[0-9]+]]:vreg_512 = COPY %11
+  ; CHECK:   S_BRANCH %bb.1
+  bb.0:
+    liveins: $sgpr6_sgpr7
+
+    undef %0.sub3:vreg_512 = V_MOV_B32_e32 0, implicit $exec
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %2:vgpr_32 = V_ADD_U32_e32 0, %1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %4:vreg_512 = COPY %0
+
+  bb.1:
+    BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5)
+    %6:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec
+    %8:vreg_64 = DS_READ_B64_gfx9 %1, 0, 0, implicit $exec
+    %9:vreg_128 = DS_READ_B128_gfx9 %2, 0, 0, implicit $exec
+    %10:vgpr_32 = V_ADD_I32_e32 4, %3, implicit-def dead $vcc, implicit $exec
+    undef %11.sub0:vreg_512 = COPY %4.sub0
+    %12:vgpr_32 = COPY %4.sub0
+    %11.sub1:vreg_512 = COPY %4.sub1
+    INLINEASM &"", 1, 851978, def dead %12, 851978, def dead %4.sub1, 2147483657, %12, 2147549193, %4.sub1
+    %11.sub2:vreg_512 = COPY undef %1
+    %11.sub3:vreg_512 = COPY %4.sub3
+    %11.sub5:vreg_512 = COPY undef %1
+    %4:vreg_512 = COPY %11
+    S_BRANCH %bb.1
+
+...
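The new scheduler test above captures the invariant this patch relaxes: a def of one subregister can be marked dead while later code still reads different lanes of the same vreg_512, so "a dead def has no uses" only holds per lane, not per register. A minimal, self-contained sketch of that lane-overlap reasoning; the helper below is illustrative and is not code from the patch:

    #include "llvm/MC/LaneBitmask.h"

    // Whether a later read actually uses a def that was marked dead:
    // only an overlap of lane masks counts. Disjoint subregisters
    // (e.g. sub1 vs. sub3 of a 512-bit vreg) do not conflict.
    static bool readsDeadDefLanes(llvm::LaneBitmask DeadDefLanes,
                                  llvm::LaneBitmask UseLanes) {
      return (DeadDefLanes & UseLanes).any();
    }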
diff --git a/llvm/test/TableGen/immarg.td b/llvm/test/TableGen/immarg.td
new file mode 100644
index 000000000000..407f06c3a40e
--- /dev/null
+++ b/llvm/test/TableGen/immarg.td
@@ -0,0 +1,31 @@
+// RUN: llvm-tblgen -gen-global-isel -optimize-match-table=false -I %p/Common -I %p/../../include %s -o - < %s | FileCheck -check-prefix=GISEL %s
+
+include "llvm/Target/Target.td"
+include "GlobalISelEmitterCommon.td"
+
+let TargetPrefix = "mytarget" in {
+def int_mytarget_sleep0 : Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>;
+def int_mytarget_sleep1 : Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>;
+}
+
+// GISEL: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS,
+// GISEL-NEXT: // MIs[0] Operand 0
+// GISEL-NEXT: GIM_CheckIntrinsicID, /*MI*/0, /*Op*/0, Intrinsic::mytarget_sleep0,
+// GISEL-NEXT: // MIs[0] src
+// GISEL-NEXT: GIM_CheckIsImm, /*MI*/0, /*Op*/1,
+// GISEL-NEXT: // (intrinsic_void {{[0-9]+}}:{ *:[iPTR] }, (timm:{ *:[i32] }):$src) => (SLEEP0 (timm:{ *:[i32] }):$src)
+// GISEL-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::SLEEP0,
+// GISEL-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/1, // src
+def SLEEP0 : I<(outs), (ins i32imm:$src),
+  [(int_mytarget_sleep0 timm:$src)]
+>;
+
+// Test for situation which was crashing in ARM patterns.
+def p_imm : Operand<i32>;
+def SLEEP1 : I<(outs), (ins p_imm:$src), []>;
+
+// FIXME: This should not crash, but should it work or be an error?
+// def : Pat <
+//   (int_mytarget_sleep1 timm:$src),
+//   (SLEEP1 imm:$src)
+// >;
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index fbf0b4bd9ddd..d8d4c9f4f55c 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -1062,6 +1062,7 @@ public:
     IPM_Opcode,
     IPM_NumOperands,
     IPM_ImmPredicate,
+    IPM_Imm,
     IPM_AtomicOrderingMMO,
     IPM_MemoryLLTSize,
     IPM_MemoryVsLLTSize,
@@ -1340,6 +1341,23 @@ public:
   }
 };
 
+class ImmOperandMatcher : public OperandPredicateMatcher {
+public:
+  ImmOperandMatcher(unsigned InsnVarID, unsigned OpIdx)
+      : OperandPredicateMatcher(IPM_Imm, InsnVarID, OpIdx) {}
+
+  static bool classof(const PredicateMatcher *P) {
+    return P->getKind() == IPM_Imm;
+  }
+
+  void emitPredicateOpcodes(MatchTable &Table,
+                            RuleMatcher &Rule) const override {
+    Table << MatchTable::Opcode("GIM_CheckIsImm") << MatchTable::Comment("MI")
+          << MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Op")
+          << MatchTable::IntValue(OpIdx) << MatchTable::LineBreak;
+  }
+};
+
 /// Generates code to check that an operand is a G_CONSTANT with a particular
 /// int.
 class ConstantIntOperandMatcher : public OperandPredicateMatcher {
@@ -3794,6 +3812,10 @@ Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule,
         OM.addPredicate();
         return Error::success();
       }
+      if (SrcChild->getOperator()->getName() == "timm") {
+        OM.addPredicate<ImmOperandMatcher>();
+        return Error::success();
+      }
     }
   }
 
@@ -3943,7 +3965,10 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
   // rendered as operands.
   // FIXME: The target should be able to choose sign-extended when appropriate
   // (e.g. on Mips).
-  if (DstChild->getOperator()->getName() == "imm") {
+  if (DstChild->getOperator()->getName() == "timm") {
+    DstMIBuilder.addRenderer<CopyRenderer>(DstChild->getName());
+    return InsertPt;
+  } else if (DstChild->getOperator()->getName() == "imm") {
     DstMIBuilder.addRenderer<CopyConstantAsImmRenderer>(DstChild->getName());
     return InsertPt;
   } else if (DstChild->getOperator()->getName() == "fpimm") {
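Not reproduced above, but part of the same change per the diffstat, is the interpreter side of GIM_CheckIsImm in InstructionSelectorImpl.h. Modeled on the existing per-operand checks in executeMatchTable, the added case plausibly has this shape; the body below is a reconstruction under that assumption, not a quote from the patch:

    case GIM_CheckIsImm: {
      int64_t InsnID = MatchTable[CurrentIdx++];
      int64_t OpIdx = MatchTable[CurrentIdx++];
      assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
      // Reject the rule unless the operand is a MachineOperand immediate;
      // a G_CONSTANT materialized into a register intentionally does not
      // satisfy this check, which is the point of the timm/imm distinction.
      if (!State.MIs[InsnID]->getOperand(OpIdx).isImm()) {
        if (handleReject() == RejectAndGiveUp)
          return false;
      }
      break;
    }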